added documentation, small adjustments

This commit is contained in:
ThetaDev 2022-09-08 17:19:45 +02:00
parent 8146a9be41
commit f5ac3c747e
6 changed files with 86 additions and 211 deletions

View File

@ -3,7 +3,6 @@ package org.schabi.newpipe.extractor.services.youtube;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.utils.JavaScript;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.StringUtils;
import org.schabi.newpipe.extractor.utils.jsextractor.JavaScriptExtractor;
import java.util.HashMap;
@ -118,17 +117,12 @@ public final class YoutubeThrottlingDecrypter {
@Nonnull
private static String parseDecodeFunction(final String playerJsCode, final String functionName)
throws ParsingException {
return parseWithLexer(playerJsCode, functionName);
}
@Nonnull
private static String parseWithParenthesisMatching(final String playerJsCode,
final String functionName) {
final String functionBase = functionName + "=function";
return validateFunction(functionBase
+ StringUtils.matchToClosingParenthesis(playerJsCode, functionBase)
+ ";");
throws Parser.RegexException {
try {
return parseWithLexer(playerJsCode, functionName);
} catch (final Exception e) {
return parseWithRegex(playerJsCode, functionName);
}
}
@Nonnull

View File

@ -1,91 +0,0 @@
package org.schabi.newpipe.extractor.utils;
import javax.annotation.Nonnull;
/**
 * String helpers for cutting brace-delimited sections out of source text.
 */
public final class StringUtils {

    /** Value of the quote state while the scanner is outside of any quoted string. */
    private static final char NO_QUOTE = ' ';

    private StringUtils() {
    }

    /**
     * Extracts the text that follows {@code start} up to and including the closing brace
     * that balances the first opening brace found after {@code start}.
     *
     * <p>
     * The returned substring begins directly after the first occurrence of {@code start}
     * (so anything between {@code start} and the first <code>{</code> is included) and ends
     * with the matching <code>}</code>. Braces inside single- or double-quoted strings are
     * ignored. Regular expression literals and comments are not recognized, so braces
     * inside them are counted like any other brace.
     * </p>
     *
     * @param string The string to search in.
     * @param start  A string from which to start searching.
     * @return A substring where each '{' matches a '}'.
     * @throws IndexOutOfBoundsException If {@code string} does not contain {@code start},
     *                                   if no opening brace follows {@code start},
     *                                   or if the braces could not be matched.
     */
    @Nonnull
    public static String matchToClosingParenthesis(@Nonnull final String string,
                                                   @Nonnull final String start) {
        int startIndex = string.indexOf(start);
        if (startIndex < 0) {
            throw new IndexOutOfBoundsException("Start string not found");
        }
        startIndex += start.length();

        int endIndex = findNextParenthesis(string, startIndex, true);
        if (endIndex < 0) {
            // Without this check the -1 would be incremented to 0 and the scan below would
            // restart from the beginning of the string, possibly returning a bogus match.
            throw new IndexOutOfBoundsException("No opening brace found after start string");
        }
        ++endIndex;

        int openParenthesis = 1;
        while (openParenthesis > 0) {
            endIndex = findNextParenthesis(string, endIndex, false);
            if (endIndex < 0) {
                throw new IndexOutOfBoundsException("Could not find matching closing brace");
            }

            // findNextParenthesis only ever stops on '{' or '}'
            if (string.charAt(endIndex) == '{') {
                ++openParenthesis;
            } else {
                --openParenthesis;
            }
            ++endIndex;
        }

        return string.substring(startIndex, endIndex);
    }

    /**
     * Finds the next brace at or after {@code offset} that is not inside a quoted string.
     *
     * @param string   The string to scan.
     * @param offset   Index at which scanning starts; assumed to be outside of any quotes.
     * @param onlyOpen If {@code true}, only '{' is reported and '}' is skipped.
     * @return The index of the brace, or {@code -1} if none was found.
     */
    private static int findNextParenthesis(@Nonnull final String string,
                                           final int offset,
                                           final boolean onlyOpen) {
        boolean lastEscaped = false;
        char quote = NO_QUOTE;

        for (int i = offset; i < string.length(); i++) {
            boolean thisEscaped = false;
            final char c = string.charAt(i);

            switch (c) {
                case '{':
                    if (quote == NO_QUOTE) {
                        return i;
                    }
                    break;
                case '}':
                    if (!onlyOpen && quote == NO_QUOTE) {
                        return i;
                    }
                    break;
                case '\\':
                    // A backslash escapes the next character unless it is escaped itself
                    if (!lastEscaped) {
                        thisEscaped = true;
                    }
                    break;
                case '\'':
                case '"':
                    if (!lastEscaped) {
                        if (quote == NO_QUOTE) {
                            // Entering a quoted string
                            quote = c;
                        } else if (quote == c) {
                            // Leaving it; the other quote character is plain content
                            quote = NO_QUOTE;
                        }
                    }
                    break;
                default:
                    break;
            }

            lastEscaped = thisEscaped;
        }

        return -1;
    }
}

View File

@ -4,34 +4,46 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
import javax.annotation.Nonnull;
/**
* Utility class for extracting functions from JavaScript code.
*/
public final class JavaScriptExtractor {
private JavaScriptExtractor() {
}
/**
* Searches the given JavaScript code for the identifier of a function
* and returns its body.
*
* @param jsCode JavaScript code
* @param start start of the function (without the opening brace)
* @return extracted code (opening brace + function + closing brace)
* @throws ParsingException if {@code start} is not found or no matching braces are found
*/
@Nonnull
public static String matchToClosingBrace(final String playerJsCode, final String start)
public static String matchToClosingBrace(final String jsCode, final String start)
throws ParsingException {
int startIndex = playerJsCode.indexOf(start);
int startIndex = jsCode.indexOf(start);
if (startIndex < 0) {
throw new ParsingException("start not found");
throw new ParsingException("Start not found");
}
startIndex += start.length();
final String js = playerJsCode.substring(startIndex);
final String js = jsCode.substring(startIndex);
final Lexer lexer = new Lexer(js);
boolean visitedOpenBrace = false;
while (true) {
final Lexer.Item item = lexer.getNextToken();
final Token t = item.token;
final Lexer.ParsedToken parsedToken = lexer.getNextToken();
final Token t = parsedToken.token;
if (t == Token.LC) {
visitedOpenBrace = true;
} else if (visitedOpenBrace && lexer.isBalanced()) {
return js.substring(0, item.end);
return js.substring(0, parsedToken.end);
} else if (t == Token.EOF) {
throw new ParsingException("could not find matching braces");
throw new ParsingException("Could not find matching braces");
}
}
}

View File

@ -5,6 +5,15 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
import java.util.Stack;
/**
* JavaScript lexer that is able to parse JavaScript code and return its
* tokens.
*
* <p>
* The algorithm for distinguishing between division operators and regex literals
* was taken from the <a href="https://github.com/rusty-ecma/RESS/">RESS lexer</a>.
* </p>
*/
public class Lexer {
private static class Paren {
public final boolean funcExpr;
@ -95,12 +104,15 @@ public class Lexer {
}
}
public static class Item {
/**
* Parsed token, containing the token and its position in the input string
*/
public static class ParsedToken {
public final Token token;
public final int start;
public final int end;
Item(final Token token, final int start, final int end) {
ParsedToken(final Token token, final int start, final int end) {
this.token = token;
this.start = start;
this.end = end;
@ -112,6 +124,12 @@ public class Lexer {
private final Stack<Brace> braceStack;
private final Stack<Paren> parenStack;
/**
* Create a new JavaScript lexer with the given source code
*
* @param js JavaScript code
* @param languageVersion JavaScript version (from Rhino)
*/
public Lexer(final String js, final int languageVersion) {
stream = new TokenStream(js, 0, languageVersion);
lastThree = new LookBehind();
@ -119,11 +137,21 @@ public class Lexer {
parenStack = new Stack<>();
}
/**
* Create a new JavaScript lexer with the given source code
*
* @param js JavaScript code
*/
public Lexer(final String js) {
this(js, Context.VERSION_DEFAULT);
}
public Item getNextToken() throws ParsingException {
/**
* Continue parsing and return the next token
* @return next token
* @throws ParsingException
*/
public ParsedToken getNextToken() throws ParsingException {
Token token = stream.nextToken();
if ((token == Token.DIV || token == Token.ASSIGN_DIV) && isRegexStart()) {
@ -131,11 +159,15 @@ public class Lexer {
token = Token.REGEXP;
}
final Item item = new Item(token, stream.tokenBeg, stream.tokenEnd);
keepBooks(item);
return item;
final ParsedToken parsedToken = new ParsedToken(token, stream.tokenBeg, stream.tokenEnd);
keepBooks(parsedToken);
return parsedToken;
}
/**
* Check if the parser is balanced (equal amount of open and closed parentheses and braces)
* @return true if balanced
*/
public boolean isBalanced() {
return braceStack.isEmpty() && parenStack.isEmpty();
}
@ -144,9 +176,9 @@ public class Lexer {
* Evaluate the token for possible regex start and handle updating the
* {@code lastThree}, {@code parenStack} and {@code braceStack}
*/
void keepBooks(final Item item) throws ParsingException {
if (item.token.isPunct) {
switch (item.token) {
void keepBooks(final ParsedToken parsedToken) throws ParsingException {
if (parsedToken.token.isPunct) {
switch (parsedToken.token) {
case LP:
handleOpenParenBooks();
return;
@ -154,15 +186,15 @@ public class Lexer {
handleOpenBraceBooks();
return;
case RP:
handleCloseParenBooks(item.start);
handleCloseParenBooks(parsedToken.start);
return;
case RC:
handleCloseBraceBooks(item.start);
handleCloseBraceBooks(parsedToken.start);
return;
}
}
if (item.token != Token.COMMENT) {
lastThree.push(new MetaToken(item.token, stream.lineno));
if (parsedToken.token != Token.COMMENT) {
lastThree.push(new MetaToken(parsedToken.token, stream.lineno));
}
}

View File

@ -9,43 +9,42 @@ import org.schabi.newpipe.extractor.utils.jsextractor.Token;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.stream.Stream;
import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.schabi.newpipe.FileUtils.resolveTestResource;
public class JavaScriptExtractorTest
{
@Test
public void testJsExtractor() throws ParsingException {
void testJsExtractor() throws ParsingException {
final String src = "Wka=function(d){var x = [/,,/,913,/(,)}/g,\"abcdef}\\\"\",];var y = 10/2/1;return x[1][y];}//some={}random-padding+;";
final String result = JavaScriptExtractor.matchToClosingBrace(src, "Wka=function");
assertEquals("(d){var x = [/,,/,913,/(,)}/g,\"abcdef}\\\"\",];var y = 10/2/1;return x[1][y];}", result);
}
@Test
public void testEverythingJs() throws ParsingException, IOException {
File jsFile = resolveTestResource("es5.js");
StringBuilder contentBuilder = new StringBuilder();
Stream<String> stream = Files.lines(jsFile.toPath());
stream.forEach(s -> contentBuilder.append(s).append("\n"));
void testEverythingJs() throws ParsingException, IOException {
final File jsFile = resolveTestResource("es5.js");
final StringBuilder contentBuilder = new StringBuilder();
Files.lines(jsFile.toPath()).forEach(line -> contentBuilder.append(line).append("\n"));
final String js = contentBuilder.toString();
Lexer lexer = new Lexer(js);
Lexer.Item item = null;
final Lexer lexer = new Lexer(js);
Lexer.ParsedToken parsedToken = null;
try {
while (true) {
item = lexer.getNextToken();
if (item.token == Token.EOF) {
parsedToken = lexer.getNextToken();
if (parsedToken.token == Token.EOF) {
break;
}
}
} catch (Exception e){
if (item != null) {
System.out.println("Issue occured at pos " + item.end + ", after\n" +
js.substring(Math.max(0, item.start - 50), item.end));
} catch (final Exception e){
if (parsedToken != null) {
throw new ParsingException("Issue occured at pos " + parsedToken.end + ", after\n" +
js.substring(Math.max(0, parsedToken.start - 50), parsedToken.end), e);
}
throw e;
}

View File

@ -1,71 +0,0 @@
package org.schabi.newpipe.extractor.utils;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.schabi.newpipe.extractor.utils.StringUtils.matchToClosingParenthesis;
/**
 * Tests for {@link StringUtils#matchToClosingParenthesis(String, String)}.
 */
public class StringUtilsTest {
/**
 * Embeds a large decode function (signature + body) between unrelated JavaScript
 * snippets and checks that exactly the function body is returned when the
 * signature is used as the start string.
 */
@Test
public void actualDecodeFunction__success() {
// JavaScript placed in front of the function that should be found
String preNoise = "if(\"function\"===typeof b&&\"function\"===typeof c||\"function\"===typeof c&&\"function\"===typeof d)throw Error(\"It looks like you are passing several store enhancers to createStore(). This is not supported. Instead, compose them together to a single function.\");\"function\"===typeof b&&\"undefined\"===typeof c&&(c=b,b=void 0);if(\"undefined\"!==typeof c){if(\"function\"!==typeof c)throw Error(\"Expected the enhancer to be a function.\");return c(Dr)(a,b)}if(\"function\"!==typeof a)throw Error(\"Expected the reducer to be a function.\");\n" +
"var l=a,m=b,n=[],p=n,q=!1;h({type:Cr});a={};var t=(a.dispatch=h,a.subscribe=f,a.getState=e,a.replaceReducer=function(u){if(\"function\"!==typeof u)throw Error(\"Expected the nextReducer to be a function.\");l=u;h({type:hha});return t},a[Er]=function(){var u={};\n" +
"return u.subscribe=function(x){function y(){x.next&&x.next(e())}\n" +
"if(\"object\"!==typeof x||null===x)throw new TypeError(\"Expected the observer to be an object.\");y();return{unsubscribe:f(y)}},u[Er]=function(){return this},u},a);\n" +
"return t};\n" +
"Fr=function(a){De.call(this,a,-1,iha)};\n" +
"Gr=function(a){De.call(this,a)};\n" +
"jha=function(a,b){for(;Jd(b);)switch(b.C){case 10:var c=Od(b);Ge(a,1,c);break;case 18:c=Od(b);Ge(a,2,c);break;case 26:c=Od(b);Ge(a,3,c);break;case 34:c=Od(b);Ge(a,4,c);break;case 40:c=Hd(b.i);Ge(a,5,c);break;default:if(!we(b))return a}return a};";
// Start string handed to matchToClosingParenthesis
String signature = "kha=function(a)";
// Expected result: everything from the opening brace to its matching closing brace
String body = "{var b=a.split(\"\"),c=[-1186681497,-1653318181,372630254,function(d,e){for(var f=64,h=[];++f-h.length-32;){switch(f){case 58:f-=14;case 91:case 92:case 93:continue;case 123:f=47;case 94:case 95:case 96:continue;case 46:f=95}h.push(String.fromCharCode(f))}d.forEach(function(l,m,n){this.push(n[m]=h[(h.indexOf(l)-h.indexOf(this[m])+m-32+f--)%h.length])},e.split(\"\"))},\n" +
"-467738125,1158037010,function(d,e){e=(e%d.length+d.length)%d.length;var f=d[0];d[0]=d[e];d[e]=f},\n" +
"\"continue\",158531598,-172776392,function(d,e){e=(e%d.length+d.length)%d.length;d.splice(-e).reverse().forEach(function(f){d.unshift(f)})},\n" +
"-1753359936,function(d){for(var e=d.length;e;)d.push(d.splice(--e,1)[0])},\n" +
"1533713399,-1736576025,-1274201783,function(d){d.reverse()},\n" +
"169126570,1077517431,function(d,e){d.push(e)},\n" +
"-1807932259,-150219E3,480561184,-3495188,-1856307605,1416497372,b,-1034568435,-501230371,1979778585,null,b,-1049521459,function(d,e){e=(e%d.length+d.length)%d.length;d.splice(0,1,d.splice(e,1,d[0])[0])},\n" +
"1119056651,function(d,e){for(e=(e%d.length+d.length)%d.length;e--;)d.unshift(d.pop())},\n" +
"b,1460920438,135616752,-1807932259,-815823682,-387465417,1979778585,113585E4,function(d,e){d.push(e)},\n" +
"-1753359936,-241651400,-386043301,-144139513,null,null,function(d,e){e=(e%d.length+d.length)%d.length;d.splice(e,1)}];\n" +
"c[30]=c;c[49]=c;c[50]=c;try{c[51](c[26],c[25]),c[10](c[30],c[17]),c[5](c[28],c[9]),c[18](c[51]),c[14](c[19],c[21]),c[8](c[40],c[22]),c[50](c[35],c[28]),c[24](c[29],c[3]),c[0](c[31],c[19]),c[27](c[26],c[33]),c[29](c[36],c[40]),c[50](c[26]),c[27](c[32],c[9]),c[8](c[10],c[14]),c[35](c[44],c[28]),c[22](c[44],c[1]),c[8](c[11],c[3]),c[29](c[44]),c[21](c[41],c[45]),c[16](c[32],c[4]),c[17](c[14],c[26]),c[36](c[20],c[45]),c[43](c[35],c[39]),c[43](c[20],c[23]),c[43](c[10],c[51]),c[43](c[34],c[32]),c[29](c[34],\n" +
"c[49]),c[43](c[20],c[44]),c[49](c[20]),c[19](c[15],c[8]),c[36](c[15],c[46]),c[17](c[20],c[37]),c[18](c[10]),c[17](c[34],c[31]),c[19](c[10],c[30]),c[19](c[20],c[2]),c[36](c[20],c[21]),c[43](c[35],c[16]),c[19](c[35],c[5]),c[18](c[46],c[34])}catch(d){return\"enhanced_except_lJMB6-z-_w8_\"+a}return b.join(\"\")}";
// JavaScript placed behind the function that should be found
String postNoise = "Hr=function(a){this.i=a}";
String substring = matchToClosingParenthesis(preNoise + '\n' + signature + body + ";" + postNoise, signature);
assertEquals(body, substring);
}
/**
 * Surplus closing braces after a balanced block must not become part of the result.
 */
@Test
public void moreClosing__success() {
String expected = "{{{}}}";
String string = "a" + expected + "}}";
String substring = matchToClosingParenthesis(string, "a");
assertEquals(expected, substring);
}
/**
 * Input with two additional unmatched opening braces in front of a balanced block;
 * the test expects only the balanced block to be returned. Disabled, see the reason
 * given in the annotation.
 */
@Disabled("Functionality currently not needed")
@Test
public void lessClosing__success() {
String expected = "{{{}}}";
String string = "a{{" + expected;
String substring = matchToClosingParenthesis(string, "a");
assertEquals(expected, substring);
}
/**
 * A closing brace and an escaped double quote inside a double-quoted string
 * must not end the match early.
 */
@Test
void find_closing_with_quotes() {
final String expected = "{return \",}\\\"/\"}";
final String string = "function(d){return \",}\\\"/\"}";
final String substring = matchToClosingParenthesis(string, "function(d)");
assertEquals(expected, substring);
}
}