added documentation, small adjustments
This commit is contained in:
parent
8146a9be41
commit
f5ac3c747e
|
@ -3,7 +3,6 @@ package org.schabi.newpipe.extractor.services.youtube;
|
|||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.utils.JavaScript;
|
||||
import org.schabi.newpipe.extractor.utils.Parser;
|
||||
import org.schabi.newpipe.extractor.utils.StringUtils;
|
||||
import org.schabi.newpipe.extractor.utils.jsextractor.JavaScriptExtractor;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
@ -118,17 +117,12 @@ public final class YoutubeThrottlingDecrypter {
|
|||
|
||||
@Nonnull
|
||||
private static String parseDecodeFunction(final String playerJsCode, final String functionName)
|
||||
throws ParsingException {
|
||||
return parseWithLexer(playerJsCode, functionName);
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
private static String parseWithParenthesisMatching(final String playerJsCode,
|
||||
final String functionName) {
|
||||
final String functionBase = functionName + "=function";
|
||||
return validateFunction(functionBase
|
||||
+ StringUtils.matchToClosingParenthesis(playerJsCode, functionBase)
|
||||
+ ";");
|
||||
throws Parser.RegexException {
|
||||
try {
|
||||
return parseWithLexer(playerJsCode, functionName);
|
||||
} catch (final Exception e) {
|
||||
return parseWithRegex(playerJsCode, functionName);
|
||||
}
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
|
|
|
@ -1,91 +0,0 @@
|
|||
package org.schabi.newpipe.extractor.utils;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
|
||||
public final class StringUtils {
|
||||
|
||||
private StringUtils() {
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string The string to search in.
|
||||
* @param start A string from which to start searching.
|
||||
* @return A substring where each '{' matches a '}'.
|
||||
* @throws IndexOutOfBoundsException If {@code string} does not contain {@code start}
|
||||
* or parenthesis could not be matched .
|
||||
*/
|
||||
@Nonnull
|
||||
public static String matchToClosingParenthesis(@Nonnull final String string,
|
||||
@Nonnull final String start) {
|
||||
int startIndex = string.indexOf(start);
|
||||
if (startIndex < 0) {
|
||||
throw new IndexOutOfBoundsException();
|
||||
}
|
||||
|
||||
startIndex += start.length();
|
||||
int endIndex = findNextParenthesis(string, startIndex, true);
|
||||
++endIndex;
|
||||
|
||||
int openParenthesis = 1;
|
||||
while (openParenthesis > 0) {
|
||||
endIndex = findNextParenthesis(string, endIndex, false);
|
||||
|
||||
switch (string.charAt(endIndex)) {
|
||||
case '{':
|
||||
++openParenthesis;
|
||||
break;
|
||||
case '}':
|
||||
--openParenthesis;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
++endIndex;
|
||||
}
|
||||
|
||||
return string.substring(startIndex, endIndex);
|
||||
}
|
||||
|
||||
private static int findNextParenthesis(@Nonnull final String string,
|
||||
final int offset,
|
||||
final boolean onlyOpen) {
|
||||
boolean lastEscaped = false;
|
||||
char quote = ' ';
|
||||
|
||||
for (int i = offset; i < string.length(); i++) {
|
||||
boolean thisEscaped = false;
|
||||
final char c = string.charAt(i);
|
||||
|
||||
switch (c) {
|
||||
case '{':
|
||||
if (quote == ' ') {
|
||||
return i;
|
||||
}
|
||||
break;
|
||||
case '}':
|
||||
if (!onlyOpen && quote == ' ') {
|
||||
return i;
|
||||
}
|
||||
break;
|
||||
case '\\':
|
||||
if (!lastEscaped) {
|
||||
thisEscaped = true;
|
||||
}
|
||||
break;
|
||||
case '\'':
|
||||
case '"':
|
||||
if (!lastEscaped) {
|
||||
if (quote == ' ') {
|
||||
quote = c;
|
||||
} else if (quote == c) {
|
||||
quote = ' ';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lastEscaped = thisEscaped;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
}
|
|
@ -4,34 +4,46 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
|||
|
||||
import javax.annotation.Nonnull;
|
||||
|
||||
/**
|
||||
* Utility class for extracting functions from JavaScript code.
|
||||
*/
|
||||
public final class JavaScriptExtractor {
|
||||
private JavaScriptExtractor() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Searches the given JavaScript code for the identifier of a function
|
||||
* and returns its body.
|
||||
*
|
||||
* @param jsCode JavaScript code
|
||||
* @param start start of the function (without the opening brace)
|
||||
* @return the code that follows {@code start}, up to and including the closing brace that balances the first opening brace
|
||||
* @throws ParsingException if {@code start} is not found in the code or no matching closing brace can be found
|
||||
*/
|
||||
@Nonnull
|
||||
public static String matchToClosingBrace(final String playerJsCode, final String start)
|
||||
public static String matchToClosingBrace(final String jsCode, final String start)
|
||||
throws ParsingException {
|
||||
int startIndex = playerJsCode.indexOf(start);
|
||||
int startIndex = jsCode.indexOf(start);
|
||||
if (startIndex < 0) {
|
||||
throw new ParsingException("start not found");
|
||||
throw new ParsingException("Start not found");
|
||||
}
|
||||
startIndex += start.length();
|
||||
final String js = playerJsCode.substring(startIndex);
|
||||
final String js = jsCode.substring(startIndex);
|
||||
|
||||
final Lexer lexer = new Lexer(js);
|
||||
boolean visitedOpenBrace = false;
|
||||
|
||||
while (true) {
|
||||
final Lexer.Item item = lexer.getNextToken();
|
||||
final Token t = item.token;
|
||||
final Lexer.ParsedToken parsedToken = lexer.getNextToken();
|
||||
final Token t = parsedToken.token;
|
||||
|
||||
if (t == Token.LC) {
|
||||
visitedOpenBrace = true;
|
||||
} else if (visitedOpenBrace && lexer.isBalanced()) {
|
||||
return js.substring(0, item.end);
|
||||
return js.substring(0, parsedToken.end);
|
||||
} else if (t == Token.EOF) {
|
||||
throw new ParsingException("could not find matching braces");
|
||||
throw new ParsingException("Could not find matching braces");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,6 +5,15 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
|||
|
||||
import java.util.Stack;
|
||||
|
||||
/**
|
||||
* JavaScript lexer that is able to parse JavaScript code and return its
|
||||
* tokens.
|
||||
*
|
||||
* <p>
|
||||
* The algorithm for distinguishing between division operators and regex literals
|
||||
* was taken from the <a href="https://github.com/rusty-ecma/RESS/">RESS lexer</a>.
|
||||
* </p>
|
||||
*/
|
||||
public class Lexer {
|
||||
private static class Paren {
|
||||
public final boolean funcExpr;
|
||||
|
@ -95,12 +104,15 @@ public class Lexer {
|
|||
}
|
||||
}
|
||||
|
||||
public static class Item {
|
||||
/**
|
||||
* Parsed token, containing the token and its position in the input string
|
||||
*/
|
||||
public static class ParsedToken {
|
||||
public final Token token;
|
||||
public final int start;
|
||||
public final int end;
|
||||
|
||||
Item(final Token token, final int start, final int end) {
|
||||
ParsedToken(final Token token, final int start, final int end) {
|
||||
this.token = token;
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
|
@ -112,6 +124,12 @@ public class Lexer {
|
|||
private final Stack<Brace> braceStack;
|
||||
private final Stack<Paren> parenStack;
|
||||
|
||||
/**
|
||||
* Create a new JavaScript lexer with the given source code
|
||||
*
|
||||
* @param js JavaScript code
|
||||
* @param languageVersion JavaScript version (from Rhino)
|
||||
*/
|
||||
public Lexer(final String js, final int languageVersion) {
|
||||
stream = new TokenStream(js, 0, languageVersion);
|
||||
lastThree = new LookBehind();
|
||||
|
@ -119,11 +137,21 @@ public class Lexer {
|
|||
parenStack = new Stack<>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new JavaScript lexer with the given source code
|
||||
*
|
||||
* @param js JavaScript code
|
||||
*/
|
||||
public Lexer(final String js) {
|
||||
this(js, Context.VERSION_DEFAULT);
|
||||
}
|
||||
|
||||
public Item getNextToken() throws ParsingException {
|
||||
/**
|
||||
* Continue parsing and return the next token
|
||||
* @return next token
|
||||
* @throws ParsingException
|
||||
*/
|
||||
public ParsedToken getNextToken() throws ParsingException {
|
||||
Token token = stream.nextToken();
|
||||
|
||||
if ((token == Token.DIV || token == Token.ASSIGN_DIV) && isRegexStart()) {
|
||||
|
@ -131,11 +159,15 @@ public class Lexer {
|
|||
token = Token.REGEXP;
|
||||
}
|
||||
|
||||
final Item item = new Item(token, stream.tokenBeg, stream.tokenEnd);
|
||||
keepBooks(item);
|
||||
return item;
|
||||
final ParsedToken parsedToken = new ParsedToken(token, stream.tokenBeg, stream.tokenEnd);
|
||||
keepBooks(parsedToken);
|
||||
return parsedToken;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the parser is balanced (equal amount of open and closed parentheses and braces)
|
||||
* @return true if balanced
|
||||
*/
|
||||
public boolean isBalanced() {
|
||||
return braceStack.isEmpty() && parenStack.isEmpty();
|
||||
}
|
||||
|
@ -144,9 +176,9 @@ public class Lexer {
|
|||
* Evaluate the token for possible regex start and handle updating the
|
||||
* `lastThree`, `parenStack` and `braceStack`
|
||||
*/
|
||||
void keepBooks(final Item item) throws ParsingException {
|
||||
if (item.token.isPunct) {
|
||||
switch (item.token) {
|
||||
void keepBooks(final ParsedToken parsedToken) throws ParsingException {
|
||||
if (parsedToken.token.isPunct) {
|
||||
switch (parsedToken.token) {
|
||||
case LP:
|
||||
handleOpenParenBooks();
|
||||
return;
|
||||
|
@ -154,15 +186,15 @@ public class Lexer {
|
|||
handleOpenBraceBooks();
|
||||
return;
|
||||
case RP:
|
||||
handleCloseParenBooks(item.start);
|
||||
handleCloseParenBooks(parsedToken.start);
|
||||
return;
|
||||
case RC:
|
||||
handleCloseBraceBooks(item.start);
|
||||
handleCloseBraceBooks(parsedToken.start);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (item.token != Token.COMMENT) {
|
||||
lastThree.push(new MetaToken(item.token, stream.lineno));
|
||||
if (parsedToken.token != Token.COMMENT) {
|
||||
lastThree.push(new MetaToken(parsedToken.token, stream.lineno));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -9,43 +9,42 @@ import org.schabi.newpipe.extractor.utils.jsextractor.Token;
|
|||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.schabi.newpipe.FileUtils.resolveTestResource;
|
||||
|
||||
public class JavaScriptExtractorTest
|
||||
{
|
||||
@Test
|
||||
public void testJsExtractor() throws ParsingException {
|
||||
void testJsExtractor() throws ParsingException {
|
||||
final String src = "Wka=function(d){var x = [/,,/,913,/(,)}/g,\"abcdef}\\\"\",];var y = 10/2/1;return x[1][y];}//some={}random-padding+;";
|
||||
final String result = JavaScriptExtractor.matchToClosingBrace(src, "Wka=function");
|
||||
assertEquals("(d){var x = [/,,/,913,/(,)}/g,\"abcdef}\\\"\",];var y = 10/2/1;return x[1][y];}", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEverythingJs() throws ParsingException, IOException {
|
||||
File jsFile = resolveTestResource("es5.js");
|
||||
StringBuilder contentBuilder = new StringBuilder();
|
||||
Stream<String> stream = Files.lines(jsFile.toPath());
|
||||
stream.forEach(s -> contentBuilder.append(s).append("\n"));
|
||||
void testEverythingJs() throws ParsingException, IOException {
|
||||
final File jsFile = resolveTestResource("es5.js");
|
||||
final StringBuilder contentBuilder = new StringBuilder();
|
||||
Files.lines(jsFile.toPath()).forEach(line -> contentBuilder.append(line).append("\n"));
|
||||
|
||||
final String js = contentBuilder.toString();
|
||||
|
||||
Lexer lexer = new Lexer(js);
|
||||
Lexer.Item item = null;
|
||||
final Lexer lexer = new Lexer(js);
|
||||
Lexer.ParsedToken parsedToken = null;
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
item = lexer.getNextToken();
|
||||
if (item.token == Token.EOF) {
|
||||
parsedToken = lexer.getNextToken();
|
||||
if (parsedToken.token == Token.EOF) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (Exception e){
|
||||
if (item != null) {
|
||||
System.out.println("Issue occured at pos " + item.end + ", after\n" +
|
||||
js.substring(Math.max(0, item.start - 50), item.end));
|
||||
} catch (final Exception e){
|
||||
if (parsedToken != null) {
|
||||
throw new ParsingException("Issue occured at pos " + parsedToken.end + ", after\n" +
|
||||
js.substring(Math.max(0, parsedToken.start - 50), parsedToken.end), e);
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
|
|
|
@ -1,71 +0,0 @@
|
|||
package org.schabi.newpipe.extractor.utils;
|
||||
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.schabi.newpipe.extractor.utils.StringUtils.matchToClosingParenthesis;
|
||||
|
||||
/**
 * Tests for {@code StringUtils.matchToClosingParenthesis}: each case passes a string
 * and a start marker and compares the returned substring with the expected
 * brace-balanced block.
 */
public class StringUtilsTest {
|
||||
|
||||
// A function body embedded between unrelated JavaScript ("noise") before and after
// it must be returned exactly, without any of the surrounding code.
@Test
|
||||
public void actualDecodeFunction__success() {
|
||||
String preNoise = "if(\"function\"===typeof b&&\"function\"===typeof c||\"function\"===typeof c&&\"function\"===typeof d)throw Error(\"It looks like you are passing several store enhancers to createStore(). This is not supported. Instead, compose them together to a single function.\");\"function\"===typeof b&&\"undefined\"===typeof c&&(c=b,b=void 0);if(\"undefined\"!==typeof c){if(\"function\"!==typeof c)throw Error(\"Expected the enhancer to be a function.\");return c(Dr)(a,b)}if(\"function\"!==typeof a)throw Error(\"Expected the reducer to be a function.\");\n" +
|
||||
"var l=a,m=b,n=[],p=n,q=!1;h({type:Cr});a={};var t=(a.dispatch=h,a.subscribe=f,a.getState=e,a.replaceReducer=function(u){if(\"function\"!==typeof u)throw Error(\"Expected the nextReducer to be a function.\");l=u;h({type:hha});return t},a[Er]=function(){var u={};\n" +
|
||||
"return u.subscribe=function(x){function y(){x.next&&x.next(e())}\n" +
|
||||
"if(\"object\"!==typeof x||null===x)throw new TypeError(\"Expected the observer to be an object.\");y();return{unsubscribe:f(y)}},u[Er]=function(){return this},u},a);\n" +
|
||||
"return t};\n" +
|
||||
"Fr=function(a){De.call(this,a,-1,iha)};\n" +
|
||||
"Gr=function(a){De.call(this,a)};\n" +
|
||||
"jha=function(a,b){for(;Jd(b);)switch(b.C){case 10:var c=Od(b);Ge(a,1,c);break;case 18:c=Od(b);Ge(a,2,c);break;case 26:c=Od(b);Ge(a,3,c);break;case 34:c=Od(b);Ge(a,4,c);break;case 40:c=Hd(b.i);Ge(a,5,c);break;default:if(!we(b))return a}return a};";
|
||||
String signature = "kha=function(a)";
|
||||
String body = "{var b=a.split(\"\"),c=[-1186681497,-1653318181,372630254,function(d,e){for(var f=64,h=[];++f-h.length-32;){switch(f){case 58:f-=14;case 91:case 92:case 93:continue;case 123:f=47;case 94:case 95:case 96:continue;case 46:f=95}h.push(String.fromCharCode(f))}d.forEach(function(l,m,n){this.push(n[m]=h[(h.indexOf(l)-h.indexOf(this[m])+m-32+f--)%h.length])},e.split(\"\"))},\n" +
|
||||
"-467738125,1158037010,function(d,e){e=(e%d.length+d.length)%d.length;var f=d[0];d[0]=d[e];d[e]=f},\n" +
|
||||
"\"continue\",158531598,-172776392,function(d,e){e=(e%d.length+d.length)%d.length;d.splice(-e).reverse().forEach(function(f){d.unshift(f)})},\n" +
|
||||
"-1753359936,function(d){for(var e=d.length;e;)d.push(d.splice(--e,1)[0])},\n" +
|
||||
"1533713399,-1736576025,-1274201783,function(d){d.reverse()},\n" +
|
||||
"169126570,1077517431,function(d,e){d.push(e)},\n" +
|
||||
"-1807932259,-150219E3,480561184,-3495188,-1856307605,1416497372,b,-1034568435,-501230371,1979778585,null,b,-1049521459,function(d,e){e=(e%d.length+d.length)%d.length;d.splice(0,1,d.splice(e,1,d[0])[0])},\n" +
|
||||
"1119056651,function(d,e){for(e=(e%d.length+d.length)%d.length;e--;)d.unshift(d.pop())},\n" +
|
||||
"b,1460920438,135616752,-1807932259,-815823682,-387465417,1979778585,113585E4,function(d,e){d.push(e)},\n" +
|
||||
"-1753359936,-241651400,-386043301,-144139513,null,null,function(d,e){e=(e%d.length+d.length)%d.length;d.splice(e,1)}];\n" +
|
||||
"c[30]=c;c[49]=c;c[50]=c;try{c[51](c[26],c[25]),c[10](c[30],c[17]),c[5](c[28],c[9]),c[18](c[51]),c[14](c[19],c[21]),c[8](c[40],c[22]),c[50](c[35],c[28]),c[24](c[29],c[3]),c[0](c[31],c[19]),c[27](c[26],c[33]),c[29](c[36],c[40]),c[50](c[26]),c[27](c[32],c[9]),c[8](c[10],c[14]),c[35](c[44],c[28]),c[22](c[44],c[1]),c[8](c[11],c[3]),c[29](c[44]),c[21](c[41],c[45]),c[16](c[32],c[4]),c[17](c[14],c[26]),c[36](c[20],c[45]),c[43](c[35],c[39]),c[43](c[20],c[23]),c[43](c[10],c[51]),c[43](c[34],c[32]),c[29](c[34],\n" +
|
||||
"c[49]),c[43](c[20],c[44]),c[49](c[20]),c[19](c[15],c[8]),c[36](c[15],c[46]),c[17](c[20],c[37]),c[18](c[10]),c[17](c[34],c[31]),c[19](c[10],c[30]),c[19](c[20],c[2]),c[36](c[20],c[21]),c[43](c[35],c[16]),c[19](c[35],c[5]),c[18](c[46],c[34])}catch(d){return\"enhanced_except_lJMB6-z-_w8_\"+a}return b.join(\"\")}";
|
||||
String postNoise = "Hr=function(a){this.i=a}";
|
||||
|
||||
// The searched text is: noise, newline, signature, body, ';', more noise.
String substring = matchToClosingParenthesis(preNoise + '\n' + signature + body + ";" + postNoise, signature);
|
||||
|
||||
assertEquals(body, substring);
|
||||
}
|
||||
|
||||
// Surplus closing braces after the balanced block must not become part of the result.
@Test
|
||||
public void moreClosing__success() {
|
||||
String expected = "{{{}}}";
|
||||
String string = "a" + expected + "}}";
|
||||
|
||||
String substring = matchToClosingParenthesis(string, "a");
|
||||
|
||||
assertEquals(expected, substring);
|
||||
}
|
||||
|
||||
// Disabled case: the input has two unclosed opening braces in front of the balanced
// block, and the test expects only the balanced block to be returned.
@Disabled("Functionality currently not needed")
|
||||
@Test
|
||||
public void lessClosing__success() {
|
||||
String expected = "{{{}}}";
|
||||
String string = "a{{" + expected;
|
||||
|
||||
String substring = matchToClosingParenthesis(string, "a");
|
||||
|
||||
assertEquals(expected, substring);
|
||||
}
|
||||
|
||||
// A closing brace and an escaped quote inside a double-quoted string literal must
// not end the match early.
@Test
|
||||
void find_closing_with_quotes() {
|
||||
final String expected = "{return \",}\\\"/\"}";
|
||||
final String string = "function(d){return \",}\\\"/\"}";
|
||||
|
||||
final String substring = matchToClosingParenthesis(string, "function(d)");
|
||||
|
||||
assertEquals(expected, substring);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue