diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java
index 8160e6315..f812e7007 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java
@@ -3,7 +3,7 @@ package org.schabi.newpipe.extractor.services.youtube;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.utils.JavaScript;
import org.schabi.newpipe.extractor.utils.Parser;
-import org.schabi.newpipe.extractor.utils.StringUtils;
+import org.schabi.newpipe.extractor.utils.jsextractor.JavaScriptExtractor;
import java.util.HashMap;
import java.util.Map;
@@ -119,21 +119,12 @@ public final class YoutubeThrottlingDecrypter {
private static String parseDecodeFunction(final String playerJsCode, final String functionName)
throws Parser.RegexException {
try {
- return parseWithParenthesisMatching(playerJsCode, functionName);
+ return parseWithLexer(playerJsCode, functionName);
} catch (final Exception e) {
return parseWithRegex(playerJsCode, functionName);
}
}
- @Nonnull
- private static String parseWithParenthesisMatching(final String playerJsCode,
- final String functionName) {
- final String functionBase = functionName + "=function";
- return validateFunction(functionBase
- + StringUtils.matchToClosingParenthesis(playerJsCode, functionBase)
- + ";");
- }
-
@Nonnull
private static String parseWithRegex(final String playerJsCode, final String functionName)
throws Parser.RegexException {
@@ -153,6 +144,14 @@ public final class YoutubeThrottlingDecrypter {
return function;
}
+    /**
+     * Extracts the throttling function by lexing the player JavaScript code, starting at
+     * {@code functionName=function} and ending at the brace which closes that function.
+     *
+     * @param playerJsCode the player JavaScript code
+     * @param functionName the name of the function to extract
+     * @return the function assignment, terminated with a semicolon
+     * @throws ParsingException if the function could not be located or its braces do not match
+     */
+    @Nonnull
+    private static String parseWithLexer(final String playerJsCode, final String functionName)
+            throws ParsingException {
+        final String functionStart = functionName + "=function";
+        final String rest = JavaScriptExtractor.matchToClosingBrace(playerJsCode, functionStart);
+        return functionStart + rest + ";";
+    }
+
private static boolean containsNParam(final String url) {
return Parser.isMatch(N_PARAM_PATTERN, url);
}
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/StringUtils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/StringUtils.java
deleted file mode 100644
index 9a6091a4d..000000000
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/StringUtils.java
+++ /dev/null
@@ -1,91 +0,0 @@
-package org.schabi.newpipe.extractor.utils;
-
-import javax.annotation.Nonnull;
-
-public final class StringUtils {
-
- private StringUtils() {
- }
-
- /**
- * @param string The string to search in.
- * @param start A string from which to start searching.
- * @return A substring where each '{' matches a '}'.
- * @throws IndexOutOfBoundsException If {@code string} does not contain {@code start}
- * or parenthesis could not be matched .
- */
- @Nonnull
- public static String matchToClosingParenthesis(@Nonnull final String string,
- @Nonnull final String start) {
- int startIndex = string.indexOf(start);
- if (startIndex < 0) {
- throw new IndexOutOfBoundsException();
- }
-
- startIndex += start.length();
- int endIndex = findNextParenthesis(string, startIndex, true);
- ++endIndex;
-
- int openParenthesis = 1;
- while (openParenthesis > 0) {
- endIndex = findNextParenthesis(string, endIndex, false);
-
- switch (string.charAt(endIndex)) {
- case '{':
- ++openParenthesis;
- break;
- case '}':
- --openParenthesis;
- break;
- default:
- break;
- }
- ++endIndex;
- }
-
- return string.substring(startIndex, endIndex);
- }
-
- private static int findNextParenthesis(@Nonnull final String string,
- final int offset,
- final boolean onlyOpen) {
- boolean lastEscaped = false;
- char quote = ' ';
-
- for (int i = offset; i < string.length(); i++) {
- boolean thisEscaped = false;
- final char c = string.charAt(i);
-
- switch (c) {
- case '{':
- if (quote == ' ') {
- return i;
- }
- break;
- case '}':
- if (!onlyOpen && quote == ' ') {
- return i;
- }
- break;
- case '\\':
- if (!lastEscaped) {
- thisEscaped = true;
- }
- break;
- case '\'':
- case '"':
- if (!lastEscaped) {
- if (quote == ' ') {
- quote = c;
- } else if (quote == c) {
- quote = ' ';
- }
- }
- }
-
- lastEscaped = thisEscaped;
- }
-
- return -1;
- }
-}
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/JavaScriptExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/JavaScriptExtractor.java
new file mode 100644
index 000000000..da2aadac3
--- /dev/null
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/JavaScriptExtractor.java
@@ -0,0 +1,50 @@
+package org.schabi.newpipe.extractor.utils.jsextractor;
+
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+
+import javax.annotation.Nonnull;
+
+/**
+ * Helper for cutting complete function definitions out of JavaScript source code.
+ */
+public final class JavaScriptExtractor {
+    private JavaScriptExtractor() {
+        // static utility class, never instantiated
+    }
+
+    /**
+     * Locates {@code start} in the given JavaScript code and returns the code following it,
+     * up to and including the brace that closes the first brace opened after {@code start}.
+     *
+     * @param jsCode JavaScript code
+     * @param start  start of the function (without the opening brace)
+     * @return the code after {@code start}, ending with the matching closing brace
+     * @throws ParsingException if {@code start} is not contained in {@code jsCode}, if the end
+     *                          of the code is reached before the braces are balanced, or if
+     *                          the lexer fails
+     */
+    @Nonnull
+    public static String matchToClosingBrace(final String jsCode, final String start)
+            throws ParsingException {
+        final int matchIndex = jsCode.indexOf(start);
+        if (matchIndex < 0) {
+            throw new ParsingException("Start not found");
+        }
+        // Only the code behind the start marker is handed to the lexer,
+        // so all token positions are relative to this substring
+        final String js = jsCode.substring(matchIndex + start.length());
+
+        final Lexer lexer = new Lexer(js);
+        boolean visitedOpenBrace = false;
+
+        for (;;) {
+            final Lexer.ParsedToken current = lexer.getNextToken();
+
+            if (current.token == Token.LC) {
+                visitedOpenBrace = true;
+                continue;
+            }
+            if (visitedOpenBrace && lexer.isBalanced()) {
+                // Every brace and parenthesis opened so far has been closed again
+                return js.substring(0, current.end);
+            }
+            if (current.token == Token.EOF) {
+                throw new ParsingException("Could not find matching braces");
+            }
+        }
+    }
+}
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/Lexer.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/Lexer.java
new file mode 100644
index 000000000..b92a850cf
--- /dev/null
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/Lexer.java
@@ -0,0 +1,311 @@
+package org.schabi.newpipe.extractor.utils.jsextractor;
+
+import org.mozilla.javascript.Context;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+
+import java.util.Stack;
+
+/**
+ * JavaScript lexer that is able to parse JavaScript code and return its
+ * tokens.
+ *
+ * <p>
+ * The algorithm for distinguishing between division operators and regex literals
+ * was taken from the RESS lexer. It relies on the last three significant tokens and on
+ * the parentheses and braces which are currently open.
+ * </p>
+ */
+public class Lexer {
+    /**
+     * Bookkeeping information about a parenthesis pair.
+     */
+    private static class Paren {
+        /** Set when the parenthesis follows a {@code function} in expression position. */
+        public final boolean funcExpr;
+        /** Set when the parenthesis follows a token for which isConditional() is true. */
+        public final boolean conditional;
+
+        Paren(final boolean funcExpr, final boolean conditional) {
+            this.funcExpr = funcExpr;
+            this.conditional = conditional;
+        }
+    }
+
+    /**
+     * Bookkeeping information about a brace pair.
+     */
+    private static class Brace {
+        /** Result of the block heuristic applied when the brace was opened. */
+        public final boolean isBlock;
+        /** Parenthesis pair closed directly before the opening brace, or {@code null}. */
+        public final Paren paren;
+
+        Brace(final boolean isBlock, final Paren paren) {
+            this.isBlock = isBlock;
+            this.paren = paren;
+        }
+    }
+
+    /**
+     * A token together with the line number reported by the stream when it was recorded.
+     */
+    private static class MetaToken {
+        public final Token token;
+        public final int lineno;
+
+        MetaToken(final Token token, final int lineno) {
+            this.token = token;
+            this.lineno = lineno;
+        }
+    }
+
+    /**
+     * Meta token for an opening or closing brace, carrying the brace information.
+     */
+    private static class BraceMetaToken extends MetaToken {
+        public final Brace brace;
+
+        BraceMetaToken(final Token token, final int lineno, final Brace brace) {
+            super(token, lineno);
+            this.brace = brace;
+        }
+    }
+
+    /**
+     * Meta token for an opening or closing parenthesis, carrying the parenthesis information.
+     */
+    private static class ParenMetaToken extends MetaToken {
+        public final Paren paren;
+
+        ParenMetaToken(final Token token, final int lineno, final Paren paren) {
+            super(token, lineno);
+            this.paren = paren;
+        }
+    }
+
+    /**
+     * Fixed-size history of the three most recently recorded tokens
+     * (index 0 holds the newest one).
+     */
+    private static class LookBehind {
+        private final MetaToken[] list;
+
+        LookBehind() {
+            list = new MetaToken[3];
+        }
+
+        /**
+         * Insert the token as newest entry, shifting the others back and dropping the oldest.
+         */
+        void push(final MetaToken t) {
+            MetaToken toShift = t;
+            for (int i = 0; i < 3; i++) {
+                final MetaToken tmp = list[i];
+                list[i] = toShift;
+                toShift = tmp;
+            }
+        }
+
+        MetaToken one() {
+            return list[0];
+        }
+
+        MetaToken two() {
+            return list[1];
+        }
+
+        MetaToken three() {
+            return list[2];
+        }
+
+        boolean oneIs(final Token token) {
+            return list[0] != null && list[0].token == token;
+        }
+
+        boolean twoIs(final Token token) {
+            return list[1] != null && list[1].token == token;
+        }
+
+        boolean threeIs(final Token token) {
+            return list[2] != null && list[2].token == token;
+        }
+    }
+
+    /**
+     * Parsed token, containing the token and its position in the input string
+     */
+    public static class ParsedToken {
+        public final Token token;
+        public final int start;
+        public final int end;
+
+        ParsedToken(final Token token, final int start, final int end) {
+            this.token = token;
+            this.start = start;
+            this.end = end;
+        }
+    }
+
+    private final TokenStream stream;
+    private final LookBehind lastThree;
+    // Typed stacks: the elements are read back as Brace/Paren without casting
+    private final Stack<Brace> braceStack;
+    private final Stack<Paren> parenStack;
+
+    /**
+     * Create a new JavaScript lexer with the given source code
+     *
+     * @param js JavaScript code
+     * @param languageVersion JavaScript version (from Rhino)
+     */
+    public Lexer(final String js, final int languageVersion) {
+        stream = new TokenStream(js, 0, languageVersion);
+        lastThree = new LookBehind();
+        braceStack = new Stack<>();
+        parenStack = new Stack<>();
+    }
+
+    /**
+     * Create a new JavaScript lexer with the given source code,
+     * using Rhino's default language version
+     *
+     * @param js JavaScript code
+     */
+    public Lexer(final String js) {
+        this(js, Context.VERSION_DEFAULT);
+    }
+
+    /**
+     * Continue parsing and return the next token.
+     *
+     * <p>
+     * A {@code /} or {@code /=} token which is found at a position where a regex
+     * literal may start is read as a whole regex and returned as {@link Token#REGEXP}.
+     * </p>
+     *
+     * @return next token
+     * @throws ParsingException if the token stream fails or a closing parenthesis/brace
+     *                          has no matching opening one
+     */
+    public ParsedToken getNextToken() throws ParsingException {
+        Token token = stream.nextToken();
+
+        if ((token == Token.DIV || token == Token.ASSIGN_DIV) && isRegexStart()) {
+            stream.readRegExp(token);
+            token = Token.REGEXP;
+        }
+
+        final ParsedToken parsedToken = new ParsedToken(token, stream.tokenBeg, stream.tokenEnd);
+        keepBooks(parsedToken);
+        return parsedToken;
+    }
+
+    /**
+     * Check if the parser is balanced (equal amount of open and closed parentheses and braces)
+     * @return true if balanced
+     */
+    public boolean isBalanced() {
+        return braceStack.isEmpty() && parenStack.isEmpty();
+    }
+
+    /**
+     * Record the token for the regex detection: parentheses and braces update
+     * {@code parenStack} and {@code braceStack}, and every token except comments
+     * is pushed to {@code lastThree}.
+     */
+    void keepBooks(final ParsedToken parsedToken) throws ParsingException {
+        if (parsedToken.token.isPunct) {
+            switch (parsedToken.token) {
+                case LP:
+                    handleOpenParenBooks();
+                    return;
+                case LC:
+                    handleOpenBraceBooks();
+                    return;
+                case RP:
+                    handleCloseParenBooks(parsedToken.start);
+                    return;
+                case RC:
+                    handleCloseBraceBooks(parsedToken.start);
+                    return;
+                default:
+                    // all other punctuation is recorded like an ordinary token below
+                    break;
+            }
+        }
+        if (parsedToken.token != Token.COMMENT) {
+            lastThree.push(new MetaToken(parsedToken.token, stream.lineno));
+        }
+    }
+
+    /**
+     * Handle the book keeping when we find an `(`
+     */
+    void handleOpenParenBooks() {
+        // "function (" or "function name (" preceded by an expression token
+        boolean funcExpr = false;
+        if (lastThree.oneIs(Token.FUNCTION)) {
+            funcExpr = lastThree.two() != null && checkForExpression(lastThree.two().token);
+        } else if (lastThree.twoIs(Token.FUNCTION)) {
+            funcExpr = lastThree.three() != null && checkForExpression(lastThree.three().token);
+        }
+
+        final boolean conditional = lastThree.one() != null
+                && lastThree.one().token.isConditional();
+
+        final Paren paren = new Paren(funcExpr, conditional);
+        parenStack.push(paren);
+        lastThree.push(new ParenMetaToken(Token.LP, stream.lineno, paren));
+    }
+
+    /**
+     * Handle the book keeping when we find an `{`
+     */
+    void handleOpenBraceBooks() {
+        boolean isBlock = true;
+        if (lastThree.one() != null) {
+            switch (lastThree.one().token) {
+                case LP:
+                case LC:
+                case CASE:
+                    isBlock = false;
+                    break;
+                case COLON:
+                    // inherit the kind of the enclosing brace
+                    isBlock = !braceStack.isEmpty() && braceStack.lastElement().isBlock;
+                    break;
+                case RETURN:
+                case YIELD:
+                case YIELD_STAR:
+                    isBlock = lastThree.two() != null && lastThree.two().lineno != stream.lineno;
+                    break;
+                default:
+                    isBlock = !lastThree.one().token.isOp;
+            }
+        }
+
+        // Remember the parenthesis pair closed right before this brace, if there is one
+        Paren paren = null;
+        if (lastThree.one() instanceof ParenMetaToken && lastThree.one().token == Token.RP) {
+            paren = ((ParenMetaToken) lastThree.one()).paren;
+        }
+        final Brace brace = new Brace(isBlock, paren);
+        braceStack.push(brace);
+        lastThree.push(new BraceMetaToken(Token.LC, stream.lineno, brace));
+    }
+
+    /**
+     * Handle the book keeping when we find an `)`
+     *
+     * @param start position of the parenthesis, used for the error message
+     * @throws ParsingException if there is no open parenthesis to close
+     */
+    void handleCloseParenBooks(final int start) throws ParsingException {
+        if (parenStack.isEmpty()) {
+            throw new ParsingException("unmatched closing paren at " + start);
+        }
+        lastThree.push(new ParenMetaToken(Token.RP, stream.lineno, parenStack.pop()));
+    }
+
+    /**
+     * Handle the book keeping when we find an `}`
+     *
+     * @param start position of the brace, used for the error message
+     * @throws ParsingException if there is no open brace to close
+     */
+    void handleCloseBraceBooks(final int start) throws ParsingException {
+        if (braceStack.isEmpty()) {
+            throw new ParsingException("unmatched closing brace at " + start);
+        }
+        lastThree.push(new BraceMetaToken(Token.RC, stream.lineno, braceStack.pop()));
+    }
+
+    /**
+     * @return true if the token is an operator, {@code return} or {@code case}
+     */
+    boolean checkForExpression(final Token token) {
+        return token.isOp || token == Token.RETURN || token == Token.CASE;
+    }
+
+    /**
+     * Detect if the `/` is the beginning of a regex or is division.
+     * The decision is based on the most recently recorded token.
+     *
+     * @return isRegexStart
+     */
+    boolean isRegexStart() {
+        final MetaToken last = lastThree.one();
+        if (last == null) {
+            // nothing was recorded before the slash
+            return true;
+        }
+
+        final Token t = last.token;
+        if (t.isKeyw) {
+            return t != Token.THIS;
+        }
+        if (t == Token.RP && last instanceof ParenMetaToken) {
+            return ((ParenMetaToken) last).paren.conditional;
+        }
+        if (t == Token.RC && last instanceof BraceMetaToken) {
+            final Brace brace = ((BraceMetaToken) last).brace;
+            // after a block, unless the block is the body of a function expression
+            return brace.isBlock && (brace.paren == null || !brace.paren.funcExpr);
+        }
+        return t.isPunct && t != Token.RB;
+    }
+}
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/Token.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/Token.java
new file mode 100644
index 000000000..2c4fb414a
--- /dev/null
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/Token.java
@@ -0,0 +1,121 @@
+package org.schabi.newpipe.extractor.utils.jsextractor;
+
+/**
+ * Token types produced by the JavaScript lexer. Every type carries three flags telling
+ * whether it counts as an operator, as punctuation and as a keyword; types declared
+ * without arguments have all three flags unset.
+ */
+public enum Token {
+    ERROR,
+    EOF,
+    EOL,
+    RETURN(false, false, true),
+    BITOR(true, true, false),
+    BITXOR(true, true, false),
+    BITAND(true, true, false),
+    EQ(true, true, false),
+    NE(true, true, false),
+    LT(true, true, false),
+    LE(true, true, false),
+    GT(true, true, false),
+    GE(true, true, false),
+    LSH(true, true, false),
+    RSH(true, true, false),
+    URSH(true, true, false),
+    ADD(true, true, false),
+    SUB(true, true, false),
+    MUL(true, true, false),
+    DIV(true, true, false),
+    MOD(true, true, false),
+    NOT(true, true, false),
+    BITNOT(true, true, false),
+    NEW(true, false, true),
+    DELPROP(true, false, true),
+    TYPEOF(true, false, true),
+    NAME,
+    NUMBER,
+    STRING,
+    NULL(false, false, true),
+    THIS(false, false, true),
+    FALSE(false, false, true),
+    TRUE(false, false, true),
+    SHEQ(true, true, false), // shallow equality (===)
+    SHNE(true, true, false), // shallow inequality (!==)
+    REGEXP,
+    THROW(true, false, true),
+    IN(true, false, true),
+    INSTANCEOF(true, false, true),
+    YIELD(false, false, true), // JS 1.7 yield pseudo keyword
+    EXP(true, true, false), // exponentiation operator (**)
+    BIGINT, // ES2020 BigInt
+    TRY(false, false, true),
+    SEMI(false, true, false), // semicolon
+    LB(false, true, false), // left and right brackets
+    RB(false, true, false),
+    LC(false, true, false), // left and right curlies (braces)
+    RC(false, true, false),
+    LP(false, true, false), // left and right parentheses
+    RP(false, true, false),
+    COMMA(false, true, false), // comma operator
+    ASSIGN(true, true, false), // simple assignment (=)
+    ASSIGN_BITOR(true, true, false), // |=
+    ASSIGN_BITXOR(true, true, false), // ^=
+    ASSIGN_BITAND(true, true, false), // &=
+    ASSIGN_LSH(true, true, false), // <<=
+    ASSIGN_RSH(true, true, false), // >>=
+    ASSIGN_URSH(true, true, false), // >>>=
+    ASSIGN_ADD(true, true, false), // +=
+    ASSIGN_SUB(true, true, false), // -=
+    ASSIGN_MUL(true, true, false), // *=
+    ASSIGN_DIV(true, true, false), // /=
+    ASSIGN_MOD(true, true, false), // %=
+    ASSIGN_EXP(true, true, false), // **=
+    HOOK(true, true, false), // conditional (?:)
+    COLON(true, true, false),
+    OR(true, true, false), // logical or (||)
+    AND(true, true, false), // logical and (&&)
+    INC(true, true, false), // increment/decrement (++ --)
+    DEC(true, true, false),
+    DOT(false, true, false), // member operator (.)
+    FUNCTION(false, false, true), // function keyword
+    EXPORT(false, false, true), // export keyword
+    IMPORT(false, false, true), // import keyword
+    IF(false, false, true), // if keyword
+    ELSE(false, false, true), // else keyword
+    SWITCH(false, false, true), // switch keyword
+    CASE(false, false, true), // case keyword
+    DEFAULT(false, false, true), // default keyword
+    WHILE(false, false, true), // while keyword
+    DO(false, false, true), // do keyword
+    FOR(false, false, true), // for keyword
+    BREAK(false, false, true), // break keyword
+    CONTINUE(false, false, true), // continue keyword
+    VAR(false, false, true), // var keyword
+    WITH(false, false, true), // with keyword
+    CATCH(false, false, true), // catch keyword
+    FINALLY(false, false, true), // finally keyword
+    VOID(true, false, true), // void keyword
+    RESERVED(false, false, true), // reserved keywords
+    LET(false, false, true), // JS 1.7 let pseudo keyword
+    CONST(false, false, true),
+    DEBUGGER(false, false, true),
+    COMMENT,
+    ARROW(false, true, false), // ES6 ArrowFunction
+    YIELD_STAR(false, false, true), // ES6 "yield *", a specialization of yield
+    TEMPLATE_LITERAL; // template literal
+
+    /** Whether the token counts as an operator. */
+    public final boolean isOp;
+    /** Whether the token counts as punctuation. */
+    public final boolean isPunct;
+    /** Whether the token counts as a keyword. */
+    public final boolean isKeyw;
+
+    Token(final boolean isOp, final boolean isPunct, final boolean isKeyw) {
+        this.isOp = isOp;
+        this.isPunct = isPunct;
+        this.isKeyw = isKeyw;
+    }
+
+    /** Creates a token type which is neither an operator, nor punctuation, nor a keyword. */
+    Token() {
+        this(false, false, false);
+    }
+
+    /**
+     * @return true for {@link #IF}, {@link #FOR}, {@link #WHILE} and {@link #WITH}
+     */
+    public boolean isConditional() {
+        switch (this) {
+            case IF:
+            case FOR:
+            case WHILE:
+            case WITH:
+                return true;
+            default:
+                return false;
+        }
+    }
+}
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/TokenStream.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/TokenStream.java
new file mode 100644
index 000000000..81651d227
--- /dev/null
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/jsextractor/TokenStream.java
@@ -0,0 +1,1161 @@
+package org.schabi.newpipe.extractor.utils.jsextractor;
+
+import org.mozilla.javascript.Context;
+import org.mozilla.javascript.Kit;
+import org.mozilla.javascript.ObjToIntMap;
+import org.mozilla.javascript.ScriptRuntime;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+
+/* Source: Mozilla Rhino, org.mozilla.javascript.TokenStream
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ * */
+class TokenStream {
+ /*
+ * Sentinel used in place of a character when the end of the input is reached.
+ * It has to be out of the char range so it can be checked without relying on
+ * exceptions. Note the distinction from the EOF token type!
+ */
+ private static final int EOF_CHAR = -1;
+
+ /*
+ * Return value of readDigits() to signal that the caller has
+ * to report a number format problem.
+ */
+ private static final int REPORT_NUMBER_FORMAT_ERROR = -2;
+
+ // A byte order mark terminates an identifier while scanning
+ private static final char BYTE_ORDER_MARK = '\uFEFF';
+ // NOTE(review): presumably the separator allowed between digits (1_000) - confirm in readDigits()
+ private static final char NUMERIC_SEPARATOR = '_';
+
+    /**
+     * Creates a token stream reading from the beginning of the given source code.
+     *
+     * @param sourceString    the JavaScript code to tokenize
+     * @param lineno          the initial line number
+     * @param languageVersion the JavaScript language version (one of Rhino's Context versions)
+     */
+    TokenStream(final String sourceString, final int lineno, final int languageVersion) {
+        this.languageVersion = languageVersion;
+        this.lineno = lineno;
+
+        // start reading at the first character of the source
+        this.sourceString = sourceString;
+        sourceCursor = 0;
+        cursor = 0;
+    }
+
+    /**
+     * @return true if {@code s} is a keyword or reserved word of the given language version
+     */
+    static boolean isKeyword(final String s, final int version, final boolean isStrict) {
+        // Token.EOF is what the lookup returns for words which are not keywords
+        final Token keyword = stringToKeyword(s, version, isStrict);
+        return keyword != Token.EOF;
+    }
+
+    /**
+     * Looks up the keyword token for a word, using the keyword table matching the
+     * language version ({@code isStrict} is only relevant from ES6 on).
+     *
+     * @return the keyword token, or {@link Token#EOF} if the word is not a keyword
+     */
+    private static Token stringToKeyword(final String name, final int version,
+                                         final boolean isStrict) {
+        return version < Context.VERSION_ES6
+                ? stringToKeywordForJS(name)
+                : stringToKeywordForES(name, isStrict);
+    }
+
+ /**
+ * Keyword table for JavaScript 1.8 and earlier.
+ *
+ * @param name the word to look up
+ * @return the token of the keyword, {@link Token#RESERVED} for the reserved words
+ * listed at the end of the table, or {@link Token#EOF} if the word is not a keyword
+ */
+ private static Token stringToKeywordForJS(final String name) {
+ switch (name) {
+ case "break":
+ return Token.BREAK;
+ case "case":
+ return Token.CASE;
+ case "continue":
+ return Token.CONTINUE;
+ case "default":
+ return Token.DEFAULT;
+ case "delete":
+ return Token.DELPROP;
+ case "do":
+ return Token.DO;
+ case "else":
+ return Token.ELSE;
+ case "export":
+ return Token.EXPORT;
+ case "false":
+ return Token.FALSE;
+ case "for":
+ return Token.FOR;
+ case "function":
+ return Token.FUNCTION;
+ case "if":
+ return Token.IF;
+ case "in":
+ return Token.IN;
+ case "let":
+ return Token.LET;
+ case "new":
+ return Token.NEW;
+ case "null":
+ return Token.NULL;
+ case "return":
+ return Token.RETURN;
+ case "switch":
+ return Token.SWITCH;
+ case "this":
+ return Token.THIS;
+ case "true":
+ return Token.TRUE;
+ case "typeof":
+ return Token.TYPEOF;
+ case "var":
+ return Token.VAR;
+ case "void":
+ return Token.VOID;
+ case "while":
+ return Token.WHILE;
+ case "with":
+ return Token.WITH;
+ case "yield":
+ return Token.YIELD;
+ case "throw":
+ return Token.THROW;
+ case "catch":
+ return Token.CATCH;
+ case "const":
+ return Token.CONST;
+ case "debugger":
+ return Token.DEBUGGER;
+ case "finally":
+ return Token.FINALLY;
+ case "instanceof":
+ return Token.INSTANCEOF;
+ case "try":
+ return Token.TRY;
+ // words without a token of their own, all mapped to RESERVED
+ case "abstract":
+ case "boolean":
+ case "byte":
+ case "char":
+ case "class":
+ case "double":
+ case "enum":
+ case "extends":
+ case "final":
+ case "float":
+ case "goto":
+ case "implements":
+ case "import":
+ case "int":
+ case "interface":
+ case "long":
+ case "native":
+ case "package":
+ case "private":
+ case "protected":
+ case "public":
+ case "short":
+ case "static":
+ case "super":
+ case "synchronized":
+ case "throws":
+ case "transient":
+ case "volatile":
+ return Token.RESERVED;
+ }
+ // not a keyword
+ return Token.EOF;
+ }
+
+ /**
+ * Keyword table for ECMAScript 6.
+ *
+ * @param name the word to look up
+ * @param isStrict whether the words which are only reserved in strict mode
+ * ({@code implements}, {@code interface}, {@code package}, {@code private},
+ * {@code protected}, {@code public}, {@code static}) should be reported as reserved
+ * @return the token of the keyword, {@link Token#RESERVED} for reserved words,
+ * or {@link Token#EOF} if the word is not a keyword
+ */
+ private static Token stringToKeywordForES(final String name, final boolean isStrict) {
+ switch (name) {
+ case "break":
+ return Token.BREAK;
+ case "case":
+ return Token.CASE;
+ case "catch":
+ return Token.CATCH;
+ case "const":
+ return Token.CONST;
+ case "continue":
+ return Token.CONTINUE;
+ case "debugger":
+ return Token.DEBUGGER;
+ case "default":
+ return Token.DEFAULT;
+ case "delete":
+ return Token.DELPROP;
+ case "do":
+ return Token.DO;
+ case "else":
+ return Token.ELSE;
+ case "export":
+ return Token.EXPORT;
+ case "finally":
+ return Token.FINALLY;
+ case "for":
+ return Token.FOR;
+ case "function":
+ return Token.FUNCTION;
+ case "if":
+ return Token.IF;
+ case "import":
+ return Token.IMPORT;
+ case "in":
+ return Token.IN;
+ case "instanceof":
+ return Token.INSTANCEOF;
+ case "new":
+ return Token.NEW;
+ case "return":
+ return Token.RETURN;
+ case "switch":
+ return Token.SWITCH;
+ case "this":
+ return Token.THIS;
+ case "throw":
+ return Token.THROW;
+ case "try":
+ return Token.TRY;
+ case "typeof":
+ return Token.TYPEOF;
+ case "var":
+ return Token.VAR;
+ case "void":
+ return Token.VOID;
+ case "while":
+ return Token.WHILE;
+ case "with":
+ return Token.WITH;
+ case "yield":
+ return Token.YIELD;
+ case "false":
+ return Token.FALSE;
+ case "null":
+ return Token.NULL;
+ case "true":
+ return Token.TRUE;
+ case "let":
+ return Token.LET;
+ // always reserved
+ case "class":
+ case "extends":
+ case "super":
+ case "await":
+ case "enum":
+ return Token.RESERVED;
+ // reserved in strict mode only, plain identifiers otherwise
+ case "implements":
+ case "interface":
+ case "package":
+ case "private":
+ case "protected":
+ case "public":
+ case "static":
+ if (isStrict) {
+ return Token.RESERVED;
+ }
+ break;
+ }
+ // not a keyword
+ return Token.EOF;
+ }
+
+ @SuppressWarnings("checkstyle:MethodLength")
+ final Token getToken() throws ParsingException {
+ int c;
+
+ for (;;) {
+ // Eat whitespace, possibly sensitive to newlines.
+ for (;;) {
+ c = getChar();
+ if (c == EOF_CHAR) {
+ tokenBeg = cursor - 1;
+ tokenEnd = cursor;
+ return Token.EOF;
+ } else if (c == '\n') {
+ dirtyLine = false;
+ tokenBeg = cursor - 1;
+ tokenEnd = cursor;
+ return Token.EOL;
+ } else if (!isJSSpace(c)) {
+ if (c != '-') {
+ dirtyLine = true;
+ }
+ break;
+ }
+ }
+
+ // Assume the token will be 1 char - fixed up below.
+ tokenBeg = cursor - 1;
+ tokenEnd = cursor;
+
+ // identifier/keyword/instanceof?
+ // watch out for starting with a
+ final boolean identifierStart;
+ boolean isUnicodeEscapeStart = false;
+ if (c == '\\') {
+ c = getChar();
+ if (c == 'u') {
+ identifierStart = true;
+ isUnicodeEscapeStart = true;
+ stringBufferTop = 0;
+ } else {
+ identifierStart = false;
+ ungetChar(c);
+ c = '\\';
+ }
+ } else {
+ identifierStart = Character.isJavaIdentifierStart((char) c);
+ if (identifierStart) {
+ stringBufferTop = 0;
+ addToString(c);
+ }
+ }
+
+ if (identifierStart) {
+ boolean containsEscape = isUnicodeEscapeStart;
+ for (;;) {
+ if (isUnicodeEscapeStart) {
+ // strictly speaking we should probably push-back
+ // all the bad characters if the uXXXX
+ // sequence is malformed. But since there isn't a
+ // correct context(is there?) for a bad Unicode
+ // escape sequence in an identifier, we can report
+ // an error here.
+ int escapeVal = 0;
+ for (int i = 0; i != 4; ++i) {
+ c = getChar();
+ escapeVal = Kit.xDigitToInt(c, escapeVal);
+ // Next check takes care about c < 0 and bad escape
+ if (escapeVal < 0) {
+ break;
+ }
+ }
+ if (escapeVal < 0) {
+ throw new ParsingException("invalid unicode escape");
+ }
+ addToString(escapeVal);
+ isUnicodeEscapeStart = false;
+ } else {
+ c = getChar();
+ if (c == '\\') {
+ c = getChar();
+ if (c == 'u') {
+ isUnicodeEscapeStart = true;
+ containsEscape = true;
+ } else {
+ throw new ParsingException(
+ String.format("illegal character: '%c'", c));
+ }
+ } else {
+ if (c == EOF_CHAR
+ || c == BYTE_ORDER_MARK
+ || !Character.isJavaIdentifierPart((char) c)) {
+ break;
+ }
+ addToString(c);
+ }
+ }
+ }
+ ungetChar(c);
+
+ String str = getStringFromBuffer();
+ if (!containsEscape) {
+ // OPT we shouldn't have to make a string (object!) to
+ // check if it's a keyword.
+
+ // Return the corresponding token if it's a keyword
+ Token result = stringToKeyword(str, languageVersion, STRICT_MODE);
+ if (result != Token.EOF) {
+ if ((result == Token.LET || result == Token.YIELD)
+ && languageVersion < Context.VERSION_1_7) {
+ // LET and YIELD are tokens only in 1.7 and later
+ string = result == Token.LET ? "let" : "yield";
+ result = Token.NAME;
+ }
+ // Save the string in case we need to use in
+ // object literal definitions.
+ this.string = (String) allStrings.intern(str);
+ if (result != Token.RESERVED) {
+ return result;
+ } else if (languageVersion >= Context.VERSION_ES6) {
+ return result;
+ } else if (!IS_RESERVED_KEYWORD_AS_IDENTIFIER) {
+ return result;
+ }
+ }
+ } else if (isKeyword(
+ str,
+ languageVersion,
+ STRICT_MODE)) {
+ // If a string contains unicodes, and converted to a keyword,
+ // we convert the last character back to unicode
+ str = convertLastCharToHex(str);
+ }
+ this.string = (String) allStrings.intern(str);
+ return Token.NAME;
+ }
+
+ // is it a number?
+ if (isDigit(c) || (c == '.' && isDigit(peekChar()))) {
+ stringBufferTop = 0;
+ int base = 10;
+ final boolean es6 = languageVersion >= Context.VERSION_ES6;
+ boolean isOldOctal = false;
+
+ if (c == '0') {
+ c = getChar();
+ if (c == 'x' || c == 'X') {
+ base = 16;
+ c = getChar();
+ } else if (es6 && (c == 'o' || c == 'O')) {
+ base = 8;
+ c = getChar();
+ } else if (es6 && (c == 'b' || c == 'B')) {
+ base = 2;
+ c = getChar();
+ } else if (isDigit(c)) {
+ base = 8;
+ isOldOctal = true;
+ } else {
+ addToString('0');
+ }
+ }
+
+ final int emptyDetector = stringBufferTop;
+ if (base == 10 || base == 16 || (base == 8 && !isOldOctal) || base == 2) {
+ c = readDigits(base, c);
+ if (c == REPORT_NUMBER_FORMAT_ERROR) {
+ throw new ParsingException("number format error");
+ }
+ } else {
+ while (isDigit(c)) {
+ // finally the oldOctal case
+ if (c >= '8') {
+ /*
+ * We permit 08 and 09 as decimal numbers, which
+ * makes our behavior a superset of the ECMA
+ * numeric grammar. We might not always be so
+ * permissive, so we warn about it.
+ */
+ base = 10;
+
+ c = readDigits(base, c);
+ if (c == REPORT_NUMBER_FORMAT_ERROR) {
+ throw new ParsingException("number format error");
+ }
+ break;
+ }
+ addToString(c);
+ c = getChar();
+ }
+ }
+ if (stringBufferTop == emptyDetector && base != 10) {
+ throw new ParsingException("number format error");
+ }
+
+ if (es6 && c == 'n') {
+ c = getChar();
+ } else if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
+ if (c == '.') {
+ addToString(c);
+ c = getChar();
+ c = readDigits(base, c);
+ if (c == REPORT_NUMBER_FORMAT_ERROR) {
+ throw new ParsingException("number format error");
+ }
+ }
+ if (c == 'e' || c == 'E') {
+ addToString(c);
+ c = getChar();
+ if (c == '+' || c == '-') {
+ addToString(c);
+ c = getChar();
+ }
+ if (!isDigit(c)) {
+ throw new ParsingException("missing exponent");
+ }
+ c = readDigits(base, c);
+ if (c == REPORT_NUMBER_FORMAT_ERROR) {
+ throw new ParsingException("number format error");
+ }
+ }
+ }
+ ungetChar(c);
+ this.string = getStringFromBuffer();
+ return Token.NUMBER;
+ }
+
+ // is it a string or template literal?
+ if (c == '"' || c == '\'' || c == '`') {
+ // We attempt to accumulate a string the fast way, by
+ // building it directly out of the reader. But if there
+ // are any escaped characters in the string, we revert to
+ // building it out of a StringBuffer.
+
+ // delimiter for last string literal scanned
+ final int quoteChar = c;
+ stringBufferTop = 0;
+
+ c = getCharIgnoreLineEnd(false);
+ strLoop:
+ while (c != quoteChar) {
+ boolean unterminated = false;
+ if (c == EOF_CHAR) {
+ unterminated = true;
+ } else if (c == '\n') {
+ switch (lineEndChar) {
+ case '\n':
+ case '\r':
+ unterminated = true;
+ break;
+ case 0x2028: //
+ case 0x2029: //
+ // Line/Paragraph separators need to be included as is
+ c = lineEndChar;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (unterminated) {
+ throw new ParsingException("unterminated string literal");
+ }
+
+ if (c == '\\') {
+ // We've hit an escaped character
+ int escapeVal;
+
+ c = getChar();
+ switch (c) {
+ case 'b':
+ c = '\b';
+ break;
+ case 'f':
+ c = '\f';
+ break;
+ case 'n':
+ c = '\n';
+ break;
+ case 'r':
+ c = '\r';
+ break;
+ case 't':
+ c = '\t';
+ break;
+
+ // \v a late addition to the ECMA spec,
+ // it is not in Java, so use 0xb
+ case 'v':
+ c = 0xb;
+ break;
+
+ case 'u':
+ // Get 4 hex digits; if the u escape is not
+ // followed by 4 hex digits, use 'u' + the
+ // literal character sequence that follows.
+ final int escapeStart = stringBufferTop;
+ addToString('u');
+ escapeVal = 0;
+ for (int i = 0; i != 4; ++i) {
+ c = getChar();
+ escapeVal = Kit.xDigitToInt(c, escapeVal);
+ if (escapeVal < 0) {
+ continue strLoop;
+ }
+ addToString(c);
+ }
+ // prepare for replace of stored 'u' sequence
+ // by escape value
+ stringBufferTop = escapeStart;
+ c = escapeVal;
+ break;
+ case 'x':
+ // Get 2 hex digits, defaulting to 'x'+literal
+ // sequence, as above.
+ c = getChar();
+ escapeVal = Kit.xDigitToInt(c, 0);
+ if (escapeVal < 0) {
+ addToString('x');
+ continue strLoop;
+ }
+ final int c1 = c;
+ c = getChar();
+ escapeVal = Kit.xDigitToInt(c, escapeVal);
+ if (escapeVal < 0) {
+ addToString('x');
+ addToString(c1);
+ continue strLoop;
+ }
+ // got 2 hex digits
+ c = escapeVal;
+ break;
+
+ case '\n':
+ // Remove line terminator after escape to follow
+ // SpiderMonkey and C/C++
+ c = getChar();
+ continue strLoop;
+
+ default:
+ if ('0' <= c && c < '8') {
+ int val = c - '0';
+ c = getChar();
+ if ('0' <= c && c < '8') {
+ val = 8 * val + c - '0';
+ c = getChar();
+ if ('0' <= c && c < '8' && val <= 037) {
+ // c is 3rd char of octal sequence only
+ // if the resulting val <= 0377
+ val = 8 * val + c - '0';
+ c = getChar();
+ }
+ }
+ ungetChar(c);
+ c = val;
+ }
+ }
+ }
+ addToString(c);
+ c = getChar(false);
+ }
+
+ final String str = getStringFromBuffer();
+ this.string = (String) allStrings.intern(str);
+ return quoteChar == '`' ? Token.TEMPLATE_LITERAL : Token.STRING;
+ }
+
+ switch (c) {
+ case ';':
+ return Token.SEMI;
+ case '[':
+ return Token.LB;
+ case ']':
+ return Token.RB;
+ case '{':
+ return Token.LC;
+ case '}':
+ return Token.RC;
+ case '(':
+ return Token.LP;
+ case ')':
+ return Token.RP;
+ case ',':
+ return Token.COMMA;
+ case '?':
+ return Token.HOOK;
+ case ':':
+ return Token.COLON;
+ case '.':
+ return Token.DOT;
+
+ case '|':
+ if (matchChar('|')) {
+ return Token.OR;
+ } else if (matchChar('=')) {
+ return Token.ASSIGN_BITOR;
+ } else {
+ return Token.BITOR;
+ }
+
+ case '^':
+ if (matchChar('=')) {
+ return Token.ASSIGN_BITXOR;
+ }
+ return Token.BITXOR;
+
+ case '&':
+ if (matchChar('&')) {
+ return Token.AND;
+ } else if (matchChar('=')) {
+ return Token.ASSIGN_BITAND;
+ } else {
+ return Token.BITAND;
+ }
+
+ case '=':
+ if (matchChar('=')) {
+ if (matchChar('=')) {
+ return Token.SHEQ;
+ }
+ return Token.EQ;
+ } else if (matchChar('>')) {
+ return Token.ARROW;
+ } else {
+ return Token.ASSIGN;
+ }
+
+ case '!':
+ if (matchChar('=')) {
+ if (matchChar('=')) {
+ return Token.SHNE;
+ }
+ return Token.NE;
+ }
+ return Token.NOT;
+
+ case '<':
+ /* NB:treat HTML begin-comment as comment-till-eol */
+ if (matchChar('!')) {
+ if (matchChar('-')) {
+ if (matchChar('-')) {
+ tokenBeg = cursor - 4;
+ skipLine();
+ return Token.COMMENT;
+ }
+ ungetCharIgnoreLineEnd('-');
+ }
+ ungetCharIgnoreLineEnd('!');
+ }
+ if (matchChar('<')) {
+ if (matchChar('=')) {
+ return Token.ASSIGN_LSH;
+ }
+ return Token.LSH;
+ }
+ if (matchChar('=')) {
+ return Token.LE;
+ }
+ return Token.LT;
+
+ case '>':
+ if (matchChar('>')) {
+ if (matchChar('>')) {
+ if (matchChar('=')) {
+ return Token.ASSIGN_URSH;
+ }
+ return Token.URSH;
+ }
+ if (matchChar('=')) {
+ return Token.ASSIGN_RSH;
+ }
+ return Token.RSH;
+ }
+ if (matchChar('=')) {
+ return Token.GE;
+ }
+ return Token.GT;
+
+ case '*':
+ if (languageVersion >= Context.VERSION_ES6) {
+ if (matchChar('*')) {
+ if (matchChar('=')) {
+ return Token.ASSIGN_EXP;
+ }
+ return Token.EXP;
+ }
+ }
+ if (matchChar('=')) {
+ return Token.ASSIGN_MUL;
+ }
+ return Token.MUL;
+
+ case '/':
+ // is it a // comment?
+ if (matchChar('/')) {
+ tokenBeg = cursor - 2;
+ skipLine();
+ return Token.COMMENT;
+ }
+ // is it a /* or /** comment?
+ if (matchChar('*')) {
+ boolean lookForSlash = false;
+ tokenBeg = cursor - 2;
+ if (matchChar('*')) {
+ lookForSlash = true;
+ }
+ for (;;) {
+ c = getChar();
+ if (c == EOF_CHAR) {
+ tokenEnd = cursor - 1;
+ throw new ParsingException("unterminated comment");
+ } else if (c == '*') {
+ lookForSlash = true;
+ } else if (c == '/') {
+ if (lookForSlash) {
+ tokenEnd = cursor;
+ return Token.COMMENT;
+ }
+ } else {
+ lookForSlash = false;
+ tokenEnd = cursor;
+ }
+ }
+ }
+
+ if (matchChar('=')) {
+ return Token.ASSIGN_DIV;
+ }
+ return Token.DIV;
+
+ case '%':
+ if (matchChar('=')) {
+ return Token.ASSIGN_MOD;
+ }
+ return Token.MOD;
+
+ case '~':
+ return Token.BITNOT;
+
+ case '+':
+ if (matchChar('=')) {
+ return Token.ASSIGN_ADD;
+ } else if (matchChar('+')) {
+ return Token.INC;
+ } else {
+ return Token.ADD;
+ }
+
+ case '-':
+ Token t = Token.SUB;
+ if (matchChar('=')) {
+ t = Token.ASSIGN_SUB;
+ } else if (matchChar('-')) {
+ if (!dirtyLine) {
+ // treat HTML end-comment after possible whitespace
+ // after line start as comment-until-eol
+ if (matchChar('>')) {
+ skipLine();
+ return Token.COMMENT;
+ }
+ }
+ t = Token.DEC;
+ }
+ dirtyLine = true;
+ return t;
+
+ default:
+ throw new ParsingException(String.format("illegal character: '%c'", c));
+ }
+ }
+ }
+
+    /**
+     * Reads a run of digits of the given base into the string buffer, starting
+     * with {@code firstC}, and accepts numeric separators between digits.
+     *
+     * @param base   numeric base of the digits; 2, 8, 10 and 16 are recognised
+     * @param firstC character that has already been read and may start the run
+     * @return {@code firstC} unchanged when it is not a digit of {@code base};
+     *         otherwise the first (already consumed) character that does not
+     *         belong to the run, {@code EOF_CHAR} when the input ends,
+     *         {@code REPORT_NUMBER_FORMAT_ERROR} when a separator is directly
+     *         followed by a line end or the end of input, or
+     *         {@code NUMERIC_SEPARATOR} when a separator is followed by a
+     *         non-digit (that character is pushed back)
+     */
+    private int readDigits(final int base, final int firstC) {
+        if (!isDigit(base, firstC)) {
+            return firstC;
+        }
+        addToString(firstC);
+
+        int next = getChar();
+        if (next == EOF_CHAR) {
+            return EOF_CHAR;
+        }
+
+        for (;;) {
+            if (next == NUMERIC_SEPARATOR) {
+                // No peeking: read optimistically and roll back only if needed,
+                // since peekChar() is just a getChar/ungetChar pair anyway.
+                next = getChar();
+                if (next == '\n' || next == EOF_CHAR) {
+                    // a separator must not be the last thing on the line
+                    return REPORT_NUMBER_FORMAT_ERROR;
+                }
+                if (!isDigit(base, next)) {
+                    // separator not followed by a digit: undo the read
+                    ungetChar(next);
+                    return NUMERIC_SEPARATOR;
+                }
+                addToString(NUMERIC_SEPARATOR);
+                continue;
+            }
+
+            if (!isDigit(base, next)) {
+                return next;
+            }
+
+            addToString(next);
+            next = getChar();
+            if (next == EOF_CHAR) {
+                return EOF_CHAR;
+            }
+        }
+    }
+
+    /** Tells whether {@code c} is an ASCII letter ({@code A-Z} or {@code a-z}). */
+    private static boolean isAlpha(final int c) {
+        final boolean upperCase = 'A' <= c && c <= 'Z';
+        final boolean lowerCase = 'a' <= c && c <= 'z';
+        return upperCase || lowerCase;
+    }
+
+    /**
+     * Tells whether {@code c} is a digit of the given base. Only the bases
+     * 2, 8, 10 and 16 are known; any other base yields {@code false}.
+     */
+    private static boolean isDigit(final int base, final int c) {
+        switch (base) {
+            case 10:
+                return isDigit(c);
+            case 16:
+                return isHexDigit(c);
+            case 8:
+                return isOctalDigit(c);
+            case 2:
+                return isDualDigit(c);
+            default:
+                return false;
+        }
+    }
+
+    /** Tells whether {@code c} is a binary digit, i.e. {@code '0'} or {@code '1'}. */
+    private static boolean isDualDigit(final int c) {
+        return c == '0' || c == '1';
+    }
+
+    /** Tells whether {@code c} is an octal digit ({@code '0'} to {@code '7'}). */
+    private static boolean isOctalDigit(final int c) {
+        return c >= '0' && c <= '7';
+    }
+
+    /** Tells whether {@code c} is a decimal digit ({@code '0'} to {@code '9'}). */
+    private static boolean isDigit(final int c) {
+        return c >= '0' && c <= '9';
+    }
+
+    /**
+     * Tells whether {@code c} is a hexadecimal digit: {@code 0-9}, {@code a-f}
+     * or {@code A-F}.
+     */
+    private static boolean isHexDigit(final int c) {
+        if (c >= '0' && c <= '9') {
+            return true;
+        }
+        if (c >= 'a' && c <= 'f') {
+            return true;
+        }
+        return c >= 'A' && c <= 'F';
+    }
+
+    /**
+     * Tells whether {@code c} counts as JavaScript whitespace, as defined in ECMA.
+     * jsscan.c uses C isspace() (which allows \v, I think). Note that the code
+     * in getChar() implicitly accepts '\r' == \u000D as well.
+     *
+     * <p>In the ASCII range this is space, tab, form feed and vertical tab;
+     * above it, the no-break space, the byte order mark and every character
+     * of the Unicode category {@code SPACE_SEPARATOR}.
+     */
+    private static boolean isJSSpace(final int c) {
+        if (c > 127) {
+            return c == 0xA0
+                    || c == BYTE_ORDER_MARK
+                    || Character.getType((char) c) == Character.SPACE_SEPARATOR;
+        }
+        switch (c) {
+            case 0x20:
+            case 0x9:
+            case 0xC:
+            case 0xB:
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    /**
+     * Tells whether {@code c} is a non-ASCII character of the Unicode category
+     * {@code FORMAT}.
+     */
+    private static boolean isJSFormatChar(final int c) {
+        if (c <= 127) {
+            return false;
+        }
+        return Character.getType((char) c) == Character.FORMAT;
+    }
+
+    /**
+     * Scans the rest of a regular expression literal. The parser calls this
+     * method when it gets {@code /} or {@code /=} in literal context, i.e. after
+     * the opening slash has already been scanned as a division token.
+     *
+     * <p>On success {@code string} holds the pattern body (without slashes and
+     * without flags) and {@code tokenEnd} is moved past the flags.
+     *
+     * @param startToken the token that was scanned for the opening slash;
+     *                   {@code Token.DIV} or {@code Token.ASSIGN_DIV}
+     * @throws ParsingException if the literal is not terminated on the same line
+     *                          or is followed by a letter that is not a valid flag
+     */
+    void readRegExp(final Token startToken) throws ParsingException {
+        final int start = tokenBeg;
+        stringBufferTop = 0;
+        if (startToken == Token.ASSIGN_DIV) {
+            // Mis-scanned /=: the '=' is the first character of the pattern
+            addToString('=');
+        } else {
+            if (startToken != Token.DIV) {
+                Kit.codeBug();
+            }
+            if (peekChar() == '*') {
+                // "/*" opens a comment, it can never start a regexp
+                tokenEnd = cursor - 1;
+                this.string = new String(stringBuffer, 0, stringBufferTop);
+                throw new ParsingException("unterminated regular expression literal");
+            }
+        }
+
+        boolean inCharSet = false; // true if inside a '['..']' pair
+        int c;
+        // a '/' only terminates the literal outside of a character set
+        while ((c = getChar()) != '/' || inCharSet) {
+            if (c == '\n' || c == EOF_CHAR) {
+                throw new ParsingException("unterminated regular expression literal");
+            }
+            if (c == '\\') {
+                // keep the backslash and take the escaped character verbatim
+                addToString(c);
+                c = getChar();
+                if (c == '\n' || c == EOF_CHAR) {
+                    throw new ParsingException("unterminated regular expression literal");
+                }
+            } else if (c == '[') {
+                inCharSet = true;
+            } else if (c == ']') {
+                inCharSet = false;
+            }
+            addToString(c);
+        }
+        final int reEnd = stringBufferTop;
+
+        // Flags are appended to the buffer only so that they count towards
+        // tokenEnd; they are not part of the resulting string.
+        while (true) {
+            c = getCharIgnoreLineEnd();
+            // all flags defined by ECMAScript, including d (ES2022) and v (ES2024)
+            if ("dgimsuvy".indexOf(c) != -1) {
+                addToString(c);
+            } else if (isAlpha(c)) {
+                throw new ParsingException("invalid flag after regular expression");
+            } else {
+                ungetCharIgnoreLineEnd(c);
+                break;
+            }
+        }
+
+        tokenEnd = start + stringBufferTop + 2; // include slashes
+        this.string = new String(stringBuffer, 0, reEnd);
+    }
+
+    /**
+     * Returns the characters collected in the string buffer so far as a new
+     * string and marks the current cursor position as the end of the token.
+     */
+    private String getStringFromBuffer() {
+        final String collected = new String(stringBuffer, 0, stringBufferTop);
+        tokenEnd = cursor;
+        return collected;
+    }
+
+    /**
+     * Appends {@code c} (narrowed to a {@code char}) to the string buffer,
+     * doubling the buffer's capacity first when it is full.
+     */
+    private void addToString(final int c) {
+        if (stringBufferTop == stringBuffer.length) {
+            final char[] grown = new char[stringBuffer.length * 2];
+            System.arraycopy(stringBuffer, 0, grown, 0, stringBufferTop);
+            stringBuffer = grown;
+        }
+        stringBuffer[stringBufferTop++] = (char) c;
+    }
+
+    /**
+     * Pushes {@code c} back so that the next read returns it again, and moves
+     * the cursor one position back. Pushing back on top of a line end is
+     * reported through {@code Kit.codeBug()}, because unreading across a line
+     * boundary is not possible.
+     */
+    private void ungetChar(final int c) {
+        final boolean lineEndOnTop = ungetCursor != 0
+                && ungetBuffer[ungetCursor - 1] == '\n';
+        if (lineEndOnTop) {
+            Kit.codeBug();
+        }
+        ungetBuffer[ungetCursor] = c;
+        ungetCursor++;
+        cursor--;
+    }
+
+    /**
+     * Consumes the next character if it equals {@code test}.
+     *
+     * @return {@code true} if the character matched (the token end is then moved
+     *         to the cursor); {@code false} if it did not, in which case the
+     *         character is pushed back
+     */
+    private boolean matchChar(final int test) {
+        final int next = getCharIgnoreLineEnd();
+        if (next != test) {
+            ungetCharIgnoreLineEnd(next);
+            return false;
+        }
+        tokenEnd = cursor;
+        return true;
+    }
+
+    /** Returns the next character without consuming it (read, then push back). */
+    private int peekChar() {
+        final int upcoming = getChar();
+        ungetChar(upcoming);
+        return upcoming;
+    }
+
+    /**
+     * Reads the next character, skipping formatting characters and applying the
+     * regular line-end handling.
+     */
+    private int getChar() {
+        final boolean skipFormattingChars = true;
+        final boolean ignoreLineEnd = false;
+        return getChar(skipFormattingChars, ignoreLineEnd);
+    }
+
+    /**
+     * Reads the next character with the regular line-end handling.
+     *
+     * @param skipFormattingChars whether formatting characters are skipped
+     */
+    private int getChar(final boolean skipFormattingChars) {
+        final boolean ignoreLineEnd = false;
+        return getChar(skipFormattingChars, ignoreLineEnd);
+    }
+
+ /**
+ * Reads the next character of the source, normalising every line terminator
+ * to '\n'.
+ *
+ * @param skipFormattingChars if true, non-ASCII characters for which
+ *                            isJSFormatChar() holds are silently skipped
+ * @param ignoreLineEnd       if true, the pending line end (if any) is left
+ *                            untouched, i.e. the line number and line start
+ *                            are not updated by this read
+ * @return the character read, '\n' for any line terminator, or EOF_CHAR at
+ *         the end of the source
+ */
+ private int getChar(final boolean skipFormattingChars, final boolean ignoreLineEnd) {
+ // characters that were pushed back are returned first, most recent one first
+ if (ungetCursor != 0) {
+ cursor++;
+ return ungetBuffer[--ungetCursor];
+ }
+
+ for (;;) {
+ if (sourceCursor == sourceString.length()) {
+ hitEOF = true;
+ return EOF_CHAR;
+ }
+ cursor++;
+ int c = sourceString.charAt(sourceCursor++);
+
+ // lineEndChar >= 0 means the previously read character ended a line
+ if (!ignoreLineEnd && lineEndChar >= 0) {
+ if (lineEndChar == '\r' && c == '\n') {
+ // "\r\n" is a single line end: swallow the '\n' and read on
+ lineEndChar = '\n';
+ continue;
+ }
+ // c is the first character of a new line
+ lineEndChar = -1;
+ lineStart = sourceCursor - 1;
+ lineno++;
+ }
+
+ if (c <= 127) {
+ if (c == '\n' || c == '\r') {
+ // remember the actual terminator, but always report '\n'
+ lineEndChar = c;
+ c = '\n';
+ }
+ } else {
+ if (c == BYTE_ORDER_MARK) {
+ return c; // BOM is considered whitespace
+ }
+ if (skipFormattingChars && isJSFormatChar(c)) {
+ continue;
+ }
+ // non-ASCII line terminators are reported as '\n' as well
+ if (ScriptRuntime.isJSLineTerminator(c)) {
+ lineEndChar = c;
+ c = '\n';
+ }
+ }
+ return c;
+ }
+ }
+
+    /**
+     * Reads the next character, skipping formatting characters, without
+     * applying the line-end handling of a regular read.
+     */
+    private int getCharIgnoreLineEnd() {
+        final boolean skipFormattingChars = true;
+        final boolean ignoreLineEnd = true;
+        return getChar(skipFormattingChars, ignoreLineEnd);
+    }
+
+    /**
+     * Reads the next character without applying the line-end handling of a
+     * regular read.
+     *
+     * @param skipFormattingChars whether formatting characters are skipped
+     */
+    private int getCharIgnoreLineEnd(final boolean skipFormattingChars) {
+        final boolean ignoreLineEnd = true;
+        return getChar(skipFormattingChars, ignoreLineEnd);
+    }
+
+    /**
+     * Pushes {@code c} back and moves the cursor one position back. Unlike
+     * {@code ungetChar}, no check for a line end on top of the push-back
+     * buffer is made.
+     */
+    private void ungetCharIgnoreLineEnd(final int c) {
+        ungetBuffer[ungetCursor] = c;
+        ungetCursor++;
+        cursor--;
+    }
+
+    /**
+     * Skips to the end of the current line. The terminating character (line
+     * end or {@code EOF_CHAR}) is pushed back, and the token end is set to the
+     * resulting cursor position.
+     */
+    private void skipLine() {
+        int c = getChar();
+        while (c != EOF_CHAR && c != '\n') {
+            c = getChar();
+        }
+        ungetChar(c);
+        tokenEnd = cursor;
+    }
+
+    /**
+     * Gives the current position of the scanner cursor.
+     *
+     * @return the value of the scanner's {@code cursor}
+     */
+    public int getCursor() {
+        return this.cursor;
+    }
+
+    /**
+     * Gives the absolute source offset of the last scanned token.
+     *
+     * @return the value of {@code tokenBeg}
+     */
+    public int getTokenBeg() {
+        return this.tokenBeg;
+    }
+
+    /**
+     * Gives the absolute source end-offset of the last scanned token.
+     *
+     * @return the value of {@code tokenEnd}
+     */
+    public int getTokenEnd() {
+        return this.tokenEnd;
+    }
+
+    /**
+     * Gives the length of the last scanned token.
+     *
+     * @return {@code tokenEnd - tokenBeg}
+     */
+    public int getTokenLength() {
+        final int length = tokenEnd - tokenBeg;
+        return length;
+    }
+
+    /**
+     * Gives the raw text of the last scanned token.
+     *
+     * @return the part of the source string between {@code tokenBeg}
+     *         (inclusive) and {@code tokenEnd} (exclusive)
+     */
+    public String getTokenRaw() {
+        final int from = tokenBeg;
+        final int to = tokenEnd;
+        return sourceString.substring(from, to);
+    }
+
+    /**
+     * Replaces the last character of {@code str} with its {@code \}{@code uXXXX}
+     * escape, where {@code XXXX} is the character's code as four lower-case
+     * hexadecimal digits (zero-padded on the left).
+     *
+     * @param str a string with at least one character
+     * @return {@code str} without its last character, followed by the escape
+     */
+    private static String convertLastCharToHex(final String str) {
+        final int split = str.length() - 1;
+        final String hex = Integer.toHexString(str.charAt(split));
+
+        final StringBuilder escaped = new StringBuilder(str.length() + 5);
+        escaped.append(str, 0, split).append("\\u");
+        for (int width = hex.length(); width < 4; width++) {
+            escaped.append('0');
+        }
+        return escaped.append(hex).toString();
+    }
+
+    /**
+     * Scans and returns the next token that is neither a line end
+     * ({@code Token.EOL}) nor a comment ({@code Token.COMMENT}).
+     *
+     * @throws ParsingException if scanning a token fails
+     */
+    public Token nextToken() throws ParsingException {
+        Token tt;
+        do {
+            tt = getToken();
+        } while (tt == Token.EOL || tt == Token.COMMENT);
+        return tt;
+    }
+
+ // stuff other than whitespace since start of line
+ private boolean dirtyLine;
+ // text of the last scanned string literal or regular expression body
+ private String string = "";
+
+ // scratch buffer the scanner collects characters in; addToString() doubles
+ // its size whenever it is full
+ private char[] stringBuffer = new char[128];
+ // number of characters of stringBuffer that are currently in use
+ private int stringBufferTop;
+ // table through which the values of scanned string literals are interned
+ private final ObjToIntMap allStrings = new ObjToIntMap(50);
+
+ // Room to backtrace from to < on failed match of the last - in