From abcee871677d8e961780dbef7bcdd71b1828c5bc Mon Sep 17 00:00:00 2001 From: AudricV <74829229+AudricV@users.noreply.github.com> Date: Sat, 24 Sep 2022 21:28:09 +0200 Subject: [PATCH 1/2] [YouTube] Fix throttling parameter decryption function regex - Quote the function name, as it may contain special regex symbols, such as the dollar sign; - Support multiple lines; - Use what looks like the end of the function for the end of the regex (this part is inspired by yt-dlp throttling parameter decryption regex); - Move the throttling function body regex into a private and static constant. --- .../youtube/YoutubeThrottlingDecrypter.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java index f812e7007..f87a07c9a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java @@ -40,6 +40,12 @@ public final class YoutubeThrottlingDecrypter { private static final Pattern DECRYPT_FUNCTION_NAME_PATTERN = Pattern.compile( "\\.get\\(\"n\"\\)\\)&&\\(b=([a-zA-Z0-9$]+)(?:\\[(\\d+)])?\\([a-zA-Z0-9]\\)"); + // Escape the curly end brace to allow compatibility with Android's regex engine + // See https://stackoverflow.com/q/45074813 + @SuppressWarnings("RegExpRedundantEscape") + private static final String DECRYPT_FUNCTION_BODY_REGEX = + "=\\s*function([\\S\\s]*?\\}\\s*return [\\w$]+?\\.join\\(\"\"\\)\\s*\\};)"; + private static final Map N_PARAMS_CACHE = new HashMap<>(); private static String decryptFunction; private static String decryptFunctionName; @@ -128,11 +134,9 @@ public final class YoutubeThrottlingDecrypter { @Nonnull private static String parseWithRegex(final String playerJsCode, final 
String functionName) throws Parser.RegexException { - // Escape the curly end brace to allow compatibility with Android's regex engine - // See https://stackoverflow.com/q/45074813 - //noinspection RegExpRedundantEscape - final Pattern functionPattern = Pattern.compile(functionName + "=function(.*?\\};)\n", - Pattern.DOTALL); + // Quote the function name, as it may contain special regex characters such as dollar + final Pattern functionPattern = Pattern.compile( + Pattern.quote(functionName) + DECRYPT_FUNCTION_BODY_REGEX, Pattern.DOTALL); return validateFunction("function " + functionName + Parser.matchGroup1(functionPattern, playerJsCode)); From 8067c43837b61e52878caa030c249deed45c48f4 Mon Sep 17 00:00:00 2001 From: AudricV <74829229+AudricV@users.noreply.github.com> Date: Sat, 24 Sep 2022 21:49:22 +0200 Subject: [PATCH 2/2] [YouTube] Don't use a specific letter for the decryption function name pattern Use the same possible characters for variables everywhere, in order to avoid potential future throttling parameter decryption function name parsing issues related to the usage of other letter(s) than b. 
--- .../services/youtube/YoutubeThrottlingDecrypter.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java index f87a07c9a..1f135bde7 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java @@ -38,7 +38,9 @@ public final class YoutubeThrottlingDecrypter { private static final Pattern N_PARAM_PATTERN = Pattern.compile("[&?]n=([^&]+)"); private static final Pattern DECRYPT_FUNCTION_NAME_PATTERN = Pattern.compile( - "\\.get\\(\"n\"\\)\\)&&\\(b=([a-zA-Z0-9$]+)(?:\\[(\\d+)])?\\([a-zA-Z0-9]\\)"); + // CHECKSTYLE:OFF + "\\.get\\(\"n\"\\)\\)&&\\([a-zA-Z0-9$_]=([a-zA-Z0-9$_]+)(?:\\[(\\d+)])?\\([a-zA-Z0-9$_]\\)"); + // CHECKSTYLE:ON // Escape the curly end brace to allow compatibility with Android's regex engine // See https://stackoverflow.com/q/45074813