From a02ee2e952a4d002491c07de4296d0abd3401f72 Mon Sep 17 00:00:00 2001 From: XiangRongLin <41164160+XiangRongLin@users.noreply.github.com> Date: Sat, 17 Jul 2021 19:10:09 +0200 Subject: [PATCH] Rewrite youtube throttling solution and add tests --- .../youtube/YoutubeJavascriptExtractor.java | 101 +++++++++++++++++ .../youtube/YoutubeThrottlingDecoder.java | 104 ------------------ .../youtube/YoutubeThrottlingDecrypter.java | 88 +++++++++++++++ .../extractors/YoutubeStreamExtractor.java | 55 ++------- .../YoutubeJavascriptExtractorTest.java | 47 ++++++++ .../YoutubeThrottlingDecrypterTest.java | 40 +++++++ 6 files changed, 287 insertions(+), 148 deletions(-) create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeJavascriptExtractor.java delete mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecoder.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java create mode 100644 extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeJavascriptExtractorTest.java create mode 100644 extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypterTest.java diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeJavascriptExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeJavascriptExtractor.java new file mode 100644 index 000000000..06f2c44c2 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeJavascriptExtractor.java @@ -0,0 +1,101 @@ +package org.schabi.newpipe.extractor.services.youtube; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import 
org.schabi.newpipe.extractor.localization.Localization; +import org.schabi.newpipe.extractor.utils.Parser; + +import javax.annotation.Nonnull; + +/** + * YouTube restricts streaming of its media in multiple ways by requiring clients to apply a cipher function + * on parameters of requests. + * The cipher function is sent alongside as a JavaScript function. + *
+ * This class handles fetching the JavaScript file in order to allow other classes to extract the needed functions. + */ +public class YoutubeJavascriptExtractor { + + private static final String HTTPS = "https:"; + private static String cachedJavascriptCode; + + /** + * Extracts the JavaScript file. The result is cached, so subsequent calls use the result of previous calls. + * + * @param videoId Does not influence the result, but a valid video id can prevent tracking + * @return The whole javascript file as a string. + * @throws ParsingException If the extraction failed. + */ + @Nonnull + public static String extractJavascriptCode(String videoId) throws ParsingException { + if (cachedJavascriptCode == null) { + final YoutubeJavascriptExtractor extractor = new YoutubeJavascriptExtractor(); + String playerJsUrl = extractor.cleanJavascriptUrl(extractor.extractJavascriptUrl(videoId)); + cachedJavascriptCode = extractor.downloadJavascriptCode(playerJsUrl); + } + + return cachedJavascriptCode; + } + + /** + * Same as {@link YoutubeJavascriptExtractor#extractJavascriptCode(String)} but with a constant value for videoId. + * Possible because the videoId has no influence on the result. + * + * For tracking avoidance purposes it may make sense to pass in valid video ids. 
+ */ + @Nonnull + public static String extractJavascriptCode() throws ParsingException { + return extractJavascriptCode("d4IGg5dqeO8"); + } + + private String extractJavascriptUrl(String videoId) throws ParsingException { + try { + final String embedUrl = "https://www.youtube.com/embed/" + videoId; + final String embedPageContent = NewPipe.getDownloader() + .get(embedUrl, Localization.DEFAULT).responseBody(); + + try { + final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")"; + return Parser.matchGroup1(assetsPattern, embedPageContent) + .replace("\\", "").replace("\"", ""); + } catch (final Parser.RegexException ex) { + // playerJsUrl is still available in the file, just somewhere else TODO + // it is ok not to find it, see how that's handled in getDeobfuscationCode() + final Document doc = Jsoup.parse(embedPageContent); + final Elements elems = doc.select("script").attr("name", "player_ias/base"); + for (final Element elem : elems) { + if (elem.attr("src").contains("base.js")) { + return elem.attr("src"); + } + } + } + + } catch (final Exception i) { + throw new ParsingException("Embedded info did not provide YouTube player js url"); + } + throw new ParsingException("Embedded info did not provide YouTube player js url"); + } + + private String cleanJavascriptUrl(String playerJsUrl) { + if (playerJsUrl.startsWith("//")) { + return HTTPS + playerJsUrl; + } else if (playerJsUrl.startsWith("/")) { + // sometimes https://www.youtube.com part has to be added manually + return HTTPS + "//www.youtube.com" + playerJsUrl; + } else { + return playerJsUrl; + } + } + + private String downloadJavascriptCode(String playerJsUrl) throws ParsingException { + try { + return NewPipe.getDownloader().get(playerJsUrl, Localization.DEFAULT).responseBody(); + } catch (Exception e) { + throw new ParsingException("Could not get player js code from url: " + playerJsUrl); + } + } +} diff --git 
a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecoder.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecoder.java deleted file mode 100644 index 05b871ce3..000000000 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecoder.java +++ /dev/null @@ -1,104 +0,0 @@ -package org.schabi.newpipe.extractor.services.youtube; - -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; -import org.schabi.newpipe.extractor.NewPipe; -import org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.localization.Localization; -import org.schabi.newpipe.extractor.utils.Javascript; -import org.schabi.newpipe.extractor.utils.Parser; - -import java.util.regex.Pattern; - -public class YoutubeThrottlingDecoder { - - private static final String HTTPS = "https:"; - private static final String N_PARAM_REGEX = "[&?]n=([^&]+)"; - private static String playerJsCode; - - private final String functionName; - private final String function; - - public YoutubeThrottlingDecoder(String videoId, Localization localization) throws ParsingException { - if (playerJsCode == null) { - String playerJsUrl = cleanPlayerJsUrl(extractPlayerJsUrl(videoId, localization)); - playerJsCode = downloadPlayerJsCode(localization, playerJsUrl); - } - - functionName = parseDecodeFunctionName(playerJsCode); - function = parseDecodeFunction(playerJsCode, functionName); - } - - private String extractPlayerJsUrl(String videoId, Localization localization) throws ParsingException { - try { - final String embedUrl = "https://www.youtube.com/embed/" + videoId; - final String embedPageContent = NewPipe.getDownloader() - .get(embedUrl, localization).responseBody(); - - try { - final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")"; - return Parser.matchGroup1(assetsPattern, 
embedPageContent) - .replace("\\", "").replace("\"", ""); - } catch (final Parser.RegexException ex) { - // playerJsUrl is still available in the file, just somewhere else TODO - // it is ok not to find it, see how that's handled in getDeobfuscationCode() - final Document doc = Jsoup.parse(embedPageContent); - final Elements elems = doc.select("script").attr("name", "player_ias/base"); - for (final Element elem : elems) { - if (elem.attr("src").contains("base.js")) { - return elem.attr("src"); - } - } - } - - } catch (final Exception i) { - throw new ParsingException("Embedded info did not provide YouTube player js url"); - } - throw new ParsingException("Embedded info did not provide YouTube player js url"); - } - - private String cleanPlayerJsUrl(String playerJsUrl) { - if (playerJsUrl.startsWith("//")) { - return HTTPS + playerJsUrl; - } else if (playerJsUrl.startsWith("/")) { - // sometimes https://www.youtube.com part has to be added manually - return HTTPS + "//www.youtube.com" + playerJsUrl; - } else { - return playerJsUrl; - } - } - - private String downloadPlayerJsCode(Localization localization, String playerJsUrl) throws ParsingException { - try { - return NewPipe.getDownloader().get(playerJsUrl, localization).responseBody(); - } catch (Exception e) { - throw new ParsingException("Could not get player js code from url: " + playerJsUrl); - } - } - - private String parseDecodeFunctionName(String playerJsCode) throws Parser.RegexException { - Pattern pattern = Pattern.compile("b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)"); - return Parser.matchGroup1(pattern, playerJsCode); - } - - private String parseDecodeFunction(String playerJsCode, String functionName) throws Parser.RegexException { - Pattern functionPattern = Pattern.compile(functionName + "=function(.*?;)\n", Pattern.DOTALL); - return "function " + functionName + Parser.matchGroup1(functionPattern, playerJsCode); - } - - public String parseNParam(String url) throws 
Parser.RegexException { - Pattern nValuePattern = Pattern.compile(N_PARAM_REGEX); - return Parser.matchGroup1(nValuePattern, url); - } - - public String decodeNParam(String nParam) { - Javascript javascript = new Javascript(); - return javascript.run(function, functionName, nParam); - } - - public String replaceNParam(String url, String oldValue, String newValue) { - return url.replace(oldValue, newValue); - } -} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java new file mode 100644 index 000000000..d8295113d --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java @@ -0,0 +1,88 @@ +package org.schabi.newpipe.extractor.services.youtube; + +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.utils.Javascript; +import org.schabi.newpipe.extractor.utils.Parser; + +import java.util.regex.Pattern; + +/** + *
+ * YouTube's media is protected with a cipher, which modifies the "n" query parameter of its video playback urls. + * This class handles extracting that "n" query parameter, applying the cipher to it and returning the resulting url + * which is not throttled. + *
+ * + *+ * https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=VVF2xyZLVRZZxHXZ&other=other + *
+ * becomes + *+ * https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=iHywZkMipkszqA&other=other + *
+ */ +public class YoutubeThrottlingDecrypter { + + private static final String N_PARAM_REGEX = "[&?]n=([^&]+)"; + + private final String functionName; + private final String function; + + /** + *+ * Use this if you care about the off chance that YouTube tracks with which videoId the cipher is requested. + *
+ * Otherwise use the no-arg constructor which uses a constant value. + */ + public YoutubeThrottlingDecrypter(String videoId) throws ParsingException { + final String playerJsCode = YoutubeJavascriptExtractor.extractJavascriptCode(videoId); + + functionName = parseDecodeFunctionName(playerJsCode); + function = parseDecodeFunction(playerJsCode, functionName); + } + + public YoutubeThrottlingDecrypter() throws ParsingException { + final String playerJsCode = YoutubeJavascriptExtractor.extractJavascriptCode(); + + functionName = parseDecodeFunctionName(playerJsCode); + function = parseDecodeFunction(playerJsCode, functionName); + } + + private String parseDecodeFunctionName(String playerJsCode) throws Parser.RegexException { + Pattern pattern = Pattern.compile("b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)"); + return Parser.matchGroup1(pattern, playerJsCode); + } + + private String parseDecodeFunction(String playerJsCode, String functionName) throws Parser.RegexException { + Pattern functionPattern = Pattern.compile(functionName + "=function(.*?;)\n", Pattern.DOTALL); + return "function " + functionName + Parser.matchGroup1(functionPattern, playerJsCode); + } + + public String apply(String url) throws Parser.RegexException { + if (containsNParam(url)) { + String oldNParam = parseNParam(url); + String newNParam = decryptNParam(oldNParam); + return replaceNParam(url, oldNParam, newNParam); + } else { + return url; + } + } + + private boolean containsNParam(String url) { + return Parser.isMatch(N_PARAM_REGEX, url); + } + + private String parseNParam(String url) throws Parser.RegexException { + Pattern nValuePattern = Pattern.compile(N_PARAM_REGEX); + return Parser.matchGroup1(nValuePattern, url); + } + + private String decryptNParam(String nParam) { + Javascript javascript = new Javascript(); + return javascript.run(function, functionName, nParam); + } + + private String replaceNParam(String url, String oldValue, String newValue) { + return 
url.replace(oldValue, newValue); + } +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 25a37cec7..bfb765278 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -4,7 +4,6 @@ import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; - import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -18,15 +17,7 @@ import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Response; -import org.schabi.newpipe.extractor.exceptions.AgeRestrictedContentException; -import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; -import org.schabi.newpipe.extractor.exceptions.ExtractionException; -import org.schabi.newpipe.extractor.exceptions.GeographicRestrictionException; -import org.schabi.newpipe.extractor.exceptions.PaidContentException; -import org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.exceptions.PrivateContentException; -import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; -import org.schabi.newpipe.extractor.exceptions.YoutubeMusicPremiumContentException; +import org.schabi.newpipe.extractor.exceptions.*; import org.schabi.newpipe.extractor.linkhandler.LinkHandler; import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.Localization; @@ -34,44 +25,23 @@ import 
org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager; import org.schabi.newpipe.extractor.services.youtube.ItagItem; import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper; -import org.schabi.newpipe.extractor.services.youtube.YoutubeThrottlingDecoder; +import org.schabi.newpipe.extractor.services.youtube.YoutubeThrottlingDecrypter; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; -import org.schabi.newpipe.extractor.stream.AudioStream; -import org.schabi.newpipe.extractor.stream.Description; -import org.schabi.newpipe.extractor.stream.Frameset; -import org.schabi.newpipe.extractor.stream.Stream; -import org.schabi.newpipe.extractor.stream.StreamExtractor; -import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor; -import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; -import org.schabi.newpipe.extractor.stream.StreamSegment; -import org.schabi.newpipe.extractor.stream.StreamType; -import org.schabi.newpipe.extractor.stream.SubtitlesStream; -import org.schabi.newpipe.extractor.stream.VideoStream; +import org.schabi.newpipe.extractor.stream.*; import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Utils; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.time.LocalDate; import java.time.OffsetDateTime; import java.time.format.DateTimeFormatter; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Objects; +import java.util.*; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; - -import static 
org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl; -import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonResponse; -import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; -import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.*; import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; @@ -553,18 +523,15 @@ public class YoutubeStreamExtractor extends StreamExtractor { public List