From aafe543334f20e9ad97f9559ea597f2c36aac70c Mon Sep 17 00:00:00 2001 From: John Zhen Mo Date: Thu, 1 Feb 2018 21:48:34 -0800 Subject: [PATCH] -Removed new method in favor of current method. -Fixed current method to output all available captions. -Fixed tests. --- .../youtube/YoutubeStreamExtractor.java | 98 +++++++++++++------ .../YoutubeStreamExtractorDefaultTest.java | 2 +- .../YoutubeStreamExtractorRestrictedTest.java | 2 +- 3 files changed, 72 insertions(+), 30 deletions(-) diff --git a/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java b/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java index 4ad78fd8f..14b49e155 100644 --- a/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java +++ b/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java @@ -7,7 +7,6 @@ import com.grack.nanojson.JsonParserException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; import org.mozilla.javascript.Context; import org.mozilla.javascript.Function; import org.mozilla.javascript.ScriptableObject; @@ -456,7 +455,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { collector.commit(extractVideoPreviewInfo(doc.select("div[class=\"watch-sidebar-section\"]") .first().select("li").first())); - return ((StreamInfoItem) collector.getItemList().get(0)); + return collector.getItemList().get(0); } catch (Exception e) { throw new ParsingException("Could not get next video", e); } @@ -555,7 +554,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { } if (availableSubtitles.isEmpty()) { - availableSubtitles.addAll(getAvailableSubtitles(getId())); + availableSubtitles.addAll(getAvailableSubtitles()); } } @@ -709,24 +708,57 @@ public class YoutubeStreamExtractor extends StreamExtractor { return result == null ? "" : result.toString(); } - private List getAvailableSubtitles(final String id) throws SubtitlesException { - try { - final String listingUrl = getVideoSubtitlesListingUrl(id); - final String pageContent = NewPipe.getDownloader().download(listingUrl); - final Document listing = Jsoup.parse(pageContent, listingUrl); - final Elements tracks = listing.select("track"); + @Nonnull + private List getAvailableSubtitles() throws SubtitlesException { + // If the video is age restricted getPlayerConfig will fail + if(isAgeRestricted) return Collections.emptyList(); - List subtitles = new ArrayList<>(tracks.size() * 2); - for (final Element track : tracks) { - final String languageCode = track.attr("lang_code"); - subtitles.add(getVideoSubtitlesUrl(id, languageCode, SubtitlesFormat.TTML)); - subtitles.add(getVideoSubtitlesUrl(id, languageCode, SubtitlesFormat.VTT)); - // todo: add transcripts, they are currently omitted since they are incompatible with ExoPlayer - } - return subtitles; - } catch (IOException | ReCaptchaException e) { - throw new SubtitlesException("Unable to download subtitles listing", e); + final JsonObject playerConfig; + try { + playerConfig = getPlayerConfig(getPageHtml(NewPipe.getDownloader())); + } catch (IOException | ExtractionException e) { + throw new SubtitlesException("Unable to download player configs", e); } + final String playerResponse = playerConfig.getObject("args").getString("player_response"); + + final JsonObject captions; + try { + if (!JsonParser.object().from(playerResponse).has("captions")) { + // Captions does not exist + return Collections.emptyList(); + } + captions = JsonParser.object().from(playerResponse).getObject("captions"); + } catch (JsonParserException e) { + // Failed to parse subtitles + throw new SubtitlesException("Unable to parse subtitles listing", e); + } + + final JsonObject renderer = captions.getObject("playerCaptionsTracklistRenderer"); + final JsonArray captionsArray = renderer.getArray("captionTracks"); + final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages"); + + final int captionsSize = captionsArray.size(); + // Should not happen, if there is the "captions" object, it should always has some captions in it + if(captionsSize == 0) return Collections.emptyList(); + // Obtain the base url, this only needs to be done once + final String baseUrl = captionsArray.getObject(0).getString("baseUrl"); + + Set manualLanguageCodes = new HashSet<>(); + for (int i = 0; i < captionsSize; i++) { + manualLanguageCodes.add(captionsArray.getObject(i).getString("languageCode")); + } + Set automaticLanguageCodes = new HashSet<>(); + for (int i = 0; i < autoCaptionsArray.size(); i++) { + automaticLanguageCodes.add(autoCaptionsArray.getObject(i).getString("languageCode")); + } + + List result = new ArrayList<>(); + result.addAll(getVideoSubtitlesUrl(baseUrl, new ArrayList<>(manualLanguageCodes), + new ArrayList<>(automaticLanguageCodes), SubtitlesFormat.VTT)); + result.addAll(getVideoSubtitlesUrl(baseUrl, new ArrayList<>(manualLanguageCodes), + new ArrayList<>(automaticLanguageCodes), SubtitlesFormat.TTML)); + // todo: add transcripts, they are currently omitted since they are incompatible with ExoPlayer + return result; } /*////////////////////////////////////////////////////////////////////////// // Data Class @@ -754,16 +786,26 @@ public class YoutubeStreamExtractor extends StreamExtractor { } @Nonnull - private static String getVideoSubtitlesListingUrl(final String id) { - return "https://video.google.com/timedtext?type=list&v=" + id; - } + private static List getVideoSubtitlesUrl(final String baseUrl, + final List manualCaptionLanguageCodes, + final List automaticCaptionLanguageCodes, + final SubtitlesFormat format) { + final String cleanUrl = baseUrl + .replaceAll("&fmt=[^&]*", "") // Remove preexisting format if exists + .replaceAll("&tlang=[^&]*", "") // Remove translation language + .replaceAll("&kind=[^&]*", ""); // Remove automatic generation toggle + final String builderUrl = cleanUrl + "&fmt=" + format.getExtension() + "&tlang="; - @Nonnull - private static Subtitles getVideoSubtitlesUrl(final String id, final String locale, final SubtitlesFormat format) { - final String url = "https://www.youtube.com/api/timedtext?lang=" + locale + - "&fmt=" + format.getExtension() + "&v=" + id; - // These are all non-generated - return new Subtitles(format, locale, url, false); + List subtitles = new ArrayList<>(manualCaptionLanguageCodes.size() + + automaticCaptionLanguageCodes.size()); + for (final String languageCode : manualCaptionLanguageCodes) { + subtitles.add(new Subtitles(format, languageCode, builderUrl + languageCode, false)); + } + for (final String languageCode : automaticCaptionLanguageCodes) { + final String fullUrl = builderUrl + languageCode + "&kind=asr"; + subtitles.add(new Subtitles(format, languageCode, fullUrl, true)); + } + return subtitles; } private Map getItags(String encodedUrlMapKey, ItagItem.ItagType itagTypeWanted) throws ParsingException { diff --git a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java index e0bf2f76c..2b44eb835 100644 --- a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java +++ b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java @@ -153,6 +153,6 @@ public class YoutubeStreamExtractorDefaultTest { @Test public void testGetSubtitlesList() throws IOException, ExtractionException { // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null - assertTrue(extractor.getSubtitlesDefault().isEmpty()); + assertTrue(extractor.getSubtitles(SubtitlesFormat.TTML).isEmpty()); } } diff --git a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java index 0589a9cd6..77fff6a64 100644 --- a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java +++ b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java @@ -128,6 +128,6 @@ public class YoutubeStreamExtractorRestrictedTest { @Test public void testGetSubtitlesList() throws IOException, ExtractionException { // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null - assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT).isEmpty()); + assertTrue(extractor.getSubtitles(SubtitlesFormat.TTML).isEmpty()); } }