From 6031695343b9705013e89cf954b6edbab07552e6 Mon Sep 17 00:00:00 2001 From: tonakriz Date: Wed, 22 Nov 2017 18:39:38 +0100 Subject: [PATCH] Added fetching subtitles for YouTube and its tests --- NewPipeExtractor.iml | 25 +------ .../soundcloud/SoundcloudStreamExtractor.java | 6 ++ .../youtube/YoutubeStreamExtractor.java | 73 +++++++++++++++++-- .../extractor/stream/StreamExtractor.java | 3 + .../SoundcloudStreamExtractorDefaultTest.java | 6 ++ .../YoutubeStreamExtractorDefaultTest.java | 7 ++ .../YoutubeStreamExtractorRestrictedTest.java | 6 ++ 7 files changed, 100 insertions(+), 26 deletions(-) diff --git a/NewPipeExtractor.iml b/NewPipeExtractor.iml index fa027b002..cdb9deeb8 100644 --- a/NewPipeExtractor.iml +++ b/NewPipeExtractor.iml @@ -1,30 +1,13 @@ - - - - - - - - - - + + - - - - + + - - - - - \ No newline at end of file diff --git a/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java b/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java index ba2cd2d14..aa6e732e2 100644 --- a/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java +++ b/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java @@ -15,6 +15,7 @@ import org.schabi.newpipe.extractor.utils.Parser; import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; public class SoundcloudStreamExtractor extends StreamExtractor { @@ -192,6 +193,11 @@ public class SoundcloudStreamExtractor extends StreamExtractor { return null; } + @Override + public HashMap getSubtitles() throws IOException, ExtractionException, JsonParserException { + return new HashMap<>(); + } + @Override public StreamType getStreamType() { return StreamType.AUDIO_STREAM; diff --git a/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java 
b/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java index 8dbb9ead3..2a030f593 100644 --- a/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java +++ b/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java @@ -1,7 +1,9 @@ package org.schabi.newpipe.extractor.services.youtube; +import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; +import com.grack.nanojson.JsonParserException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -20,10 +22,7 @@ import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Utils; import java.io.IOException; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -421,6 +420,59 @@ public class YoutubeStreamExtractor extends StreamExtractor { return videoOnlyStreams; } + /** + * Example output: + * { + * #language code#: [ + * [0]"captions URL", + * [1]"language Name" + * ], + * "a.en": { // a.#language code# == auto generated + * [0]"https://youtube.com/api/timedtext..." + * [1]"English (Auto-generated)" + * }, + * ".en": { // .#language code# == normal (not auto generated) + * [0]"https://youtube.com/api/timedtext..." 
+ * [1]"English" + * } + * } + * + * Example usage: + * 1) Get the list of keys in the Map, if there are any + * 2) Get the String[2] stored under a key: [0] is the captions URL, [1] is the language name + * + * @return Map(String, StringArray[2]) + * @throws IOException - Thrown when downloading the HTML page fails + * @throws ExtractionException - Thrown when parsing HTML + * @throws JsonParserException - Thrown when parsing JSON from the web page + */ + @Override + public HashMap getSubtitles() throws IOException, ExtractionException, JsonParserException { + HashMap result = new HashMap<>(); + + JsonObject playerConfig = getPlayerConfig(getPageHtml()); + + String playerResponse = playerConfig.getObject("args").getString("player_response"); + + if (!JsonParser.object().from(playerResponse).has("captions")) { + return new HashMap<>(); + } + + JsonObject captions = JsonParser.object().from(playerResponse).getObject("captions"); + JsonArray captionsArray = captions.getObject("playerCaptionsTracklistRenderer").getArray("captionTracks"); + + for (int x = 0; x < captionsArray.size(); x++) { + String captionsUrlAndName[] = new String[2]; + captionsUrlAndName[0] = captionsArray.getObject(x).getString("baseUrl"); + captionsUrlAndName[1] = captionsArray.getObject(x).getObject("name").getString("simpleText"); + String captionsLangCode = captionsArray.getObject(x).getString("vssId"); + + result.put(captionsLangCode, captionsUrlAndName); + } + + return result; + } + @Override public StreamType getStreamType() throws ParsingException { //todo: if implementing livestream support this value should be generated dynamically @@ -498,13 +550,24 @@ public class YoutubeStreamExtractor extends StreamExtractor { private static volatile String decryptionCode = ""; + private static String pageHtml = null; + + private String getPageHtml() throws IOException, ExtractionException{ + if (pageHtml == null) { + Downloader dl = NewPipe.getDownloader(); + pageHtml = dl.download(getCleanUrl()); + } + return pageHtml; + } + @Override public void fetchPage() throws IOException, ExtractionException { 
Downloader dl = NewPipe.getDownloader(); - String pageContent = dl.download(getCleanUrl()); + String pageContent = getPageHtml(); doc = Jsoup.parse(pageContent, getCleanUrl()); + String playerUrl; // Check if the video is age restricted if (pageContent.contains(". */ +import com.grack.nanojson.JsonParserException; import org.schabi.newpipe.extractor.Extractor; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.UrlIdHandler; @@ -27,6 +28,7 @@ import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import java.io.IOException; +import java.util.HashMap; import java.util.List; /** @@ -64,6 +66,7 @@ public abstract class StreamExtractor extends Extractor { public abstract List getAudioStreams() throws IOException, ExtractionException; public abstract List getVideoStreams() throws IOException, ExtractionException; public abstract List getVideoOnlyStreams() throws IOException, ExtractionException; + public abstract HashMap getSubtitles() throws IOException, ExtractionException, JsonParserException; public abstract StreamType getStreamType() throws ParsingException; public abstract StreamInfoItem getNextVideo() throws IOException, ExtractionException; diff --git a/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorDefaultTest.java b/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorDefaultTest.java index 2d76bdd30..6df8d4303 100644 --- a/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorDefaultTest.java +++ b/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorDefaultTest.java @@ -1,5 +1,6 @@ package org.schabi.newpipe.extractor.services.soundcloud; +import com.grack.nanojson.JsonParserException; import org.junit.Before; import org.junit.Test; import org.schabi.newpipe.Downloader; @@ -101,4 +102,9 @@ public class 
SoundcloudStreamExtractorDefaultTest { assertFalse(relatedVideos.getItemList().isEmpty()); assertTrue(relatedVideos.getErrors().isEmpty()); } + + @Test + public void testGetSubtitles() throws IOException, ExtractionException, JsonParserException { + assertTrue(extractor.getSubtitles() != null); + } } diff --git a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java index 63dfaed03..d05695f7a 100644 --- a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java +++ b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java @@ -1,5 +1,6 @@ package org.schabi.newpipe.extractor.services.youtube; +import com.grack.nanojson.JsonParserException; import org.junit.Before; import org.junit.Test; import org.schabi.newpipe.Downloader; @@ -12,6 +13,7 @@ import org.schabi.newpipe.extractor.stream.StreamType; import org.schabi.newpipe.extractor.stream.VideoStream; import java.io.IOException; +import java.util.HashMap; import static org.junit.Assert.*; import static org.schabi.newpipe.extractor.ServiceList.YouTube; @@ -148,4 +150,9 @@ public class YoutubeStreamExtractorDefaultTest { } assertTrue(relatedVideos.getErrors().isEmpty()); } + + @Test + public void testGetSubtitles() throws IOException, ExtractionException, JsonParserException { + assertTrue(extractor.getSubtitles() != null); + } } diff --git a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java index fadd13df4..f9bc39995 100644 --- a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java +++ b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java @@ 
-1,5 +1,6 @@ package org.schabi.newpipe.extractor.services.youtube; +import com.grack.nanojson.JsonParserException; import org.junit.Before; import org.junit.Test; import org.schabi.newpipe.Downloader; @@ -103,4 +104,9 @@ public class YoutubeStreamExtractorRestrictedTest { 0 <= s.format && s.format <= 4); } } + + @Test + public void testGetSubtitles() throws IOException, ExtractionException, JsonParserException { + assertTrue(extractor.getSubtitles() != null); + } }