Added fetching of subtitles for YouTube, and tests for it

This commit is contained in:
tonakriz 2017-11-22 18:39:38 +01:00
parent b9d0941411
commit 6031695343
7 changed files with 100 additions and 26 deletions

View File

@ -1,30 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<module external.linked.project.id=":NewPipeExtractor" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$/.." external.system.id="GRADLE" type="JAVA_MODULE" version="4"> <module external.linked.project.id="NewPipeExtractor" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" external.system.module.group="" external.system.module.version="unspecified" type="JAVA_MODULE" version="4">
<component name="FacetManager"> <component name="NewModuleRootManager" inherit-compiler-output="true">
<facet type="java-gradle" name="Java-Gradle">
<configuration>
<option name="BUILD_FOLDER_PATH" value="$MODULE_DIR$/build" />
<option name="BUILDABLE" value="true" />
</configuration>
</facet>
</component>
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_7" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/build/classes/java/main" />
<output-test url="file://$MODULE_DIR$/build/classes/java/test" />
<exclude-output /> <exclude-output />
<content url="file://$MODULE_DIR$"> <content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/src/test/resources" type="java-test-resource" />
<excludeFolder url="file://$MODULE_DIR$/.gradle" /> <excludeFolder url="file://$MODULE_DIR$/.gradle" />
<excludeFolder url="file://$MODULE_DIR$/build" />
<excludeFolder url="file://$MODULE_DIR$/out" />
</content> </content>
<orderEntry type="inheritedJdk" /> <orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" exported="" scope="PROVIDED" name="nanojson-1.1" level="project" />
<orderEntry type="library" exported="" scope="PROVIDED" name="jsoup-1.9.2" level="project" />
<orderEntry type="library" exported="" scope="PROVIDED" name="rhino-1.7.7.1" level="project" />
<orderEntry type="library" exported="" scope="TEST" name="junit-4.12" level="project" />
<orderEntry type="library" exported="" scope="TEST" name="hamcrest-core-1.3" level="project" />
</component> </component>
</module> </module>

View File

@ -15,6 +15,7 @@ import org.schabi.newpipe.extractor.utils.Parser;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import java.util.List; import java.util.List;
public class SoundcloudStreamExtractor extends StreamExtractor { public class SoundcloudStreamExtractor extends StreamExtractor {
@ -192,6 +193,11 @@ public class SoundcloudStreamExtractor extends StreamExtractor {
return null; return null;
} }
/**
 * Reports the subtitles of this stream. This implementation performs no lookup and
 * always hands back a newly created, empty map.
 *
 * @return a fresh empty map on every call
 */
@Override
public HashMap<String, String[]> getSubtitles() throws IOException, ExtractionException, JsonParserException {
    final HashMap<String, String[]> noSubtitles = new HashMap<>();
    return noSubtitles;
}
@Override @Override
public StreamType getStreamType() { public StreamType getStreamType() {
return StreamType.AUDIO_STREAM; return StreamType.AUDIO_STREAM;

View File

@ -1,7 +1,9 @@
package org.schabi.newpipe.extractor.services.youtube; package org.schabi.newpipe.extractor.services.youtube;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
@ -20,10 +22,7 @@ import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.*;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -421,6 +420,59 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return videoOnlyStreams; return videoOnlyStreams;
} }
/**
 * Collects the caption tracks listed in the player configuration of the watch page.
 *
 * Each map entry is keyed by the track's "vssId" and holds a two-element array:
 * {
 *     #language code#: [
 *         [0] "captions URL",
 *         [1] "language name"
 *     ],
 *     "a.en": [    // a.#language code# == auto generated
 *         [0] "https://youtube.com/api/timedtext..."
 *         [1] "English (Auto-generated)"
 *     ],
 *     ".en": [     // .#language code# == normal (not auto generated)
 *         [0] "https://youtube.com/api/timedtext..."
 *         [1] "English"
 *     ]
 * }
 *
 * Example usage:
 * 1) Get the list of keys in the map, if there are any.
 * 2) Look up a key to get the captions URL ([0]) and the language name ([1]).
 *
 * @return Map(String, StringArray[2]); empty when the player response has no "captions"
 *         entry or lists no caption tracks
 * @throws IOException - Thrown when fetching the HTML page
 * @throws ExtractionException - Thrown when parsing HTML
 * @throws JsonParserException - Thrown when parsing JSON from the web page
 */
@Override
public HashMap<String, String[]> getSubtitles() throws IOException, ExtractionException, JsonParserException {
    HashMap<String, String[]> result = new HashMap<>();

    JsonObject playerConfig = getPlayerConfig(getPageHtml());
    String playerResponse = playerConfig.getObject("args").getString("player_response");

    // Parse the embedded JSON document once and reuse it for both the check and the lookup.
    JsonObject response = JsonParser.object().from(playerResponse);
    if (!response.has("captions")) {
        return result;
    }

    // NOTE(review): depending on the nanojson version, a missing key yields either null or an
    // empty default; the null checks below cover the former and the loop handles the latter.
    JsonObject captions = response.getObject("captions");
    JsonObject trackListRenderer = captions == null
            ? null
            : captions.getObject("playerCaptionsTracklistRenderer");
    JsonArray captionTracks = trackListRenderer == null
            ? null
            : trackListRenderer.getArray("captionTracks");
    if (captionTracks == null) {
        return result;
    }

    for (int i = 0; i < captionTracks.size(); i++) {
        JsonObject track = captionTracks.getObject(i);

        String[] captionsUrlAndName = new String[2];
        captionsUrlAndName[0] = track.getString("baseUrl");
        captionsUrlAndName[1] = track.getObject("name").getString("simpleText");

        result.put(track.getString("vssId"), captionsUrlAndName);
    }
    return result;
}
@Override @Override
public StreamType getStreamType() throws ParsingException { public StreamType getStreamType() throws ParsingException {
//todo: if implementing livestream support this value should be generated dynamically //todo: if implementing livestream support this value should be generated dynamically
@ -498,13 +550,24 @@ public class YoutubeStreamExtractor extends StreamExtractor {
private static volatile String decryptionCode = ""; private static volatile String decryptionCode = "";
// HTML of this extractor's page, filled in by the first call to getPageHtml().
// Deliberately an instance field: as a static field the first page ever downloaded would be
// returned for every other extractor instance, i.e. for a different URL.
private String pageHtml = null;

/**
 * Returns the HTML downloaded from {@code getCleanUrl()}. The download happens on the first
 * call only; later calls on the same extractor return the stored result.
 *
 * @return the page content as returned by the downloader
 * @throws IOException declared by the download call
 * @throws ExtractionException declared alongside it; which call raises it is not visible here
 */
private String getPageHtml() throws IOException, ExtractionException {
    if (pageHtml == null) {
        Downloader dl = NewPipe.getDownloader();
        pageHtml = dl.download(getCleanUrl());
    }
    return pageHtml;
}
@Override @Override
public void fetchPage() throws IOException, ExtractionException { public void fetchPage() throws IOException, ExtractionException {
Downloader dl = NewPipe.getDownloader(); Downloader dl = NewPipe.getDownloader();
String pageContent = dl.download(getCleanUrl()); String pageContent = getPageHtml();
doc = Jsoup.parse(pageContent, getCleanUrl()); doc = Jsoup.parse(pageContent, getCleanUrl());
String playerUrl; String playerUrl;
// Check if the video is age restricted // Check if the video is age restricted
if (pageContent.contains("<meta property=\"og:restrictions:age")) { if (pageContent.contains("<meta property=\"og:restrictions:age")) {

View File

@ -20,6 +20,7 @@ package org.schabi.newpipe.extractor.stream;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>. * along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/ */
import com.grack.nanojson.JsonParserException;
import org.schabi.newpipe.extractor.Extractor; import org.schabi.newpipe.extractor.Extractor;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.UrlIdHandler; import org.schabi.newpipe.extractor.UrlIdHandler;
@ -27,6 +28,7 @@ import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap;
import java.util.List; import java.util.List;
/** /**
@ -64,6 +66,7 @@ public abstract class StreamExtractor extends Extractor {
public abstract List<AudioStream> getAudioStreams() throws IOException, ExtractionException; public abstract List<AudioStream> getAudioStreams() throws IOException, ExtractionException;
public abstract List<VideoStream> getVideoStreams() throws IOException, ExtractionException; public abstract List<VideoStream> getVideoStreams() throws IOException, ExtractionException;
public abstract List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException; public abstract List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException;
/**
 * Returns the subtitle tracks of this stream as a map of String keys to String arrays.
 *
 * NOTE(review): the meaning of keys and values is not fixed by this declaration. The YouTube
 * extractor keys entries by the track's "vssId" and stores {captions URL, language name};
 * the SoundCloud extractor returns an empty map. Confirm before relying on it for other services.
 *
 * @return the subtitle map of the implementing service
 */
public abstract HashMap<String, String[]> getSubtitles() throws IOException, ExtractionException, JsonParserException;
public abstract StreamType getStreamType() throws ParsingException; public abstract StreamType getStreamType() throws ParsingException;
public abstract StreamInfoItem getNextVideo() throws IOException, ExtractionException; public abstract StreamInfoItem getNextVideo() throws IOException, ExtractionException;

View File

@ -1,5 +1,6 @@
package org.schabi.newpipe.extractor.services.soundcloud; package org.schabi.newpipe.extractor.services.soundcloud;
import com.grack.nanojson.JsonParserException;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import org.schabi.newpipe.Downloader; import org.schabi.newpipe.Downloader;
@ -101,4 +102,9 @@ public class SoundcloudStreamExtractorDefaultTest {
assertFalse(relatedVideos.getItemList().isEmpty()); assertFalse(relatedVideos.getItemList().isEmpty());
assertTrue(relatedVideos.getErrors().isEmpty()); assertTrue(relatedVideos.getErrors().isEmpty());
} }
/**
 * The subtitle lookup must hand back a map object, never null.
 */
@Test
public void testGetSubtitles() throws IOException, ExtractionException, JsonParserException {
    assertFalse(extractor.getSubtitles() == null);
}
} }

View File

@ -1,5 +1,6 @@
package org.schabi.newpipe.extractor.services.youtube; package org.schabi.newpipe.extractor.services.youtube;
import com.grack.nanojson.JsonParserException;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import org.schabi.newpipe.Downloader; import org.schabi.newpipe.Downloader;
@ -12,6 +13,7 @@ import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.stream.VideoStream; import org.schabi.newpipe.extractor.stream.VideoStream;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap;
import static org.junit.Assert.*; import static org.junit.Assert.*;
import static org.schabi.newpipe.extractor.ServiceList.YouTube; import static org.schabi.newpipe.extractor.ServiceList.YouTube;
@ -148,4 +150,9 @@ public class YoutubeStreamExtractorDefaultTest {
} }
assertTrue(relatedVideos.getErrors().isEmpty()); assertTrue(relatedVideos.getErrors().isEmpty());
} }
/**
 * The subtitle lookup must return a map (possibly empty), never null.
 * Uses assertNotNull so that a failure reports what was expected instead of a bare
 * "expected true" from assertTrue.
 */
@Test
public void testGetSubtitles() throws IOException, ExtractionException, JsonParserException {
    assertNotNull("getSubtitles() must not return null", extractor.getSubtitles());
}
} }

View File

@ -1,5 +1,6 @@
package org.schabi.newpipe.extractor.services.youtube; package org.schabi.newpipe.extractor.services.youtube;
import com.grack.nanojson.JsonParserException;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import org.schabi.newpipe.Downloader; import org.schabi.newpipe.Downloader;
@ -103,4 +104,9 @@ public class YoutubeStreamExtractorRestrictedTest {
0 <= s.format && s.format <= 4); 0 <= s.format && s.format <= 4);
} }
} }
/**
 * Checks that asking the extractor for subtitles yields a non-null result.
 */
@Test
public void testGetSubtitles() throws IOException, ExtractionException, JsonParserException {
    final boolean subtitlesPresent = extractor.getSubtitles() != null;
    assertTrue(subtitlesPresent);
}
} }