-Removed new method in favor of current method.

-Fixed current method to output all available captions.
-Fixed tests.
This commit is contained in:
John Zhen Mo 2018-02-01 21:48:34 -08:00
parent b3699cdcfc
commit aafe543334
3 changed files with 72 additions and 30 deletions

View File

@ -7,7 +7,6 @@ import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.mozilla.javascript.Context; import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function; import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject; import org.mozilla.javascript.ScriptableObject;
@ -456,7 +455,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
collector.commit(extractVideoPreviewInfo(doc.select("div[class=\"watch-sidebar-section\"]") collector.commit(extractVideoPreviewInfo(doc.select("div[class=\"watch-sidebar-section\"]")
.first().select("li").first())); .first().select("li").first()));
return ((StreamInfoItem) collector.getItemList().get(0)); return collector.getItemList().get(0);
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get next video", e); throw new ParsingException("Could not get next video", e);
} }
@ -555,7 +554,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} }
if (availableSubtitles.isEmpty()) { if (availableSubtitles.isEmpty()) {
availableSubtitles.addAll(getAvailableSubtitles(getId())); availableSubtitles.addAll(getAvailableSubtitles());
} }
} }
@ -709,24 +708,57 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return result == null ? "" : result.toString(); return result == null ? "" : result.toString();
} }
private List<Subtitles> getAvailableSubtitles(final String id) throws SubtitlesException { @Nonnull
try { private List<Subtitles> getAvailableSubtitles() throws SubtitlesException {
final String listingUrl = getVideoSubtitlesListingUrl(id); // If the video is age restricted getPlayerConfig will fail
final String pageContent = NewPipe.getDownloader().download(listingUrl); if(isAgeRestricted) return Collections.emptyList();
final Document listing = Jsoup.parse(pageContent, listingUrl);
final Elements tracks = listing.select("track");
List<Subtitles> subtitles = new ArrayList<>(tracks.size() * 2); final JsonObject playerConfig;
for (final Element track : tracks) { try {
final String languageCode = track.attr("lang_code"); playerConfig = getPlayerConfig(getPageHtml(NewPipe.getDownloader()));
subtitles.add(getVideoSubtitlesUrl(id, languageCode, SubtitlesFormat.TTML)); } catch (IOException | ExtractionException e) {
subtitles.add(getVideoSubtitlesUrl(id, languageCode, SubtitlesFormat.VTT)); throw new SubtitlesException("Unable to download player configs", e);
// todo: add transcripts, they are currently omitted since they are incompatible with ExoPlayer
}
return subtitles;
} catch (IOException | ReCaptchaException e) {
throw new SubtitlesException("Unable to download subtitles listing", e);
} }
final String playerResponse = playerConfig.getObject("args").getString("player_response");
final JsonObject captions;
try {
if (!JsonParser.object().from(playerResponse).has("captions")) {
// Captions do not exist
return Collections.emptyList();
}
captions = JsonParser.object().from(playerResponse).getObject("captions");
} catch (JsonParserException e) {
// Failed to parse subtitles
throw new SubtitlesException("Unable to parse subtitles listing", e);
}
final JsonObject renderer = captions.getObject("playerCaptionsTracklistRenderer");
final JsonArray captionsArray = renderer.getArray("captionTracks");
final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages");
final int captionsSize = captionsArray.size();
// Should not happen: if the "captions" object is present, it should always have some captions in it
if(captionsSize == 0) return Collections.emptyList();
// Obtain the base url, this only needs to be done once
final String baseUrl = captionsArray.getObject(0).getString("baseUrl");
Set<String> manualLanguageCodes = new HashSet<>();
for (int i = 0; i < captionsSize; i++) {
manualLanguageCodes.add(captionsArray.getObject(i).getString("languageCode"));
}
Set<String> automaticLanguageCodes = new HashSet<>();
for (int i = 0; i < autoCaptionsArray.size(); i++) {
automaticLanguageCodes.add(autoCaptionsArray.getObject(i).getString("languageCode"));
}
List<Subtitles> result = new ArrayList<>();
result.addAll(getVideoSubtitlesUrl(baseUrl, new ArrayList<>(manualLanguageCodes),
new ArrayList<>(automaticLanguageCodes), SubtitlesFormat.VTT));
result.addAll(getVideoSubtitlesUrl(baseUrl, new ArrayList<>(manualLanguageCodes),
new ArrayList<>(automaticLanguageCodes), SubtitlesFormat.TTML));
// todo: add transcripts, they are currently omitted since they are incompatible with ExoPlayer
return result;
} }
/*////////////////////////////////////////////////////////////////////////// /*//////////////////////////////////////////////////////////////////////////
// Data Class // Data Class
@ -754,16 +786,26 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} }
@Nonnull @Nonnull
private static String getVideoSubtitlesListingUrl(final String id) { private static List<Subtitles> getVideoSubtitlesUrl(final String baseUrl,
return "https://video.google.com/timedtext?type=list&v=" + id; final List<String> manualCaptionLanguageCodes,
} final List<String> automaticCaptionLanguageCodes,
final SubtitlesFormat format) {
final String cleanUrl = baseUrl
.replaceAll("&fmt=[^&]*", "") // Remove preexisting format if exists
.replaceAll("&tlang=[^&]*", "") // Remove translation language
.replaceAll("&kind=[^&]*", ""); // Remove automatic generation toggle
final String builderUrl = cleanUrl + "&fmt=" + format.getExtension() + "&tlang=";
@Nonnull List<Subtitles> subtitles = new ArrayList<>(manualCaptionLanguageCodes.size() +
private static Subtitles getVideoSubtitlesUrl(final String id, final String locale, final SubtitlesFormat format) { automaticCaptionLanguageCodes.size());
final String url = "https://www.youtube.com/api/timedtext?lang=" + locale + for (final String languageCode : manualCaptionLanguageCodes) {
"&fmt=" + format.getExtension() + "&v=" + id; subtitles.add(new Subtitles(format, languageCode, builderUrl + languageCode, false));
// These are all non-generated }
return new Subtitles(format, locale, url, false); for (final String languageCode : automaticCaptionLanguageCodes) {
final String fullUrl = builderUrl + languageCode + "&kind=asr";
subtitles.add(new Subtitles(format, languageCode, fullUrl, true));
}
return subtitles;
} }
private Map<String, ItagItem> getItags(String encodedUrlMapKey, ItagItem.ItagType itagTypeWanted) throws ParsingException { private Map<String, ItagItem> getItags(String encodedUrlMapKey, ItagItem.ItagType itagTypeWanted) throws ParsingException {

View File

@ -153,6 +153,6 @@ public class YoutubeStreamExtractorDefaultTest {
@Test @Test
public void testGetSubtitlesList() throws IOException, ExtractionException { public void testGetSubtitlesList() throws IOException, ExtractionException {
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => empty list // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => empty list
assertTrue(extractor.getSubtitlesDefault().isEmpty()); assertTrue(extractor.getSubtitles(SubtitlesFormat.TTML).isEmpty());
} }
} }

View File

@ -128,6 +128,6 @@ public class YoutubeStreamExtractorRestrictedTest {
@Test @Test
public void testGetSubtitlesList() throws IOException, ExtractionException { public void testGetSubtitlesList() throws IOException, ExtractionException {
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => empty list // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => empty list
assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT).isEmpty()); assertTrue(extractor.getSubtitles(SubtitlesFormat.TTML).isEmpty());
} }
} }