From efce384d9bbf66737c89bb9a4c2cff82c280aabe Mon Sep 17 00:00:00 2001 From: TobiGr Date: Mon, 5 Dec 2022 18:03:34 +0100 Subject: [PATCH] [YouTube] Add support for extracting auto-translated captions Closes TeamNewPipe/NewPipeExtractor#977 Based on and addresses TeamNewPipe/NewPipe#8023 --- .../extractors/PeertubeStreamExtractor.java | 1 + .../extractors/YoutubeStreamExtractor.java | 28 ++++++++++--- .../extractor/stream/SubtitlesStream.java | 42 ++++++++++++++++++- 3 files changed, 63 insertions(+), 8 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeStreamExtractor.java index 186063771..df40da208 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeStreamExtractor.java @@ -416,6 +416,7 @@ public class PeertubeStreamExtractor extends StreamExtractor { .setMediaFormat(fmt) .setLanguageCode(languageCode) .setAutoGenerated(false) + .setAutoTranslated(false) .build()); } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 150acf87b..18ee3bdaa 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -688,7 +688,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override @Nonnull - public List getSubtitles(final MediaFormat format) throws ParsingException { + public List getSubtitles(@Nonnull final MediaFormat format) { assertPageFetched(); // We
cannot store the subtitles list because the media format may change @@ -696,13 +696,12 @@ public class YoutubeStreamExtractor extends StreamExtractor { final JsonObject renderer = playerResponse.getObject("captions") .getObject("playerCaptionsTracklistRenderer"); final JsonArray captionsArray = renderer.getArray("captionTracks"); - // TODO: use this to apply auto translation to different language from a source language - // final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages"); for (int i = 0; i < captionsArray.size(); i++) { - final String languageCode = captionsArray.getObject(i).getString("languageCode"); - final String baseUrl = captionsArray.getObject(i).getString("baseUrl"); - final String vssId = captionsArray.getObject(i).getString("vssId"); + final JsonObject caption = captionsArray.getObject(i); + final String languageCode = caption.getString("languageCode"); + final String baseUrl = caption.getString("baseUrl"); + final String vssId = caption.getString("vssId"); if (languageCode != null && baseUrl != null && vssId != null) { final boolean isAutoGenerated = vssId.startsWith("a."); @@ -717,7 +716,24 @@ public class YoutubeStreamExtractor extends StreamExtractor { .setMediaFormat(format) .setLanguageCode(languageCode) .setAutoGenerated(isAutoGenerated) + .setAutoTranslated(false) .build()); + if (i == 0 && caption.getBoolean("isTranslatable") + && renderer.has("translationLanguages")) { + final JsonArray languages = renderer.getArray("translationLanguages"); + for (int j = 0; j < languages.size(); j++) { + final JsonObject lang = languages.getObject(j); + final String tLanguageCode = lang.getString("languageCode"); + subtitlesToReturn.add(new SubtitlesStream.Builder() + .setContent(cleanUrl + "&fmt=" + format.getSuffix() + + "&tlang=" + tLanguageCode, true) + .setMediaFormat(format) + .setLanguageCode(tLanguageCode) + .setAutoGenerated(isAutoGenerated) + .setAutoTranslated(true) + .build()); + } + } } } diff --git 
a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/SubtitlesStream.java b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/SubtitlesStream.java index 778a85c93..7943fc6fb 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/SubtitlesStream.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/SubtitlesStream.java @@ -12,6 +12,7 @@ public final class SubtitlesStream extends Stream { private final MediaFormat format; private final Locale locale; private final boolean autoGenerated; + private final boolean autoTranslated; private final String code; /** @@ -30,6 +31,7 @@ public final class SubtitlesStream extends Stream { private String languageCode; // Use of the Boolean class instead of the primitive type needed for setter call check private Boolean autoGenerated; + private Boolean autoTranslated; /** * Create a new {@link Builder} instance with default values. @@ -150,6 +152,18 @@ public final class SubtitlesStream extends Stream { return this; } + /** + * Set whether the subtitles have been automatically translated + * (i.e. by a machine like Google Translator) by the streaming service. + * @param autoTranslated whether the subtitles have been automatically translated by the + * streaming service + * @return this {@link Builder} instance + */ + public Builder setAutoTranslated(final boolean autoTranslated) { + this.autoTranslated = autoTranslated; + return this; + } + /** * Build a {@link SubtitlesStream} using the builder's current values. * @@ -194,13 +208,19 @@ public final class SubtitlesStream extends Stream { + "with setIsAutoGenerated."); } + if (autoTranslated == null) { + throw new IllegalStateException("The subtitles stream has been not set as an " + + "automatically translated subtitles stream or not. " + + "Please specify this information with setIsAutoTranslated."); + } + if (id == null) { id = languageCode + (mediaFormat != null ? "." 
+ mediaFormat.suffix : ""); } return new SubtitlesStream(id, content, isUrl, mediaFormat, deliveryMethod, - languageCode, autoGenerated, manifestUrl); + languageCode, autoGenerated, autoTranslated, manifestUrl); } } @@ -217,6 +237,7 @@ public final class SubtitlesStream extends Stream { * @param deliveryMethod the {@link DeliveryMethod} of the stream * @param languageCode the language code of the stream * @param autoGenerated whether the subtitles are auto-generated by the streaming service + * @param autoTranslated whether the subtitles are auto-translated by the streaming service * @param manifestUrl the URL of the manifest this stream comes from (if applicable, * otherwise null) */ @@ -228,6 +249,7 @@ public final class SubtitlesStream extends Stream { @Nonnull final DeliveryMethod deliveryMethod, @Nonnull final String languageCode, final boolean autoGenerated, + final boolean autoTranslated, @Nullable final String manifestUrl) { super(id, content, isUrl, mediaFormat, deliveryMethod, manifestUrl); @@ -253,6 +275,7 @@ public final class SubtitlesStream extends Stream { this.code = languageCode; this.format = mediaFormat; this.autoGenerated = autoGenerated; + this.autoTranslated = autoTranslated; } /** @@ -265,7 +288,7 @@ public final class SubtitlesStream extends Stream { } /** - * Return whether if the subtitles are auto-generated. + * Return whether the subtitles are auto-generated. *

* Some streaming services can generate subtitles for their contents, like YouTube. *

@@ -276,6 +299,21 @@ public final class SubtitlesStream extends Stream { return autoGenerated; } + /** + * Whether the subtitles are translated automatically by a machine. + * + *

+ * Some streaming services provide automatically translated subtitles. + * YouTube, for example, uses Google translator to generate translated subtitles. + * Automatically translated subtitles might not coincide completely with the original text. + *

+ * + * @return {@code true} if the subtitles are auto-translated, {@code false} otherwise + */ + public boolean isAutoTranslated() { + return autoTranslated; + } + /** * {@inheritDoc} */