NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeMixPlaylistExtractor...

266 lines
11 KiB
Java
Raw Normal View History

package org.schabi.newpipe.extractor.services.youtube.extractors;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.DISABLE_PRETTY_PRINT_PARAMETER;
2022-03-18 15:09:06 +01:00
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.YOUTUBEI_V1_URL;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.addClientInfoHeaders;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.extractCookieValue;
2022-03-26 19:46:10 +01:00
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.extractPlaylistTypeFromPlaylistId;
2022-03-18 15:09:06 +01:00
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getKey;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getValidJsonResponseBody;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING;
import static org.schabi.newpipe.extractor.utils.Utils.getQueryValue;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import static org.schabi.newpipe.extractor.utils.Utils.stringToURL;
import com.grack.nanojson.JsonArray;
2021-06-24 18:39:16 +02:00
import com.grack.nanojson.JsonBuilder;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonWriter;
2022-03-18 15:09:06 +01:00
import org.schabi.newpipe.extractor.ListExtractor;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
import org.schabi.newpipe.extractor.playlist.PlaylistInfo;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import java.io.IOException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
2022-03-18 15:09:06 +01:00
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
2020-02-02 18:15:47 +01:00
/**
* A {@link YoutubePlaylistExtractor} for a mix (auto-generated playlist).
* It handles URLs in the format of
* {@code youtube.com/watch?v=videoId&list=playlistId}
2020-02-02 18:15:47 +01:00
*/
public class YoutubeMixPlaylistExtractor extends PlaylistExtractor {
/**
* YouTube identifies mixes based on this cookie. With this information it can generate
* continuations without duplicates.
*/
public static final String COOKIE_NAME = "VISITOR_INFO1_LIVE";
private JsonObject initialData;
private JsonObject playlistData;
private String cookieValue;
public YoutubeMixPlaylistExtractor(final StreamingService service,
final ListLinkHandler linkHandler) {
super(service, linkHandler);
}
@Override
public void onFetchPage(@Nonnull final Downloader downloader)
throws IOException, ExtractionException {
final Localization localization = getExtractorLocalization();
final URL url = stringToURL(getUrl());
final String mixPlaylistId = getId();
final String videoId = getQueryValue(url, "v");
final String playlistIndexString = getQueryValue(url, "index");
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`: - the desktop API is fetched. If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API. - if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent. Otherwise, the next endpoint will be fetched normally, if the content is available. If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made. We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used. If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used. Other code changes: - `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder` - `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder` - two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder` - `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper` - a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor` - `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>` - the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page - some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
final JsonBuilder<JsonObject> jsonBody = prepareDesktopJsonBuilder(localization,
2021-06-24 18:39:16 +02:00
getExtractorContentCountry()).value("playlistId", mixPlaylistId);
if (videoId != null) {
2021-06-24 18:39:16 +02:00
jsonBody.value("videoId", videoId);
}
2021-06-24 18:39:16 +02:00
if (playlistIndexString != null) {
jsonBody.value("playlistIndex", Integer.parseInt(playlistIndexString));
}
final byte[] body = JsonWriter.string(jsonBody.done()).getBytes(StandardCharsets.UTF_8);
final Map<String, List<String>> headers = new HashMap<>();
addClientInfoHeaders(headers);
final Response response = getDownloader().post(YOUTUBEI_V1_URL + "next?key=" + getKey()
+ DISABLE_PRETTY_PRINT_PARAMETER, headers, body, localization);
initialData = JsonUtils.toJsonObject(getValidJsonResponseBody(response));
playlistData = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
.getObject("playlist").getObject("playlist");
2022-03-18 15:09:06 +01:00
if (isNullOrEmpty(playlistData)) {
throw new ExtractionException("Could not get playlistData");
}
cookieValue = extractCookieValue(COOKIE_NAME, response);
}
@Nonnull
@Override
public String getName() throws ParsingException {
final String name = YoutubeParsingHelper.getTextAtKey(playlistData, "title");
if (isNullOrEmpty(name)) {
throw new ParsingException("Could not get playlist name");
}
return name;
}
@Nonnull
@Override
public String getThumbnailUrl() throws ParsingException {
try {
return getThumbnailUrlFromPlaylistId(playlistData.getString("playlistId"));
} catch (final Exception e) {
try {
// Fallback to thumbnail of current video. Always the case for channel mix
return getThumbnailUrlFromVideoId(initialData.getObject("currentVideoEndpoint")
.getObject("watchEndpoint").getString("videoId"));
} catch (final Exception ignored) {
}
throw new ParsingException("Could not get playlist thumbnail", e);
}
}
@Override
public String getUploaderUrl() {
// YouTube mixes are auto-generated by YouTube
return EMPTY_STRING;
}
@Override
public String getUploaderName() {
// YouTube mixes are auto-generated by YouTube
return "YouTube";
}
@Override
public String getUploaderAvatarUrl() {
// YouTube mixes are auto-generated by YouTube
return EMPTY_STRING;
}
@Override
public boolean isUploaderVerified() throws ParsingException {
return false;
}
@Override
public long getStreamCount() {
// Auto-generated playlists always start with 25 videos and are endless
return ListExtractor.ITEM_COUNT_INFINITE;
}
@Nonnull
@Override
public InfoItemsPage<StreamInfoItem> getInitialPage()
throws IOException, ExtractionException {
final StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
collectStreamsFrom(collector, playlistData.getArray("contents"));
2021-04-07 12:25:59 +02:00
final Map<String, String> cookies = new HashMap<>();
cookies.put(COOKIE_NAME, cookieValue);
return new InfoItemsPage<>(collector, getNextPageFrom(playlistData, cookies));
}
@Nonnull
private Page getNextPageFrom(@Nonnull final JsonObject playlistJson,
final Map<String, String> cookies)
throws IOException, ExtractionException {
final JsonObject lastStream = ((JsonObject) playlistJson.getArray("contents")
.get(playlistJson.getArray("contents").size() - 1));
if (lastStream == null || lastStream.getObject("playlistPanelVideoRenderer") == null) {
throw new ExtractionException("Could not extract next page url");
}
final JsonObject watchEndpoint = lastStream.getObject("playlistPanelVideoRenderer")
.getObject("navigationEndpoint").getObject("watchEndpoint");
final String playlistId = watchEndpoint.getString("playlistId");
final String videoId = watchEndpoint.getString("videoId");
final int index = watchEndpoint.getInt("index");
final String params = watchEndpoint.getString("params");
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`: - the desktop API is fetched. If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API. - if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent. Otherwise, the next endpoint will be fetched normally, if the content is available. If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made. We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used. If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used. Other code changes: - `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder` - `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder` - two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder` - `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper` - a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor` - `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>` - the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page - some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(getExtractorLocalization(),
getExtractorContentCountry())
.value("videoId", videoId)
.value("playlistId", playlistId)
.value("playlistIndex", index)
.value("params", params)
.done())
.getBytes(StandardCharsets.UTF_8);
return new Page(YOUTUBEI_V1_URL + "next?key=" + getKey(), null, null, cookies, body);
}
@Override
public InfoItemsPage<StreamInfoItem> getPage(final Page page) throws IOException,
ExtractionException {
if (page == null || isNullOrEmpty(page.getUrl())) {
throw new IllegalArgumentException("Page doesn't contain an URL");
}
if (!page.getCookies().containsKey(COOKIE_NAME)) {
throw new IllegalArgumentException("Cookie '" + COOKIE_NAME + "' is missing");
}
final StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
final Map<String, List<String>> headers = new HashMap<>();
addClientInfoHeaders(headers);
final Response response = getDownloader().post(page.getUrl(), headers, page.getBody(),
getExtractorLocalization());
final JsonObject ajaxJson = JsonUtils.toJsonObject(getValidJsonResponseBody(response));
final JsonObject playlistJson = ajaxJson.getObject("contents")
.getObject("twoColumnWatchNextResults").getObject("playlist").getObject("playlist");
final JsonArray allStreams = playlistJson.getArray("contents");
// Sublist because YouTube returns up to 24 previous streams in the mix
// +1 because the stream of "currentIndex" was already extracted in previous request
final List<Object> newStreams =
allStreams.subList(playlistJson.getInt("currentIndex") + 1, allStreams.size());
collectStreamsFrom(collector, newStreams);
return new InfoItemsPage<>(collector, getNextPageFrom(playlistJson, page.getCookies()));
}
private void collectStreamsFrom(@Nonnull final StreamInfoItemsCollector collector,
@Nullable final List<Object> streams) {
if (streams == null) {
return;
}
final TimeAgoParser timeAgoParser = getTimeAgoParser();
streams.stream()
.filter(JsonObject.class::isInstance)
2022-03-16 20:14:08 +01:00
.map(JsonObject.class::cast)
.map(stream -> stream.getObject("playlistPanelVideoRenderer"))
.filter(Objects::nonNull)
.map(streamInfo -> new YoutubeStreamInfoItemExtractor(streamInfo, timeAgoParser))
.forEachOrdered(collector::commit);
}
@Nonnull
private String getThumbnailUrlFromPlaylistId(@Nonnull final String playlistId)
throws ParsingException {
return getThumbnailUrlFromVideoId(YoutubeParsingHelper.extractVideoIdFromMixId(playlistId));
}
@Nonnull
private String getThumbnailUrlFromVideoId(final String videoId) {
return "https://i.ytimg.com/vi/" + videoId + "/hqdefault.jpg";
}
@Nonnull
@Override
public PlaylistInfo.PlaylistType getPlaylistType() throws ParsingException {
return extractPlaylistTypeFromPlaylistId(playlistData.getString("playlistId"));
}
}