NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java

352 lines
14 KiB
Java
Raw Normal View History

2018-05-08 21:19:03 +02:00
package org.schabi.newpipe.extractor.services.youtube.extractors;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.DISABLE_PRETTY_PRINT_PARAMETER;
2022-03-18 15:09:06 +01:00
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.YOUTUBEI_V1_URL;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.addClientInfoHeaders;
2022-03-26 19:46:10 +01:00
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.extractPlaylistTypeFromPlaylistUrl;
2022-03-18 15:09:06 +01:00
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getKey;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getValidJsonResponseBody;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
2020-02-17 18:58:12 +01:00
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonWriter;
2020-04-15 14:09:46 +02:00
import org.schabi.newpipe.extractor.Page;
2018-09-04 03:37:31 +02:00
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
import org.schabi.newpipe.extractor.playlist.PlaylistInfo;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
2018-03-01 01:02:43 +01:00
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
2018-02-24 22:20:50 +01:00
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
public class YoutubePlaylistExtractor extends PlaylistExtractor {
// Minimum size of the stats array in the browse response which includes the streams count
private static final int STATS_ARRAY_WITH_STREAMS_COUNT_MIN_SIZE = 2;
// Names of some objects in JSON response frequently used in this class
private static final String PLAYLIST_VIDEO_RENDERER = "playlistVideoRenderer";
private static final String PLAYLIST_VIDEO_LIST_RENDERER = "playlistVideoListRenderer";
private static final String VIDEO_OWNER_RENDERER = "videoOwnerRenderer";
private JsonObject browseResponse;
2020-02-17 18:58:12 +01:00
private JsonObject playlistInfo;
public YoutubePlaylistExtractor(final StreamingService service,
final ListLinkHandler linkHandler) {
super(service, linkHandler);
2017-08-06 22:20:15 +02:00
}
2017-08-06 22:20:15 +02:00
@Override
public void onFetchPage(@Nonnull final Downloader downloader) throws IOException,
ExtractionException {
final Localization localization = getExtractorLocalization();
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`: - the desktop API is fetched. If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API. - if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent. Otherwise, the next endpoint will be fetched normally, if the content is available. If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made. We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used. If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used. Other code changes: - `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder` - `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder` - two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder` - `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper` - a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor` - `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>` - the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page - some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
getExtractorContentCountry())
.value("browseId", "VL" + getId())
.value("params", "wgYCCAA%3D") // Show unavailable videos
.done())
.getBytes(StandardCharsets.UTF_8);
2020-02-26 14:45:50 +01:00
browseResponse = getJsonPostResponse("browse", body, localization);
YoutubeParsingHelper.defaultAlertsCheck(browseResponse);
2020-02-17 18:58:12 +01:00
playlistInfo = getPlaylistInfo();
}
private JsonObject getUploaderInfo() throws ParsingException {
final JsonArray items = browseResponse.getObject("sidebar")
.getObject("playlistSidebarRenderer")
.getArray("items");
2020-04-16 16:08:14 +02:00
JsonObject videoOwner = items.getObject(1)
.getObject("playlistSidebarSecondaryInfoRenderer")
.getObject("videoOwner");
if (videoOwner.has(VIDEO_OWNER_RENDERER)) {
return videoOwner.getObject(VIDEO_OWNER_RENDERER);
2020-04-16 16:08:14 +02:00
}
2020-02-17 18:58:12 +01:00
// we might want to create a loop here instead of using duplicated code
videoOwner = items.getObject(items.size())
.getObject("playlistSidebarSecondaryInfoRenderer")
.getObject("videoOwner");
if (videoOwner.has(VIDEO_OWNER_RENDERER)) {
return videoOwner.getObject(VIDEO_OWNER_RENDERER);
2020-02-17 18:58:12 +01:00
}
throw new ParsingException("Could not get uploader info");
}
private JsonObject getPlaylistInfo() throws ParsingException {
try {
return browseResponse.getObject("sidebar")
.getObject("playlistSidebarRenderer")
.getArray("items")
.getObject(0)
.getObject("playlistSidebarPrimaryInfoRenderer");
} catch (final Exception e) {
2020-02-17 18:58:12 +01:00
throw new ParsingException("Could not get PlaylistInfo", e);
}
}
@Nonnull
@Override
2017-08-11 03:23:09 +02:00
public String getName() throws ParsingException {
2020-07-06 20:23:41 +02:00
final String name = getTextFromObject(playlistInfo.getObject("title"));
if (!isNullOrEmpty(name)) {
return name;
}
2020-04-16 16:08:14 +02:00
return browseResponse.getObject("microformat")
.getObject("microformatDataRenderer")
.getString("title");
}
@Nonnull
@Override
2017-08-08 23:36:11 +02:00
public String getThumbnailUrl() throws ParsingException {
String url = playlistInfo.getObject("thumbnailRenderer")
.getObject("playlistVideoThumbnailRenderer")
.getObject("thumbnail")
.getArray("thumbnails")
.getObject(0)
.getString("url");
2020-02-27 19:08:46 +01:00
if (isNullOrEmpty(url)) {
url = browseResponse.getObject("microformat")
.getObject("microformatDataRenderer")
.getObject("thumbnail")
.getArray("thumbnails")
.getObject(0)
.getString("url");
if (isNullOrEmpty(url)) {
throw new ParsingException("Could not get playlist thumbnail");
}
2020-02-27 19:08:46 +01:00
}
2020-02-29 17:18:50 +01:00
return fixThumbnailUrl(url);
}
@Override
public String getUploaderUrl() throws ParsingException {
try {
2020-02-27 17:39:23 +01:00
return getUrlFromNavigationEndpoint(getUploaderInfo().getObject("navigationEndpoint"));
} catch (final Exception e) {
throw new ParsingException("Could not get playlist uploader url", e);
}
}
@Override
public String getUploaderName() throws ParsingException {
try {
2020-02-27 17:39:23 +01:00
return getTextFromObject(getUploaderInfo().getObject("title"));
} catch (final Exception e) {
throw new ParsingException("Could not get playlist uploader name", e);
}
}
@Override
public String getUploaderAvatarUrl() throws ParsingException {
try {
final String url = getUploaderInfo()
.getObject("thumbnail")
.getArray("thumbnails")
.getObject(0)
.getString("url");
2020-02-27 17:39:23 +01:00
return fixThumbnailUrl(url);
} catch (final Exception e) {
throw new ParsingException("Could not get playlist uploader avatar", e);
}
}
@Override
public boolean isUploaderVerified() throws ParsingException {
// YouTube doesn't provide this information
return false;
}
@Override
2017-08-06 22:20:15 +02:00
public long getStreamCount() throws ParsingException {
try {
final JsonArray stats = playlistInfo.getArray("stats");
// For unknown reasons, YouTube don't provide the stream count for learning playlists
// on the desktop client but only the number of views and the playlist modified date
// On normal playlists, at least 3 items are returned: the number of videos, the number
// of views and the playlist modification date
// We can get it by using another client, however it seems we can't get the avatar
// uploader URL with another client than the WEB client
if (stats.size() > STATS_ARRAY_WITH_STREAMS_COUNT_MIN_SIZE) {
final String videosText = getTextFromObject(playlistInfo.getArray("stats")
.getObject(0));
if (videosText != null) {
return Long.parseLong(Utils.removeNonDigitCharacters(videosText));
}
}
return ITEM_COUNT_UNKNOWN;
} catch (final Exception e) {
2017-08-06 22:20:15 +02:00
throw new ParsingException("Could not get video count from playlist", e);
}
}
@Nonnull
@Override
2020-05-13 17:26:07 +02:00
public String getSubChannelName() {
return EMPTY_STRING;
}
@Nonnull
@Override
2020-05-13 17:26:07 +02:00
public String getSubChannelUrl() {
return EMPTY_STRING;
}
@Nonnull
@Override
2020-05-13 17:26:07 +02:00
public String getSubChannelAvatarUrl() {
return EMPTY_STRING;
}
2017-11-25 02:03:30 +01:00
@Nonnull
@Override
2021-03-04 05:31:13 +01:00
public InfoItemsPage<StreamInfoItem> getInitialPage() throws IOException, ExtractionException {
2020-05-13 17:26:07 +02:00
final StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
2020-04-15 14:09:46 +02:00
Page nextPage = null;
final JsonArray contents = browseResponse.getObject("contents")
.getObject("twoColumnBrowseResultsRenderer")
.getArray("tabs")
.getObject(0)
.getObject("tabRenderer")
.getObject("content")
.getObject("sectionListRenderer")
.getArray("contents");
2020-05-13 17:26:07 +02:00
final JsonObject videoPlaylistObject = contents.stream()
2022-03-16 20:14:08 +01:00
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.map(content -> content.getObject("itemSectionRenderer")
.getArray("contents")
.getObject(0))
.filter(contentItemSectionRendererContents ->
contentItemSectionRendererContents.has(PLAYLIST_VIDEO_LIST_RENDERER)
|| contentItemSectionRendererContents.has(
"playlistSegmentRenderer"))
.findFirst()
.orElse(null);
if (videoPlaylistObject != null && videoPlaylistObject.has(PLAYLIST_VIDEO_LIST_RENDERER)) {
final JsonArray videosArray = videoPlaylistObject
.getObject(PLAYLIST_VIDEO_LIST_RENDERER)
.getArray("contents");
collectStreamsFrom(collector, videosArray);
nextPage = getNextPageFrom(videosArray);
2020-05-13 17:26:07 +02:00
}
2020-04-15 14:09:46 +02:00
return new InfoItemsPage<>(collector, nextPage);
}
@Override
public InfoItemsPage<StreamInfoItem> getPage(final Page page) throws IOException,
ExtractionException {
if (page == null || isNullOrEmpty(page.getUrl())) {
throw new IllegalArgumentException("Page doesn't contain an URL");
}
final StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
final Map<String, List<String>> headers = new HashMap<>();
addClientInfoHeaders(headers);
final Response response = getDownloader().post(page.getUrl(), headers, page.getBody(),
getExtractorLocalization());
final JsonObject ajaxJson = JsonUtils.toJsonObject(getValidJsonResponseBody(response));
2018-03-01 01:02:43 +01:00
final JsonArray continuation = ajaxJson.getArray("onResponseReceivedActions")
.getObject(0)
.getObject("appendContinuationItemsAction")
.getArray("continuationItems");
2018-03-01 01:02:43 +01:00
collectStreamsFrom(collector, continuation);
2018-03-01 01:02:43 +01:00
return new InfoItemsPage<>(collector, getNextPageFrom(continuation));
2018-02-26 15:55:27 +01:00
}
@Nullable
private Page getNextPageFrom(final JsonArray contents)
throws IOException, ExtractionException {
if (isNullOrEmpty(contents)) {
2020-04-15 14:09:46 +02:00
return null;
}
final JsonObject lastElement = contents.getObject(contents.size() - 1);
if (lastElement.has("continuationItemRenderer")) {
final String continuation = lastElement
.getObject("continuationItemRenderer")
.getObject("continuationEndpoint")
.getObject("continuationCommand")
.getString("token");
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`: - the desktop API is fetched. If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API. - if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent. Otherwise, the next endpoint will be fetched normally, if the content is available. If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made. We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used. If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used. Other code changes: - `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder` - `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder` - two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder` - `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper` - a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor` - `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>` - the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page - some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(
getExtractorLocalization(), getExtractorContentCountry())
.value("continuation", continuation)
.done())
.getBytes(StandardCharsets.UTF_8);
return new Page(YOUTUBEI_V1_URL + "browse?key=" + getKey()
+ DISABLE_PRETTY_PRINT_PARAMETER, body);
} else {
return null;
}
}
private void collectStreamsFrom(@Nonnull final StreamInfoItemsCollector collector,
@Nonnull final JsonArray videos) {
final TimeAgoParser timeAgoParser = getTimeAgoParser();
videos.stream()
2022-03-16 20:14:08 +01:00
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.filter(video -> video.has(PLAYLIST_VIDEO_RENDERER))
.map(video -> new YoutubeStreamInfoItemExtractor(
video.getObject(PLAYLIST_VIDEO_RENDERER), timeAgoParser) {
2020-02-23 13:48:54 +01:00
@Override
public long getViewCount() {
return -1;
}
})
.forEachOrdered(collector::commit);
}
@Nonnull
@Override
public PlaylistInfo.PlaylistType getPlaylistType() throws ParsingException {
return extractPlaylistTypeFromPlaylistUrl(getUrl());
}
}