// Source: NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java
// (418 lines, 16 KiB, Java; exported from the repository's "Raw / Normal View / History" page)

2018-05-08 21:19:03 +02:00
package org.schabi.newpipe.extractor.services.youtube.extractors;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.DISABLE_PRETTY_PRINT_PARAMETER;
2022-03-18 15:09:06 +01:00
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.YOUTUBEI_V1_URL;
2022-03-26 19:46:10 +01:00
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.extractPlaylistTypeFromPlaylistUrl;
2022-03-18 15:09:06 +01:00
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getKey;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
2020-02-17 18:58:12 +01:00
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonWriter;
2022-11-12 05:01:05 +01:00
2020-04-15 14:09:46 +02:00
import org.schabi.newpipe.extractor.Page;
2018-09-04 03:37:31 +02:00
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
import org.schabi.newpipe.extractor.playlist.PlaylistInfo;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.stream.Description;
2018-03-01 01:02:43 +01:00
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
2018-02-24 22:20:50 +01:00
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
2022-11-12 05:01:05 +01:00
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
public class YoutubePlaylistExtractor extends PlaylistExtractor {
// Names of some objects in JSON response frequently used in this class
private static final String PLAYLIST_VIDEO_RENDERER = "playlistVideoRenderer";
private static final String PLAYLIST_VIDEO_LIST_RENDERER = "playlistVideoListRenderer";
private static final String SIDEBAR = "sidebar";
private static final String VIDEO_OWNER_RENDERER = "videoOwnerRenderer";
// Response of the "browse" request: assigned in onFetchPage and read by every getter below
private JsonObject browseResponse;
2020-02-17 18:58:12 +01:00
// Cache of the sidebar's "playlistSidebarPrimaryInfoRenderer", filled by getPlaylistInfo()
private JsonObject playlistInfo;
// Cache of the sidebar's "videoOwnerRenderer", filled by getUploaderInfo()
private JsonObject uploaderInfo;
// Cache of the header's "playlistHeaderRenderer", filled by getPlaylistHeader()
private JsonObject playlistHeader;
// Result of checkIfResponseIsNewPlaylistInterface(), computed once in onFetchPage
private boolean isNewPlaylistInterface;
/**
 * Creates a playlist extractor; both arguments are forwarded unchanged to the super class.
 *
 * @param service     the streaming service passed to {@link PlaylistExtractor}
 * @param linkHandler the list link handler passed to {@link PlaylistExtractor}
 */
public YoutubePlaylistExtractor(
        final StreamingService service,
        final ListLinkHandler linkHandler
) {
    super(service, linkHandler);
}
2017-08-06 22:20:15 +02:00
@Override
public void onFetchPage(@Nonnull final Downloader downloader) throws IOException,
ExtractionException {
final Localization localization = getExtractorLocalization();
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`: - the desktop API is fetched. If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API. - if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent. Otherwise, the next endpoint will be fetched normally, if the content is available. If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made. We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used. If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used. 
Other code changes: - `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder` - `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder` - two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder` - `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper` - a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor` - `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>` - the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page - some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
getExtractorContentCountry())
.value("browseId", "VL" + getId())
.value("params", "wgYCCAA%3D") // Show unavailable videos
.done())
.getBytes(StandardCharsets.UTF_8);
2020-02-26 14:45:50 +01:00
browseResponse = getJsonPostResponse("browse", body, localization);
YoutubeParsingHelper.defaultAlertsCheck(browseResponse);
isNewPlaylistInterface = checkIfResponseIsNewPlaylistInterface();
}
/**
* Whether the playlist response is using only the new playlist design.
*
* <p>
* This new response changes how metadata is returned, and does not provide author thumbnails.
* </p>
*
* <p>
* The new response can be detected by checking whether a header JSON object is returned in the
* browse response (the old returns instead a sidebar one).
* </p>
*
* <p>
* This new playlist UI is currently A/B tested.
* </p>
*
* @return Whether the playlist response is using only the new playlist design
*/
private boolean checkIfResponseIsNewPlaylistInterface() {
// The "old" playlist UI can be also returned with the new one
return browseResponse.has("header") && !browseResponse.has(SIDEBAR);
2020-02-17 18:58:12 +01:00
}
@Nonnull
2020-02-17 18:58:12 +01:00
private JsonObject getUploaderInfo() throws ParsingException {
if (uploaderInfo == null) {
uploaderInfo = browseResponse.getObject(SIDEBAR)
.getObject("playlistSidebarRenderer")
.getArray("items")
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.filter(item -> item.getObject("playlistSidebarSecondaryInfoRenderer")
.getObject("videoOwner")
.has(VIDEO_OWNER_RENDERER))
.map(item -> item.getObject("playlistSidebarSecondaryInfoRenderer")
.getObject("videoOwner")
.getObject(VIDEO_OWNER_RENDERER))
.findFirst()
.orElseThrow(() -> new ParsingException("Could not get uploader info"));
2020-04-16 16:08:14 +02:00
}
2020-02-17 18:58:12 +01:00
return uploaderInfo;
2020-02-17 18:58:12 +01:00
}
@Nonnull
2020-02-17 18:58:12 +01:00
private JsonObject getPlaylistInfo() throws ParsingException {
if (playlistInfo == null) {
playlistInfo = browseResponse.getObject(SIDEBAR)
.getObject("playlistSidebarRenderer")
.getArray("items")
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.filter(item -> item.has("playlistSidebarPrimaryInfoRenderer"))
.map(item -> item.getObject("playlistSidebarPrimaryInfoRenderer"))
.findFirst()
.orElseThrow(() -> new ParsingException("Could not get playlist info"));
2020-02-17 18:58:12 +01:00
}
return playlistInfo;
}
@Nonnull
private JsonObject getPlaylistHeader() {
if (playlistHeader == null) {
playlistHeader = browseResponse.getObject("header")
.getObject("playlistHeaderRenderer");
}
return playlistHeader;
2020-02-17 18:58:12 +01:00
}
@Nonnull
@Override
2017-08-11 03:23:09 +02:00
public String getName() throws ParsingException {
final String name = getTextFromObject(getPlaylistInfo().getObject("title"));
if (!isNullOrEmpty(name)) {
return name;
}
2020-04-16 16:08:14 +02:00
return browseResponse.getObject("microformat")
.getObject("microformatDataRenderer")
.getString("title");
}
@Nonnull
@Override
2017-08-08 23:36:11 +02:00
public String getThumbnailUrl() throws ParsingException {
String url;
if (isNewPlaylistInterface) {
url = getPlaylistHeader().getObject("playlistHeaderBanner")
.getObject("heroPlaylistThumbnailRenderer")
.getObject("thumbnail")
.getArray("thumbnails")
.getObject(0)
.getString("url");
} else {
url = getPlaylistInfo().getObject("thumbnailRenderer")
.getObject("playlistVideoThumbnailRenderer")
.getObject("thumbnail")
.getArray("thumbnails")
.getObject(0)
.getString("url");
}
2020-02-27 19:08:46 +01:00
// This data structure is returned in both layouts
if (isNullOrEmpty(url)) {
url = browseResponse.getObject("microformat")
.getObject("microformatDataRenderer")
.getObject("thumbnail")
.getArray("thumbnails")
.getObject(0)
.getString("url");
if (isNullOrEmpty(url)) {
throw new ParsingException("Could not get playlist thumbnail");
}
2020-02-27 19:08:46 +01:00
}
2020-02-29 17:18:50 +01:00
return fixThumbnailUrl(url);
}
/**
 * Returns the URL of the playlist uploader, built from a navigation endpoint.
 *
 * <p>
 * The endpoint is read from the first run of the header's {@code ownerText} in the new
 * layout, and from the sidebar's video owner renderer in the old one.
 * </p>
 *
 * @return the URL computed by {@code getUrlFromNavigationEndpoint}
 * @throws ParsingException wrapping any exception raised during the lookup
 */
@Override
public String getUploaderUrl() throws ParsingException {
    try {
        final JsonObject navigationEndpoint;
        if (isNewPlaylistInterface) {
            navigationEndpoint = getPlaylistHeader().getObject("ownerText")
                    .getArray("runs")
                    .getObject(0)
                    .getObject("navigationEndpoint");
        } else {
            navigationEndpoint = getUploaderInfo().getObject("navigationEndpoint");
        }

        return getUrlFromNavigationEndpoint(navigationEndpoint);
    } catch (final Exception e) {
        throw new ParsingException("Could not get playlist uploader url", e);
    }
}
/**
 * Returns the name of the playlist uploader.
 *
 * <p>
 * The text is read from the header's {@code ownerText} in the new layout, and from the
 * {@code title} of the sidebar's video owner renderer in the old one.
 * </p>
 *
 * @return the text extracted by {@code getTextFromObject}
 * @throws ParsingException wrapping any exception raised during the lookup
 */
@Override
public String getUploaderName() throws ParsingException {
    try {
        final JsonObject nameObject;
        if (isNewPlaylistInterface) {
            nameObject = getPlaylistHeader().getObject("ownerText");
        } else {
            nameObject = getUploaderInfo().getObject("title");
        }

        return getTextFromObject(nameObject);
    } catch (final Exception e) {
        throw new ParsingException("Could not get playlist uploader name", e);
    }
}
@Override
public String getUploaderAvatarUrl() throws ParsingException {
if (isNewPlaylistInterface) {
// The new playlist interface doesn't provide an uploader avatar
return "";
}
try {
final String url = getUploaderInfo()
.getObject("thumbnail")
.getArray("thumbnails")
.getObject(0)
.getString("url");
2020-02-27 17:39:23 +01:00
return fixThumbnailUrl(url);
} catch (final Exception e) {
throw new ParsingException("Could not get playlist uploader avatar", e);
}
}
/**
 * Always reports the playlist uploader as not verified.
 *
 * @return {@code false} unconditionally
 * @throws ParsingException declared by the signature, never thrown by this implementation
 */
@Override
public boolean isUploaderVerified() throws ParsingException {
// YouTube doesn't provide this information
return false;
}
@Override
2017-08-06 22:20:15 +02:00
public long getStreamCount() throws ParsingException {
if (isNewPlaylistInterface) {
final String numVideosText =
getTextFromObject(getPlaylistHeader().getObject("numVideosText"));
if (numVideosText != null) {
try {
return Long.parseLong(Utils.removeNonDigitCharacters(numVideosText));
} catch (final NumberFormatException ignored) {
}
}
final String firstByLineRendererText = getTextFromObject(
getPlaylistHeader().getArray("byline")
.getObject(0)
.getObject("text"));
if (firstByLineRendererText != null) {
try {
return Long.parseLong(Utils.removeNonDigitCharacters(firstByLineRendererText));
} catch (final NumberFormatException ignored) {
}
}
}
// These data structures are returned in both layouts
final JsonArray briefStats =
(isNewPlaylistInterface ? getPlaylistHeader() : getPlaylistInfo())
.getArray("briefStats");
if (!briefStats.isEmpty()) {
final String briefsStatsText = getTextFromObject(briefStats.getObject(0));
if (briefsStatsText != null) {
return Long.parseLong(Utils.removeNonDigitCharacters(briefsStatsText));
}
}
final JsonArray stats = (isNewPlaylistInterface ? getPlaylistHeader() : getPlaylistInfo())
.getArray("stats");
if (!stats.isEmpty()) {
final String statsText = getTextFromObject(stats.getObject(0));
if (statsText != null) {
return Long.parseLong(Utils.removeNonDigitCharacters(statsText));
}
}
return ITEM_COUNT_UNKNOWN;
}
@Nonnull
2023-05-11 06:00:22 +02:00
@Override
public Description getDescription() throws ParsingException {
final String description = getTextFromObject(
getPlaylistInfo().getObject("description"),
true
);
return new Description(description, Description.HTML);
2023-05-11 06:00:22 +02:00
}
2017-11-25 02:03:30 +01:00
@Nonnull
@Override
2021-03-04 05:31:13 +01:00
public InfoItemsPage<StreamInfoItem> getInitialPage() throws IOException, ExtractionException {
2020-05-13 17:26:07 +02:00
final StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
2020-04-15 14:09:46 +02:00
Page nextPage = null;
final JsonArray contents = browseResponse.getObject("contents")
.getObject("twoColumnBrowseResultsRenderer")
.getArray("tabs")
.getObject(0)
.getObject("tabRenderer")
.getObject("content")
.getObject("sectionListRenderer")
.getArray("contents");
2020-05-13 17:26:07 +02:00
final JsonObject videoPlaylistObject = contents.stream()
2022-03-16 20:14:08 +01:00
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.map(content -> content.getObject("itemSectionRenderer")
.getArray("contents")
.getObject(0))
.filter(contentItemSectionRendererContents ->
contentItemSectionRendererContents.has(PLAYLIST_VIDEO_LIST_RENDERER)
|| contentItemSectionRendererContents.has(
"playlistSegmentRenderer"))
.findFirst()
.orElse(null);
if (videoPlaylistObject != null && videoPlaylistObject.has(PLAYLIST_VIDEO_LIST_RENDERER)) {
final JsonArray videosArray = videoPlaylistObject
.getObject(PLAYLIST_VIDEO_LIST_RENDERER)
.getArray("contents");
collectStreamsFrom(collector, videosArray);
nextPage = getNextPageFrom(videosArray);
2020-05-13 17:26:07 +02:00
}
2020-04-15 14:09:46 +02:00
return new InfoItemsPage<>(collector, nextPage);
}
@Override
public InfoItemsPage<StreamInfoItem> getPage(final Page page) throws IOException,
ExtractionException {
if (page == null || isNullOrEmpty(page.getUrl())) {
throw new IllegalArgumentException("Page doesn't contain an URL");
}
final StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
final JsonObject ajaxJson = getJsonPostResponse("browse", page.getBody(),
getExtractorLocalization());
2018-03-01 01:02:43 +01:00
final JsonArray continuation = ajaxJson.getArray("onResponseReceivedActions")
.getObject(0)
.getObject("appendContinuationItemsAction")
.getArray("continuationItems");
2018-03-01 01:02:43 +01:00
collectStreamsFrom(collector, continuation);
2018-03-01 01:02:43 +01:00
return new InfoItemsPage<>(collector, getNextPageFrom(continuation));
2018-02-26 15:55:27 +01:00
}
@Nullable
private Page getNextPageFrom(final JsonArray contents)
throws IOException, ExtractionException {
if (isNullOrEmpty(contents)) {
2020-04-15 14:09:46 +02:00
return null;
}
final JsonObject lastElement = contents.getObject(contents.size() - 1);
if (lastElement.has("continuationItemRenderer")) {
final String continuation = lastElement
.getObject("continuationItemRenderer")
.getObject("continuationEndpoint")
.getObject("continuationCommand")
.getString("token");
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`: - the desktop API is fetched. If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API. - if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent. Otherwise, the next endpoint will be fetched normally, if the content is available. If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made. We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used. If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used. 
Other code changes: - `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder` - `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder` - two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder` - `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper` - a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor` - `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>` - the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page - some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(
getExtractorLocalization(), getExtractorContentCountry())
.value("continuation", continuation)
.done())
.getBytes(StandardCharsets.UTF_8);
return new Page(YOUTUBEI_V1_URL + "browse?key=" + getKey()
+ DISABLE_PRETTY_PRINT_PARAMETER, body);
} else {
return null;
}
}
private void collectStreamsFrom(@Nonnull final StreamInfoItemsCollector collector,
@Nonnull final JsonArray videos) {
final TimeAgoParser timeAgoParser = getTimeAgoParser();
videos.stream()
2022-03-16 20:14:08 +01:00
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.filter(video -> video.has(PLAYLIST_VIDEO_RENDERER))
.map(video -> new YoutubeStreamInfoItemExtractor(
video.getObject(PLAYLIST_VIDEO_RENDERER), timeAgoParser))
.forEachOrdered(collector::commit);
}
/**
 * Returns the type of this playlist, computed from its URL only.
 *
 * @return the result of {@code extractPlaylistTypeFromPlaylistUrl} for {@code getUrl()}
 * @throws ParsingException declared by the signature; presumably raised when the URL cannot
 *                          be parsed - TODO confirm against the helper
 */
@Nonnull
@Override
public PlaylistInfo.PlaylistType getPlaylistType() throws ParsingException {
return extractPlaylistTypeFromPlaylistUrl(getUrl());
}
}