NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java

449 lines
17 KiB
Java
Raw Normal View History

2018-05-08 21:19:03 +02:00
package org.schabi.newpipe.extractor.services.youtube.extractors;
2017-03-01 18:47:52 +01:00
import static org.schabi.newpipe.extractor.services.youtube.YouTubeChannelHelper.ChannelResponseData;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.DISABLE_PRETTY_PRINT_PARAMETER;
2022-03-18 15:09:06 +01:00
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.YOUTUBEI_V1_URL;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl;
import static org.schabi.newpipe.extractor.services.youtube.YouTubeChannelHelper.getChannelResponse;
2022-03-18 15:09:06 +01:00
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getKey;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
import static org.schabi.newpipe.extractor.services.youtube.YouTubeChannelHelper.resolveChannelId;
2022-03-18 15:09:06 +01:00
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
2020-02-23 11:23:33 +01:00
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonWriter;
2020-04-15 14:09:46 +02:00
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService;
2017-08-11 03:23:09 +02:00
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.exceptions.ContentNotSupportedException;
2017-03-01 18:47:52 +01:00
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
2022-11-04 23:47:44 +01:00
import org.schabi.newpipe.extractor.linkhandler.ChannelTabs;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
2020-02-29 16:26:28 +01:00
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
2022-11-05 00:20:53 +01:00
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelTabLinkHandlerFactory;
2018-03-01 01:02:43 +01:00
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
2018-02-24 22:20:50 +01:00
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.utils.Utils;
2017-04-21 23:31:40 +02:00
2020-10-25 20:29:47 +01:00
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
2022-11-04 23:47:44 +01:00
import java.util.Collections;
import java.util.List;
import java.util.function.Consumer;
2022-10-23 17:01:39 +02:00
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
2021-06-24 18:39:16 +02:00
import javax.annotation.Nullable;
/*
 * Created by Christian Schabesberger on 25.07.16.
 *
 * Copyright (C) Christian Schabesberger 2018 <chris.schabesberger@mailbox.org>
 * YoutubeChannelExtractor.java is part of NewPipe.
 *
 * NewPipe is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * NewPipe is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
 */
2017-08-11 03:23:09 +02:00
public class YoutubeChannelExtractor extends ChannelExtractor {
private JsonObject initialData;
2020-02-28 17:05:31 +01:00
private JsonObject videoTab;
2022-11-04 23:47:44 +01:00
private List<ListLinkHandler> tabs;
2017-09-26 22:46:21 +02:00
/**
2022-03-18 15:09:06 +01:00
* Some channels have response redirects and the only way to reliably get the id is by saving it
* <p>
* "Movies & Shows":
* <pre>
* UCuJcl0Ju-gPDoksRjK1ya-w
* UChBfWrfBXL9wS6tQtgjt_OQ UClgRkhTL3_hImCAmdLfDE4g
* UCok7UTQQEP1Rsctxiv3gwSQ
* </pre>
*/
private String redirectedChannelId;
/**
 * Creates a channel extractor; both arguments are handed unchanged to the
 * {@link ChannelExtractor} constructor.
 *
 * @param service     the streaming service this extractor belongs to
 * @param linkHandler the link handler describing the channel to extract
 */
public YoutubeChannelExtractor(final StreamingService service,
                               final ListLinkHandler linkHandler) {
    super(service, linkHandler);
}
@Override
public void onFetchPage(@Nonnull final Downloader downloader)
throws IOException, ExtractionException {
final String channelPath = super.getId();
final String id = resolveChannelId(channelPath);
// Fetch video tab
final ChannelResponseData data = getChannelResponse(id, "EgZ2aWRlb3M%3D",
2022-11-04 23:47:44 +01:00
getExtractorLocalization(), getExtractorContentCountry());
initialData = data.responseJson;
redirectedChannelId = data.channelId;
2020-02-17 09:55:06 +01:00
}
@Nonnull
@Override
public String getUrl() throws ParsingException {
try {
2020-02-29 16:26:28 +01:00
return YoutubeChannelLinkHandlerFactory.getInstance().getUrl("channel/" + getId());
} catch (final ParsingException e) {
return super.getUrl();
}
}
@Nonnull
@Override
2017-08-11 03:23:09 +02:00
public String getId() throws ParsingException {
final String channelId = initialData.getObject("header")
.getObject("c4TabbedHeaderRenderer")
2022-08-15 05:49:40 +02:00
.getString("channelId", "");
if (!channelId.isEmpty()) {
return channelId;
2020-04-15 18:49:58 +02:00
} else if (!isNullOrEmpty(redirectedChannelId)) {
return redirectedChannelId;
} else {
throw new ParsingException("Could not get channel id");
2017-03-01 18:47:52 +01:00
}
}
@Nonnull
2017-03-01 18:47:52 +01:00
@Override
2017-08-11 03:23:09 +02:00
public String getName() throws ParsingException {
2017-03-01 18:47:52 +01:00
try {
return initialData.getObject("header").getObject("c4TabbedHeaderRenderer")
.getString("title");
} catch (final Exception e) {
throw new ParsingException("Could not get channel name", e);
2017-03-01 18:47:52 +01:00
}
}
@Override
public String getAvatarUrl() throws ParsingException {
try {
2022-03-18 15:09:06 +01:00
final String url = initialData.getObject("header")
.getObject("c4TabbedHeaderRenderer").getObject("avatar").getArray("thumbnails")
.getObject(0).getString("url");
2020-02-27 17:39:23 +01:00
return fixThumbnailUrl(url);
} catch (final Exception e) {
2017-03-01 18:47:52 +01:00
throw new ParsingException("Could not get avatar", e);
}
}
@Override
public String getBannerUrl() throws ParsingException {
try {
2022-03-18 15:09:06 +01:00
final String url = initialData.getObject("header")
.getObject("c4TabbedHeaderRenderer").getObject("banner").getArray("thumbnails")
.getObject(0).getString("url");
2020-04-16 16:08:14 +02:00
2020-02-24 13:01:13 +01:00
if (url == null || url.contains("s.ytimg.com") || url.contains("default_banner")) {
2020-02-17 11:02:43 +01:00
return null;
}
2020-02-27 19:08:46 +01:00
return fixThumbnailUrl(url);
} catch (final Exception e) {
throw new ParsingException("Could not get banner", e);
2017-03-01 18:47:52 +01:00
}
}
2017-08-11 03:23:09 +02:00
@Override
public String getFeedUrl() throws ParsingException {
try {
return YoutubeParsingHelper.getFeedUrlFrom(getId());
} catch (final Exception e) {
2017-08-11 03:23:09 +02:00
throw new ParsingException("Could not get feed url", e);
}
}
@Override
public long getSubscriberCount() throws ParsingException {
final JsonObject c4TabbedHeaderRenderer = initialData.getObject("header")
.getObject("c4TabbedHeaderRenderer");
if (!c4TabbedHeaderRenderer.has("subscriberCountText")) {
2022-02-12 19:00:54 +01:00
return UNKNOWN_SUBSCRIBER_COUNT;
2017-03-01 18:47:52 +01:00
}
try {
return Utils.mixedNumberWordToLong(getTextFromObject(c4TabbedHeaderRenderer
.getObject("subscriberCountText")));
} catch (final NumberFormatException e) {
throw new ParsingException("Could not get subscriber count", e);
}
}
@Override
public String getDescription() throws ParsingException {
try {
return initialData.getObject("metadata").getObject("channelMetadataRenderer")
.getString("description");
} catch (final Exception e) {
2017-08-11 03:23:09 +02:00
throw new ParsingException("Could not get channel description", e);
}
}
@Override
2021-06-24 18:39:16 +02:00
public String getParentChannelName() {
return "";
}
@Override
2021-06-24 18:39:16 +02:00
public String getParentChannelUrl() {
return "";
}
@Override
2021-06-24 18:39:16 +02:00
public String getParentChannelAvatarUrl() {
return "";
}
/**
 * Checks the badges of the response header for a verification badge.
 *
 * @return the result of {@code YoutubeParsingHelper.isVerified} for the header's
 *         {@code badges} array
 */
@Override
public boolean isVerified() throws ParsingException {
    return YoutubeParsingHelper.isVerified(
            initialData.getObject("header")
                    .getObject("c4TabbedHeaderRenderer")
                    .getArray("badges"));
}
@Nonnull
@Override
2022-11-04 23:47:44 +01:00
public List<ListLinkHandler> getTabs() throws ParsingException {
return tabs;
}
2022-10-23 17:01:39 +02:00
/**
 * Collects the channel tags from the response microformat.
 *
 * @return the string form of every entry of the {@code tags} array of the
 *         {@code microformatDataRenderer}, in order
 */
@Nonnull
@Override
public List<String> getTags() throws ParsingException {
    final JsonArray rawTags = initialData.getObject("microformat")
            .getObject("microformatDataRenderer")
            .getArray("tags");

    final List<String> tagNames = new ArrayList<>();
    for (final Object rawTag : rawTags) {
        tagNames.add(rawTag.toString());
    }
    return tagNames;
}
2017-11-25 02:03:30 +01:00
@Nonnull
@Override
public InfoItemsPage<StreamInfoItem> getInitialPage() throws IOException, ExtractionException {
final StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
2020-04-15 14:09:46 +02:00
Page nextPage = null;
2023-03-22 01:00:05 +01:00
extractTabs();
2023-03-22 01:00:05 +01:00
if (videoTab != null) {
final JsonObject tabContent = videoTab.getObject("content");
JsonArray items = tabContent
.getObject("sectionListRenderer")
.getArray("contents").getObject(0).getObject("itemSectionRenderer")
.getArray("contents").getObject(0).getObject("gridRenderer").getArray("items");
if (items.isEmpty()) {
items = tabContent.getObject("richGridRenderer").getArray("contents");
}
final List<String> channelIds = new ArrayList<>();
channelIds.add(getName());
channelIds.add(getUrl());
final JsonObject continuation = collectStreamsFrom(collector, items, channelIds);
nextPage = getNextPageFrom(continuation, channelIds);
}
2020-04-15 14:09:46 +02:00
return new InfoItemsPage<>(collector, nextPage);
2017-08-06 22:20:15 +02:00
}
@Override
public InfoItemsPage<StreamInfoItem> getPage(final Page page)
throws IOException, ExtractionException {
if (page == null || isNullOrEmpty(page.getUrl())) {
throw new IllegalArgumentException("Page doesn't contain an URL");
}
final List<String> channelIds = page.getIds();
final StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
final JsonObject ajaxJson = getJsonPostResponse("browse", page.getBody(),
getExtractorLocalization());
2022-03-18 15:09:06 +01:00
final JsonObject sectionListContinuation = ajaxJson.getArray("onResponseReceivedActions")
.getObject(0)
.getObject("appendContinuationItemsAction");
2018-03-01 01:02:43 +01:00
final JsonObject continuation = collectStreamsFrom(collector, sectionListContinuation
.getArray("continuationItems"), channelIds);
2018-02-26 15:55:27 +01:00
return new InfoItemsPage<>(collector, getNextPageFrom(continuation, channelIds));
}
2021-06-24 18:39:16 +02:00
@Nullable
private Page getNextPageFrom(final JsonObject continuations,
final List<String> channelIds)
throws IOException, ExtractionException {
if (isNullOrEmpty(continuations)) {
2020-04-15 14:09:46 +02:00
return null;
2020-04-16 16:08:14 +02:00
}
2021-02-12 14:39:09 +01:00
final JsonObject continuationEndpoint = continuations.getObject("continuationEndpoint");
final String continuation = continuationEndpoint.getObject("continuationCommand")
.getString("token");
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`: - the desktop API is fetched. If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API. - if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent. Otherwise, the next endpoint will be fetched normally, if the content is available. If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made. We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used. If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used. 
Other code changes: - `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder` - `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder` - two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder` - `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper` - a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor` - `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>` - the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page - some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(getExtractorLocalization(),
getExtractorContentCountry())
.value("continuation", continuation)
.done())
.getBytes(StandardCharsets.UTF_8);
return new Page(YOUTUBEI_V1_URL + "browse?key=" + getKey()
+ DISABLE_PRETTY_PRINT_PARAMETER, null, channelIds, null, body);
}
2021-02-12 14:39:09 +01:00
/**
* Collect streams from an array of items
*
* @param collector the collector where videos will be committed
* @param videos the array to get videos from
* @param channelIds the ids of the channel, which are its name and its URL
2021-02-12 14:39:09 +01:00
* @return the continuation object
*/
2021-06-24 18:39:16 +02:00
private JsonObject collectStreamsFrom(@Nonnull final StreamInfoItemsCollector collector,
@Nonnull final JsonArray videos,
@Nonnull final List<String> channelIds) {
collector.reset();
final String uploaderName = channelIds.get(0);
final String uploaderUrl = channelIds.get(1);
final TimeAgoParser timeAgoParser = getTimeAgoParser();
2021-02-12 14:39:09 +01:00
JsonObject continuation = null;
for (final Object object : videos) {
2021-02-12 14:39:09 +01:00
final JsonObject video = (JsonObject) object;
if (video.has("gridVideoRenderer")) {
2020-02-26 09:31:26 +01:00
collector.commit(new YoutubeStreamInfoItemExtractor(
2021-02-12 14:39:09 +01:00
video.getObject("gridVideoRenderer"), timeAgoParser) {
2017-03-01 18:47:52 +01:00
@Override
2020-02-23 11:23:33 +01:00
public String getUploaderName() {
return uploaderName;
2017-03-01 18:47:52 +01:00
}
2017-11-25 02:38:46 +01:00
@Override
2020-02-23 11:23:33 +01:00
public String getUploaderUrl() {
2017-11-25 02:38:46 +01:00
return uploaderUrl;
}
2017-03-01 18:47:52 +01:00
});
} else if (video.has("richItemRenderer")) {
collector.commit(new YoutubeStreamInfoItemExtractor(
video.getObject("richItemRenderer")
.getObject("content").getObject("videoRenderer"), timeAgoParser) {
@Override
public String getUploaderName() {
return uploaderName;
}
@Override
public String getUploaderUrl() {
return uploaderUrl;
}
});
2021-02-12 14:39:09 +01:00
} else if (video.has("continuationItemRenderer")) {
continuation = video.getObject("continuationItemRenderer");
2017-03-01 18:47:52 +01:00
}
}
2021-02-12 14:39:09 +01:00
return continuation;
2017-03-01 18:47:52 +01:00
}
2023-03-22 01:00:05 +01:00
/**
* Collect a list of available tabs and get the video tab data.
*/
private void extractTabs() throws ParsingException {
final JsonArray responseTabs = initialData.getObject("contents")
.getObject("twoColumnBrowseResultsRenderer")
.getArray("tabs");
2022-03-18 15:09:06 +01:00
JsonObject foundVideoTab = null;
tabs = new ArrayList<>();
2022-11-04 23:47:44 +01:00
final Consumer<String> addTab = tab -> {
try {
2022-11-05 00:20:53 +01:00
tabs.add(YoutubeChannelTabLinkHandlerFactory.getInstance().fromQuery(
redirectedChannelId, Collections.singletonList(tab), ""));
2022-11-04 23:47:44 +01:00
} catch (final ParsingException ignored) {
}
};
for (final Object tab : responseTabs) {
2020-04-16 16:08:14 +02:00
if (((JsonObject) tab).has("tabRenderer")) {
final JsonObject tabRenderer = ((JsonObject) tab).getObject("tabRenderer");
final String tabUrl = tabRenderer.getObject("endpoint")
.getObject("commandMetadata").getObject("webCommandMetadata")
.getString("url");
if (tabUrl != null) {
final String[] urlParts = tabUrl.split("/");
final String urlSuffix = urlParts[urlParts.length - 1];
switch (urlSuffix) {
case "videos":
foundVideoTab = tabRenderer;
break;
case "playlists":
2022-11-04 23:47:44 +01:00
addTab.accept(ChannelTabs.PLAYLISTS);
break;
case "streams":
addTab.accept(ChannelTabs.LIVESTREAMS);
break;
case "shorts":
2022-11-04 23:47:44 +01:00
addTab.accept(ChannelTabs.SHORTS);
break;
case "channels":
2022-11-04 23:47:44 +01:00
addTab.accept(ChannelTabs.CHANNELS);
break;
}
}
}
}
2022-03-18 15:09:06 +01:00
if (foundVideoTab == null) {
if (tabs.isEmpty()) {
throw new ContentNotSupportedException("This channel has no supported tabs");
}
2023-03-22 01:00:05 +01:00
return;
2020-04-20 14:27:33 +02:00
}
final String messageRendererText = getTextFromObject(
foundVideoTab.getObject("content")
.getObject("sectionListRenderer")
.getArray("contents")
.getObject(0)
.getObject("itemSectionRenderer")
.getArray("contents")
.getObject(0)
.getObject("messageRenderer")
.getObject("text"));
if (messageRendererText != null
&& messageRendererText.equals("This channel has no videos.")) {
2023-03-22 01:00:05 +01:00
return;
}
videoTab = foundVideoTab;
}
2017-03-01 18:47:52 +01:00
}