NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java

490 lines
20 KiB
Java

/*
* Created by Christian Schabesberger on 25.07.16.
*
* Copyright (C) 2018 Christian Schabesberger <chris.schabesberger@mailbox.org>
* YoutubeChannelExtractor.java is part of NewPipe Extractor.
*
* NewPipe Extractor is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe Extractor is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe Extractor. If not, see <https://www.gnu.org/licenses/>.
*/
package org.schabi.newpipe.extractor.services.youtube.extractors;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.getChannelResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.resolveChannelId;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.Image;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
import org.schabi.newpipe.extractor.channel.tabs.ChannelTabs;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.linkhandler.ReadyChannelTabListLinkHandler;
import org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper;
import org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.ChannelHeader;
import org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.ChannelHeader.HeaderType;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelTabExtractor.VideosTabExtractor;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelTabLinkHandlerFactory;
import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
public class YoutubeChannelExtractor extends ChannelExtractor {
// Constants of objects used multiples from channel responses
private static final String IMAGE = "image";
private static final String CONTENTS = "contents";
private static final String CONTENT_PREVIEW_IMAGE_VIEW_MODEL = "contentPreviewImageViewModel";
private static final String PAGE_HEADER_VIEW_MODEL = "pageHeaderViewModel";
private static final String TAB_RENDERER = "tabRenderer";
private static final String CONTENT = "content";
private static final String METADATA = "metadata";
private static final String AVATAR = "avatar";
private static final String THUMBNAILS = "thumbnails";
private static final String SOURCES = "sources";
private JsonObject jsonResponse;
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private Optional<ChannelHeader> channelHeader;
private String channelId;
/**
* If a channel is age-restricted, its pages are only accessible to logged-in and
* age-verified users, we get an {@code channelAgeGateRenderer} in this case, containing only
* the following metadata: channel name and channel avatar.
*
* <p>
* This restriction doesn't seem to apply to all countries.
* </p>
*/
@Nullable
private JsonObject channelAgeGateRenderer;
public YoutubeChannelExtractor(final StreamingService service,
final ListLinkHandler linkHandler) {
super(service, linkHandler);
}
@Override
public void onFetchPage(@Nonnull final Downloader downloader)
throws IOException, ExtractionException {
final String channelPath = super.getId();
final String id = resolveChannelId(channelPath);
// Fetch Videos tab
final YoutubeChannelHelper.ChannelResponseData data = getChannelResponse(id,
"EgZ2aWRlb3PyBgQKAjoA", getExtractorLocalization(), getExtractorContentCountry());
jsonResponse = data.jsonResponse;
channelHeader = YoutubeChannelHelper.getChannelHeader(jsonResponse);
channelId = data.channelId;
channelAgeGateRenderer = YoutubeChannelHelper.getChannelAgeGateRenderer(jsonResponse);
}
@Nonnull
@Override
public String getUrl() throws ParsingException {
try {
return YoutubeChannelLinkHandlerFactory.getInstance().getUrl("channel/" + getId());
} catch (final ParsingException e) {
return super.getUrl();
}
}
@Nonnull
@Override
public String getId() throws ParsingException {
assertPageFetched();
return YoutubeChannelHelper.getChannelId(channelHeader, jsonResponse, channelId);
}
@Nonnull
@Override
public String getName() throws ParsingException {
assertPageFetched();
return YoutubeChannelHelper.getChannelName(
channelHeader, jsonResponse, channelAgeGateRenderer);
}
@Nonnull
@Override
public List<Image> getAvatars() throws ParsingException {
assertPageFetched();
if (channelAgeGateRenderer != null) {
return Optional.ofNullable(channelAgeGateRenderer.getObject(AVATAR)
.getArray(THUMBNAILS))
.map(YoutubeParsingHelper::getImagesFromThumbnailsArray)
.orElseThrow(() -> new ParsingException("Could not get avatars"));
}
return channelHeader.map(header -> {
switch (header.headerType) {
case PAGE:
final JsonObject imageObj = header.json.getObject(CONTENT)
.getObject(PAGE_HEADER_VIEW_MODEL)
.getObject(IMAGE);
if (imageObj.has(CONTENT_PREVIEW_IMAGE_VIEW_MODEL)) {
return imageObj.getObject(CONTENT_PREVIEW_IMAGE_VIEW_MODEL)
.getObject(IMAGE)
.getArray(SOURCES);
}
if (imageObj.has("decoratedAvatarViewModel")) {
return imageObj.getObject(AVATAR)
.getObject("avatarViewModel")
.getObject(IMAGE)
.getArray(SOURCES);
}
// Return an empty avatar array as a fallback
return new JsonArray();
case INTERACTIVE_TABBED:
return header.json.getObject("boxArt")
.getArray(THUMBNAILS);
case C4_TABBED:
case CAROUSEL:
default:
return header.json.getObject(AVATAR)
.getArray(THUMBNAILS);
}
})
.map(YoutubeParsingHelper::getImagesFromThumbnailsArray)
.orElseThrow(() -> new ParsingException("Could not get avatars"));
}
@Nonnull
@Override
public List<Image> getBanners() {
assertPageFetched();
if (channelAgeGateRenderer != null) {
return List.of();
}
return channelHeader.map(header -> {
if (header.headerType == HeaderType.PAGE) {
final JsonObject pageHeaderViewModel = header.json.getObject(CONTENT)
.getObject(PAGE_HEADER_VIEW_MODEL);
if (pageHeaderViewModel.has("banner")) {
return pageHeaderViewModel.getObject("imageBannerViewModel")
.getObject(IMAGE)
.getArray(SOURCES);
}
// No banner is available (this should happen on pageHeaderRenderers of
// system channels), use an empty JsonArray instead
return new JsonArray();
}
return header.json
.getObject("banner")
.getArray(THUMBNAILS);
})
.map(YoutubeParsingHelper::getImagesFromThumbnailsArray)
.orElse(List.of());
}
@Override
public String getFeedUrl() throws ParsingException {
// RSS feeds are accessible for age-restricted channels, no need to check whether a channel
// has a channelAgeGateRenderer
try {
return YoutubeParsingHelper.getFeedUrlFrom(getId());
} catch (final Exception e) {
throw new ParsingException("Could not get feed URL", e);
}
}
@Override
public long getSubscriberCount() throws ParsingException {
assertPageFetched();
if (channelAgeGateRenderer != null) {
return UNKNOWN_SUBSCRIBER_COUNT;
}
if (channelHeader.isPresent()) {
final ChannelHeader header = channelHeader.get();
if (header.headerType == HeaderType.INTERACTIVE_TABBED) {
// No subscriber count is available on interactiveTabbedHeaderRenderer header
return UNKNOWN_SUBSCRIBER_COUNT;
}
final JsonObject headerJson = header.json;
if (header.headerType == HeaderType.PAGE) {
return getSubscriberCountFromPageChannelHeader(headerJson);
}
JsonObject textObject = null;
if (headerJson.has("subscriberCountText")) {
textObject = headerJson.getObject("subscriberCountText");
} else if (headerJson.has("subtitle")) {
textObject = headerJson.getObject("subtitle");
}
if (textObject != null) {
try {
return Utils.mixedNumberWordToLong(getTextFromObject(textObject));
} catch (final NumberFormatException e) {
throw new ParsingException("Could not get subscriber count", e);
}
}
}
return UNKNOWN_SUBSCRIBER_COUNT;
}
private long getSubscriberCountFromPageChannelHeader(@Nonnull final JsonObject headerJson)
throws ParsingException {
final JsonObject metadataObject = headerJson.getObject(CONTENT)
.getObject(PAGE_HEADER_VIEW_MODEL)
.getObject(METADATA);
if (metadataObject.has("contentMetadataViewModel")) {
final JsonArray metadataPart = metadataObject.getObject("contentMetadataViewModel")
.getArray("metadataRows")
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.map(metadataRow -> metadataRow.getArray("metadataParts"))
/*
Find metadata parts which have two elements: channel handle and subscriber
count.
On autogenerated music channels, the subscriber count is not shown with this
header.
Use the first metadata parts object found.
*/
.filter(metadataParts -> metadataParts.size() == 2)
.findFirst()
.orElse(null);
if (metadataPart == null) {
// As the parsing of the metadata parts object needed to get the subscriber count
// is fragile, return UNKNOWN_SUBSCRIBER_COUNT when it cannot be got
return UNKNOWN_SUBSCRIBER_COUNT;
}
try {
// The subscriber count is at the same position for all languages as of 02/03/2024
return Utils.mixedNumberWordToLong(metadataPart.getObject(0)
.getObject("text")
.getString(CONTENT));
} catch (final NumberFormatException e) {
throw new ParsingException("Could not get subscriber count", e);
}
}
// If the channel header has no contentMetadataViewModel (which is the case for system
// channels using this header), return UNKNOWN_SUBSCRIBER_COUNT
return UNKNOWN_SUBSCRIBER_COUNT;
}
@Override
public String getDescription() throws ParsingException {
assertPageFetched();
if (channelAgeGateRenderer != null) {
return null;
}
try {
if (channelHeader.isPresent()) {
final ChannelHeader header = channelHeader.get();
if (header.headerType == HeaderType.INTERACTIVE_TABBED) {
/*
In an interactiveTabbedHeaderRenderer, the real description, is only available
in its header
The other one returned in non-About tabs accessible in the
microformatDataRenderer object of the response may be completely different
The description extracted is incomplete and the original one can be only
accessed from the About tab
*/
return getTextFromObject(header.json.getObject("description"));
}
}
// The description is cut and the original one can be only accessed from the About tab
return jsonResponse.getObject("title")
.getObject("channelMetadataRenderer")
.getString("description");
} catch (final Exception e) {
throw new ParsingException("Could not get channel description", e);
}
}
@Override
public String getParentChannelName() {
return "";
}
@Override
public String getParentChannelUrl() {
return "";
}
@Nonnull
@Override
public List<Image> getParentChannelAvatars() {
return List.of();
}
@Override
public boolean isVerified() throws ParsingException {
assertPageFetched();
if (channelAgeGateRenderer != null) {
// Verified status is unknown with channelAgeGateRenderers, return false in this case
return false;
}
return YoutubeChannelHelper.isChannelVerified(channelHeader.orElseThrow(() ->
new ParsingException("Could not get verified status")));
}
@Nonnull
@Override
public List<ListLinkHandler> getTabs() throws ParsingException {
assertPageFetched();
if (channelAgeGateRenderer == null) {
return getTabsForNonAgeRestrictedChannels();
}
return getTabsForAgeRestrictedChannels();
}
@Nonnull
private List<ListLinkHandler> getTabsForNonAgeRestrictedChannels() throws ParsingException {
final JsonArray responseTabs = jsonResponse.getObject(CONTENTS)
.getObject("twoColumnBrowseResultsRenderer")
.getArray("tabs");
final List<ListLinkHandler> tabs = new ArrayList<>();
final Consumer<String> addNonVideosTab = tabName -> {
try {
tabs.add(YoutubeChannelTabLinkHandlerFactory.getInstance().fromQuery(
channelId, List.of(tabName), ""));
} catch (final ParsingException ignored) {
// Do not add the tab if we couldn't create the LinkHandler
}
};
final String name = getName();
final String url = getUrl();
final String id = getId();
responseTabs.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.filter(tab -> tab.has(TAB_RENDERER))
.map(tab -> tab.getObject(TAB_RENDERER))
.forEach(tabRenderer -> {
final String tabUrl = tabRenderer.getObject("endpoint")
.getObject("commandMetadata")
.getObject("webCommandMetadata")
.getString("url");
if (tabUrl != null) {
final String[] urlParts = tabUrl.split("/");
if (urlParts.length == 0) {
return;
}
final String urlSuffix = urlParts[urlParts.length - 1];
switch (urlSuffix) {
case "videos":
// Since the Videos tab has already its contents fetched, make
// sure it is in the first position
// YoutubeChannelTabExtractor still supports fetching this tab
tabs.add(0, new ReadyChannelTabListLinkHandler(
tabUrl,
channelId,
ChannelTabs.VIDEOS,
(service, linkHandler) -> new VideosTabExtractor(
service, linkHandler, tabRenderer, name, id, url)));
break;
case "shorts":
addNonVideosTab.accept(ChannelTabs.SHORTS);
break;
case "streams":
addNonVideosTab.accept(ChannelTabs.LIVESTREAMS);
break;
case "releases":
addNonVideosTab.accept(ChannelTabs.ALBUMS);
break;
case "playlists":
addNonVideosTab.accept(ChannelTabs.PLAYLISTS);
break;
default:
// Unsupported channel tab, ignore it
break;
}
}
});
return Collections.unmodifiableList(tabs);
}
@Nonnull
private List<ListLinkHandler> getTabsForAgeRestrictedChannels() throws ParsingException {
// As we don't have access to the channel tabs list, consider that the channel has videos,
// shorts and livestreams, the data only accessible without login on YouTube's desktop
// client using uploads system playlists
// The playlists channel tab is still available on YouTube Music, but this is not
// implemented in the extractor
final List<ListLinkHandler> tabs = new ArrayList<>();
final String channelUrl = getUrl();
final Consumer<String> addTab = tabName ->
tabs.add(new ReadyChannelTabListLinkHandler(channelUrl + "/" + tabName,
channelId, tabName, YoutubeChannelTabPlaylistExtractor::new));
addTab.accept(ChannelTabs.VIDEOS);
addTab.accept(ChannelTabs.SHORTS);
addTab.accept(ChannelTabs.LIVESTREAMS);
return Collections.unmodifiableList(tabs);
}
@Nonnull
@Override
public List<String> getTags() throws ParsingException {
assertPageFetched();
if (channelAgeGateRenderer != null) {
return List.of();
}
return jsonResponse.getObject("microformat")
.getObject("microformatDataRenderer")
.getArray("tags")
.stream()
.filter(String.class::isInstance)
.map(String.class::cast)
.collect(Collectors.toUnmodifiableList());
}
}