NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelHelper.java

557 lines
24 KiB
Java

package org.schabi.newpipe.extractor.services.youtube;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonWriter;
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.ContentCountry;
import org.schabi.newpipe.extractor.localization.Localization;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Optional;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.defaultAlertsCheck;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
/**
* Shared functions for extracting YouTube channel pages and tabs.
*/
public final class YoutubeChannelHelper {
private static final String BROWSE_ENDPOINT = "browseEndpoint";
private static final String BROWSE_ID = "browseId";
private static final String CAROUSEL_HEADER_RENDERER = "carouselHeaderRenderer";
private static final String C4_TABBED_HEADER_RENDERER = "c4TabbedHeaderRenderer";
private static final String CONTENT = "content";
private static final String CONTENTS = "contents";
private static final String HEADER = "header";
private static final String PAGE_HEADER_VIEW_MODEL = "pageHeaderViewModel";
private static final String TAB_RENDERER = "tabRenderer";
private static final String TITLE = "title";
private static final String TOPIC_CHANNEL_DETAILS_RENDERER = "topicChannelDetailsRenderer";
private YoutubeChannelHelper() {
}
/**
* Take a YouTube channel ID or URL path, resolve it if necessary and return a channel ID.
*
* @param idOrPath a YouTube channel ID or URL path
* @return a YouTube channel ID
* @throws IOException if a channel resolve request failed
* @throws ExtractionException if a channel resolve request response could not be parsed or is
* invalid
*/
@Nonnull
public static String resolveChannelId(@Nonnull final String idOrPath)
throws ExtractionException, IOException {
final String[] channelId = idOrPath.split("/");
if (channelId[0].startsWith("UC")) {
return channelId[0];
}
// If the URL is not a /channel URL, we need to use the navigation/resolve_url endpoint of
// the InnerTube API to get the channel id. If this fails or if the URL is not a /channel
// URL, then no information about the channel associated with this URL was found,
// so the unresolved url will be returned.
if (!channelId[0].equals("channel")) {
final byte[] body = JsonWriter.string(
prepareDesktopJsonBuilder(Localization.DEFAULT, ContentCountry.DEFAULT)
.value("url", "https://www.youtube.com/" + idOrPath)
.done())
.getBytes(StandardCharsets.UTF_8);
final JsonObject jsonResponse = getJsonPostResponse(
"navigation/resolve_url", body, Localization.DEFAULT);
checkIfChannelResponseIsValid(jsonResponse);
final JsonObject endpoint = jsonResponse.getObject("endpoint");
final String webPageType = endpoint.getObject("commandMetadata")
.getObject("webCommandMetadata")
.getString("webPageType", "");
final JsonObject browseEndpoint = endpoint.getObject(BROWSE_ENDPOINT);
final String browseId = browseEndpoint.getString(BROWSE_ID, "");
if (webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_BROWSE")
|| webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_CHANNEL")
&& !browseId.isEmpty()) {
if (!browseId.startsWith("UC")) {
throw new ExtractionException("Redirected id is not pointing to a channel");
}
return browseId;
}
}
// return the unresolved URL
return channelId[1];
}
/**
* Response data object for {@link #getChannelResponse(String, String, Localization,
* ContentCountry)}, after any redirection in the allowed redirects count ({@code 3}).
*/
public static final class ChannelResponseData {
/**
* The channel response as a JSON object, after all redirects.
*/
@Nonnull
public final JsonObject jsonResponse;
/**
* The channel ID after all redirects.
*/
@Nonnull
public final String channelId;
private ChannelResponseData(@Nonnull final JsonObject jsonResponse,
@Nonnull final String channelId) {
this.jsonResponse = jsonResponse;
this.channelId = channelId;
}
}
/**
* Fetch a YouTube channel tab response, using the given channel ID and tab parameters.
*
* <p>
* Redirections to other channels are supported to up to 3 redirects, which could happen for
* instance for localized channels or for auto-generated ones. For instance, there are three IDs
* of the auto-generated "Movies and Shows" channel, i.e. {@code UCuJcl0Ju-gPDoksRjK1ya-w},
* {@code UChBfWrfBXL9wS6tQtgjt_OQ} and {@code UCok7UTQQEP1Rsctxiv3gwSQ}, and they all redirect
* to the {@code UClgRkhTL3_hImCAmdLfDE4g} one.
* </p>
*
* @param channelId a valid YouTube channel ID
* @param parameters the parameters to specify the YouTube channel tab; if invalid ones are
* specified, YouTube should return the {@code Home} tab
* @param localization the {@link Localization} to use
* @param country the {@link ContentCountry} to use
* @return a {@link ChannelResponseData channel response data}
* @throws IOException if a channel request failed
* @throws ExtractionException if a channel request response could not be parsed or is invalid
*/
@Nonnull
public static ChannelResponseData getChannelResponse(@Nonnull final String channelId,
@Nonnull final String parameters,
@Nonnull final Localization localization,
@Nonnull final ContentCountry country)
throws ExtractionException, IOException {
String id = channelId;
JsonObject ajaxJson = null;
int level = 0;
while (level < 3) {
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(
localization, country)
.value(BROWSE_ID, id)
.value("params", parameters)
.done())
.getBytes(StandardCharsets.UTF_8);
final JsonObject jsonResponse = getJsonPostResponse(
"browse", body, localization);
checkIfChannelResponseIsValid(jsonResponse);
final JsonObject endpoint = jsonResponse.getArray("onResponseReceivedActions")
.getObject(0)
.getObject("navigateAction")
.getObject("endpoint");
final String webPageType = endpoint.getObject("commandMetadata")
.getObject("webCommandMetadata")
.getString("webPageType", "");
final String browseId = endpoint.getObject(BROWSE_ENDPOINT)
.getString(BROWSE_ID, "");
if (webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_BROWSE")
|| webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_CHANNEL")
&& !browseId.isEmpty()) {
if (!browseId.startsWith("UC")) {
throw new ExtractionException("Redirected id is not pointing to a channel");
}
id = browseId;
level++;
} else {
ajaxJson = jsonResponse;
break;
}
}
if (ajaxJson == null) {
throw new ExtractionException("Got no channel response after 3 redirects");
}
defaultAlertsCheck(ajaxJson);
return new ChannelResponseData(ajaxJson, id);
}
/**
* Assert that a channel JSON response does not contain an {@code error} JSON object.
*
* @param jsonResponse a channel JSON response
* @throws ContentNotAvailableException if the channel was not found
*/
private static void checkIfChannelResponseIsValid(@Nonnull final JsonObject jsonResponse)
throws ContentNotAvailableException {
if (!isNullOrEmpty(jsonResponse.getObject("error"))) {
final JsonObject errorJsonObject = jsonResponse.getObject("error");
final int errorCode = errorJsonObject.getInt("code");
if (errorCode == 404) {
throw new ContentNotAvailableException("This channel doesn't exist.");
} else {
throw new ContentNotAvailableException("Got error:\""
+ errorJsonObject.getString("status") + "\": "
+ errorJsonObject.getString("message"));
}
}
}
/**
* A channel header response.
*
* <p>
* This class allows the distinction between a classic header and a carousel one, used for
* auto-generated ones like the gaming or music topic channels and for big events such as the
* Coachella music festival, which have a different data structure and do not return the same
* properties.
* </p>
*/
public static final class ChannelHeader {
/**
* Types of supported YouTube channel headers.
*/
public enum HeaderType {
/**
* A {@code c4TabbedHeaderRenderer} channel header type.
*
* <p>
* This header is returned on the majority of channels and contains the channel's name,
* its banner and its avatar and its subscriber count in most cases.
* </p>
*/
C4_TABBED,
/**
* An {@code interactiveTabbedHeaderRenderer} channel header type.
*
* <p>
* This header is returned for gaming topic channels, and only contains the channel's
* name, its banner and a poster as its "avatar".
* </p>
*/
INTERACTIVE_TABBED,
/**
* A {@code carouselHeaderRenderer} channel header type.
*
* <p>
* This header returns only the channel's name, its avatar and its subscriber count.
* </p>
*/
CAROUSEL,
/**
* A {@code pageHeaderRenderer} channel header type.
*
* <p>
* This header returns only the channel's name and its avatar for system channels.
* </p>
*/
PAGE
}
/**
* The channel header JSON response.
*/
@Nonnull
public final JsonObject json;
/**
* The type of the channel header.
*
* <p>
* See the documentation of the {@link HeaderType} class for more details.
* </p>
*/
public final HeaderType headerType;
private ChannelHeader(@Nonnull final JsonObject json, final HeaderType headerType) {
this.json = json;
this.headerType = headerType;
}
}
/**
* Get a channel header as an {@link Optional} it if exists.
*
* @param channelResponse a full channel JSON response
* @return an {@link Optional} containing a {@link ChannelHeader} or an empty {@link Optional}
* if no supported header has been found
*/
@Nonnull
public static Optional<ChannelHeader> getChannelHeader(
@Nonnull final JsonObject channelResponse) {
final JsonObject header = channelResponse.getObject(HEADER);
if (header.has(C4_TABBED_HEADER_RENDERER)) {
return Optional.of(header.getObject(C4_TABBED_HEADER_RENDERER))
.map(json -> new ChannelHeader(json, ChannelHeader.HeaderType.C4_TABBED));
} else if (header.has(CAROUSEL_HEADER_RENDERER)) {
return header.getObject(CAROUSEL_HEADER_RENDERER)
.getArray(CONTENTS)
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.filter(item -> item.has(TOPIC_CHANNEL_DETAILS_RENDERER))
.findFirst()
.map(item -> item.getObject(TOPIC_CHANNEL_DETAILS_RENDERER))
.map(json -> new ChannelHeader(json, ChannelHeader.HeaderType.CAROUSEL));
} else if (header.has("pageHeaderRenderer")) {
return Optional.of(header.getObject("pageHeaderRenderer"))
.map(json -> new ChannelHeader(json, ChannelHeader.HeaderType.PAGE));
} else if (header.has("interactiveTabbedHeaderRenderer")) {
return Optional.of(header.getObject("interactiveTabbedHeaderRenderer"))
.map(json -> new ChannelHeader(json,
ChannelHeader.HeaderType.INTERACTIVE_TABBED));
} else {
return Optional.empty();
}
}
/**
* Check if a channel is verified by using its header.
*
* <p>
* The header is mandatory, so the verified status of age-restricted channels with a
* {@code channelAgeGateRenderer} cannot be checked.
* </p>
*
* @param channelHeader the {@link ChannelHeader} of a non age-restricted channel
* @return whether the channel is verified
*/
public static boolean isChannelVerified(@Nonnull final ChannelHeader channelHeader) {
switch (channelHeader.headerType) {
// carouselHeaderRenderers do not contain any verification badges
// Since they are only shown on YouTube internal channels or on channels of large
// organizations broadcasting live events, we can assume the channel to be verified
case CAROUSEL:
return true;
case PAGE:
final JsonObject pageHeaderViewModel = channelHeader.json.getObject(CONTENT)
.getObject(PAGE_HEADER_VIEW_MODEL);
final boolean hasCircleOrMusicIcon = pageHeaderViewModel.getObject(TITLE)
.getObject("dynamicTextViewModel")
.getObject("text")
.getArray("attachmentRuns")
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.anyMatch(attachmentRun -> attachmentRun.getObject("element")
.getObject("type")
.getObject("imageType")
.getObject("image")
.getArray("sources")
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.anyMatch(source -> {
final String imageName = source.getObject("clientResource")
.getString("imageName");
return "CHECK_CIRCLE_FILLED".equals(imageName)
|| "MUSIC_FILLED".equals(imageName);
}));
if (!hasCircleOrMusicIcon && pageHeaderViewModel.getObject("image")
.has("contentPreviewImageViewModel")) {
// If a pageHeaderRenderer has no object in which a check verified may be
// contained and if it has a contentPreviewImageViewModel, it should mean
// that the header is coming from a system channel, which we can assume to
// be verified
return true;
}
return hasCircleOrMusicIcon;
case INTERACTIVE_TABBED:
// If the header has an autoGenerated property, it should mean that the channel has
// been auto generated by YouTube: we can assume the channel to be verified in this
// case
return channelHeader.json.has("autoGenerated");
default:
return YoutubeParsingHelper.isVerified(channelHeader.json.getArray("badges"));
}
}
/**
* Get the ID of a channel from its response.
*
* <p>
* For {@link ChannelHeader.HeaderType#C4_TABBED c4TabbedHeaderRenderer} and
* {@link ChannelHeader.HeaderType#CAROUSEL carouselHeaderRenderer} channel headers, the ID is
* get from the header.
* </p>
*
* <p>
* For other headers or if it cannot be got, the ID from the {@code channelMetadataRenderer}
* in the channel response is used.
* </p>
*
* <p>
* If the ID cannot still be get, the fallback channel ID, if provided, will be used.
* </p>
*
* @param header the channel header
* @param fallbackChannelId the fallback channel ID, which can be null
* @return the ID of the channel
* @throws ParsingException if the channel ID cannot be got from the channel header, the
* channel response and the fallback channel ID
*/
@Nonnull
public static String getChannelId(
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
@Nonnull final Optional<ChannelHeader> header,
@Nonnull final JsonObject jsonResponse,
@Nullable final String fallbackChannelId) throws ParsingException {
if (header.isPresent()) {
final ChannelHeader channelHeader = header.get();
switch (channelHeader.headerType) {
case C4_TABBED:
final String channelId = channelHeader.json.getObject(HEADER)
.getObject(C4_TABBED_HEADER_RENDERER)
.getString("channelId", "");
if (!isNullOrEmpty(channelId)) {
return channelId;
}
final String navigationC4TabChannelId = channelHeader.json
.getObject("navigationEndpoint")
.getObject(BROWSE_ENDPOINT)
.getString(BROWSE_ID);
if (!isNullOrEmpty(navigationC4TabChannelId)) {
return navigationC4TabChannelId;
}
break;
case CAROUSEL:
final String navigationCarouselChannelId = channelHeader.json.getObject(HEADER)
.getObject(CAROUSEL_HEADER_RENDERER)
.getArray(CONTENTS)
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.filter(item -> item.has(TOPIC_CHANNEL_DETAILS_RENDERER))
.findFirst()
.orElse(new JsonObject())
.getObject(TOPIC_CHANNEL_DETAILS_RENDERER)
.getObject("navigationEndpoint")
.getObject(BROWSE_ENDPOINT)
.getString(BROWSE_ID);
if (!isNullOrEmpty(navigationCarouselChannelId)) {
return navigationCarouselChannelId;
}
break;
default:
break;
}
}
final String externalChannelId = jsonResponse.getObject("metadata")
.getObject("channelMetadataRenderer")
.getString("externalChannelId");
if (!isNullOrEmpty(externalChannelId)) {
return externalChannelId;
}
if (!isNullOrEmpty(fallbackChannelId)) {
return fallbackChannelId;
} else {
throw new ParsingException("Could not get channel ID");
}
}
@Nonnull
public static String getChannelName(@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
@Nonnull final Optional<ChannelHeader> channelHeader,
@Nonnull final JsonObject jsonResponse,
@Nullable final JsonObject channelAgeGateRenderer)
throws ParsingException {
if (channelAgeGateRenderer != null) {
final String title = channelAgeGateRenderer.getString("channelTitle");
if (isNullOrEmpty(title)) {
throw new ParsingException("Could not get channel name");
}
return title;
}
final String metadataRendererTitle = jsonResponse.getObject("metadata")
.getObject("channelMetadataRenderer")
.getString(TITLE);
if (!isNullOrEmpty(metadataRendererTitle)) {
return metadataRendererTitle;
}
return channelHeader.map(header -> {
final JsonObject channelJson = header.json;
switch (header.headerType) {
case PAGE:
return channelJson.getObject(CONTENT)
.getObject(PAGE_HEADER_VIEW_MODEL)
.getObject(TITLE)
.getObject("dynamicTextViewModel")
.getObject("text")
.getString(CONTENT, channelJson.getString("pageTitle"));
case CAROUSEL:
case INTERACTIVE_TABBED:
return getTextFromObject(channelJson.getObject(TITLE));
case C4_TABBED:
default:
return channelJson.getString(TITLE);
}
})
// The channel name from a microformatDataRenderer may be different from the one
// displayed, especially for auto-generated channels, depending on the language
// requested for the interface (hl parameter of InnerTube requests' payload)
.or(() -> Optional.ofNullable(jsonResponse.getObject("microformat")
.getObject("microformatDataRenderer")
.getString(TITLE)))
.orElseThrow(() -> new ParsingException("Could not get channel name"));
}
@Nullable
public static JsonObject getChannelAgeGateRenderer(@Nonnull final JsonObject jsonResponse) {
return jsonResponse.getObject(CONTENTS)
.getObject("twoColumnBrowseResultsRenderer")
.getArray("tabs")
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.flatMap(tab -> tab.getObject(TAB_RENDERER)
.getObject(CONTENT)
.getObject("sectionListRenderer")
.getArray(CONTENTS)
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast))
.filter(content -> content.has("channelAgeGateRenderer"))
.map(content -> content.getObject("channelAgeGateRenderer"))
.findFirst()
.orElse(null);
}
}