[YouTube] Support pageHeader on user channels

Also move duplicate strings into constants and add a missing default switch
case.
This commit is contained in:
AudricV 2024-04-04 19:41:30 +02:00 committed by Stypox
parent 2a13b96866
commit 5495e23335
No known key found for this signature in database
GPG Key ID: 4BDF1B40A49FDD23
2 changed files with 118 additions and 156 deletions

View File

@ -23,7 +23,6 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.getChannelResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.resolveChannelId;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
@ -59,6 +58,18 @@ import javax.annotation.Nullable;
public class YoutubeChannelExtractor extends ChannelExtractor {
// Names of JSON objects that are used multiple times when parsing channel responses
private static final String IMAGE = "image";
private static final String CONTENTS = "contents";
private static final String CONTENT_PREVIEW_IMAGE_VIEW_MODEL = "contentPreviewImageViewModel";
private static final String PAGE_HEADER_VIEW_MODEL = "pageHeaderViewModel";
private static final String TAB_RENDERER = "tabRenderer";
private static final String CONTENT = "content";
private static final String METADATA = "metadata";
private static final String AVATAR = "avatar";
private static final String THUMBNAILS = "thumbnails";
private static final String SOURCES = "sources";
private JsonObject jsonResponse;
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
@ -95,28 +106,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
jsonResponse = data.jsonResponse;
channelHeader = YoutubeChannelHelper.getChannelHeader(jsonResponse);
channelId = data.channelId;
channelAgeGateRenderer = getChannelAgeGateRenderer();
}
@Nullable
private JsonObject getChannelAgeGateRenderer() {
return jsonResponse.getObject("contents")
.getObject("twoColumnBrowseResultsRenderer")
.getArray("tabs")
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.flatMap(tab -> tab.getObject("tabRenderer")
.getObject("content")
.getObject("sectionListRenderer")
.getArray("contents")
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast))
.filter(content -> content.has("channelAgeGateRenderer"))
.map(content -> content.getObject("channelAgeGateRenderer"))
.findFirst()
.orElse(null);
channelAgeGateRenderer = YoutubeChannelHelper.getChannelAgeGateRenderer(jsonResponse);
}
@Nonnull
@ -133,62 +123,15 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override
public String getId() throws ParsingException {
assertPageFetched();
return channelHeader.map(header -> header.json)
.flatMap(header -> Optional.ofNullable(header.getString("channelId"))
.or(() -> Optional.ofNullable(header.getObject("navigationEndpoint")
.getObject("browseEndpoint")
.getString("browseId"))
))
.or(() -> Optional.ofNullable(channelId))
.orElseThrow(() -> new ParsingException("Could not get channel ID"));
return YoutubeChannelHelper.getChannelId(channelHeader, jsonResponse, channelId);
}
@Nonnull
@Override
public String getName() throws ParsingException {
assertPageFetched();
if (channelAgeGateRenderer != null) {
final String title = channelAgeGateRenderer.getString("channelTitle");
if (isNullOrEmpty(title)) {
throw new ParsingException("Could not get channel name");
}
return title;
}
final String metadataRendererTitle = jsonResponse.getObject("metadata")
.getObject("channelMetadataRenderer")
.getString("title");
if (!isNullOrEmpty(metadataRendererTitle)) {
return metadataRendererTitle;
}
return channelHeader.map(header -> {
final JsonObject channelJson = header.json;
switch (header.headerType) {
case PAGE:
return channelJson.getObject("content")
.getObject("pageHeaderViewModel")
.getObject("title")
.getObject("dynamicTextViewModel")
.getObject("text")
.getString("content", channelJson.getString("pageTitle"));
case CAROUSEL:
case INTERACTIVE_TABBED:
return getTextFromObject(channelJson.getObject("title"));
case C4_TABBED:
default:
return channelJson.getString("title");
}
})
// The channel name from a microformatDataRenderer may be different from the one displayed,
// especially for auto-generated channels, depending on the language requested for the
// interface (hl parameter of InnerTube requests' payload)
.or(() -> Optional.ofNullable(jsonResponse.getObject("microformat")
.getObject("microformatDataRenderer")
.getString("title")))
.orElseThrow(() -> new ParsingException("Could not get channel name"));
return YoutubeChannelHelper.getChannelName(
channelHeader, jsonResponse, channelAgeGateRenderer);
}
@Nonnull
@ -196,8 +139,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
public List<Image> getAvatars() throws ParsingException {
assertPageFetched();
if (channelAgeGateRenderer != null) {
return Optional.ofNullable(channelAgeGateRenderer.getObject("avatar")
.getArray("thumbnails"))
return Optional.ofNullable(channelAgeGateRenderer.getObject(AVATAR)
.getArray(THUMBNAILS))
.map(YoutubeParsingHelper::getImagesFromThumbnailsArray)
.orElseThrow(() -> new ParsingException("Could not get avatars"));
}
@ -205,22 +148,31 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
return channelHeader.map(header -> {
switch (header.headerType) {
case PAGE:
return header.json.getObject("content")
.getObject("pageHeaderViewModel")
.getObject("image")
.getObject("contentPreviewImageViewModel")
.getObject("image")
.getArray("sources");
final JsonObject imageObj = header.json.getObject(CONTENT)
.getObject(PAGE_HEADER_VIEW_MODEL)
.getObject(IMAGE);
if (imageObj.has(CONTENT_PREVIEW_IMAGE_VIEW_MODEL)) {
return imageObj.getObject(CONTENT_PREVIEW_IMAGE_VIEW_MODEL)
.getObject(IMAGE)
.getArray(SOURCES);
}
// When the image is a decoratedAvatarViewModel, the avatar object is nested inside it,
// so that object must be entered before reading the avatar sources
if (imageObj.has("decoratedAvatarViewModel")) {
return imageObj.getObject("decoratedAvatarViewModel")
.getObject(AVATAR)
.getObject("avatarViewModel")
.getObject(IMAGE)
.getArray(SOURCES);
}
// Return an empty avatar array as a fallback
return new JsonArray();
case INTERACTIVE_TABBED:
return header.json.getObject("boxArt")
.getArray("thumbnails");
.getArray(THUMBNAILS);
case C4_TABBED:
case CAROUSEL:
default:
return header.json.getObject("avatar")
.getArray("thumbnails");
return header.json.getObject(AVATAR)
.getArray(THUMBNAILS);
}
})
.map(YoutubeParsingHelper::getImagesFromThumbnailsArray)
@ -235,10 +187,24 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
return List.of();
}
// No banner is available on pageHeaderRenderer headers
return channelHeader.filter(header -> header.headerType != HeaderType.PAGE)
.map(header -> header.json.getObject("banner")
.getArray("thumbnails"))
return channelHeader.map(header -> {
if (header.headerType == HeaderType.PAGE) {
final JsonObject pageHeaderViewModel = header.json.getObject(CONTENT)
.getObject(PAGE_HEADER_VIEW_MODEL);
// The imageBannerViewModel is a child of the banner object which was just checked for,
// not a direct child of the pageHeaderViewModel
if (pageHeaderViewModel.has("banner")) {
return pageHeaderViewModel.getObject("banner")
.getObject("imageBannerViewModel")
.getObject(IMAGE)
.getArray(SOURCES);
}
// No banner is available (this should happen on pageHeaderRenderers of
// system channels), use an empty JsonArray instead
return new JsonArray();
}
return header.json
.getObject("banner")
.getArray(THUMBNAILS);
})
.map(YoutubeParsingHelper::getImagesFromThumbnailsArray)
.orElse(List.of());
}
@ -264,14 +230,16 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
if (channelHeader.isPresent()) {
final ChannelHeader header = channelHeader.get();
if (header.headerType == HeaderType.INTERACTIVE_TABBED
|| header.headerType == HeaderType.PAGE) {
// No subscriber count is available on interactiveTabbedHeaderRenderer and
// pageHeaderRenderer headers
if (header.headerType == HeaderType.INTERACTIVE_TABBED) {
// No subscriber count is available on interactiveTabbedHeaderRenderer header
return UNKNOWN_SUBSCRIBER_COUNT;
}
final JsonObject headerJson = header.json;
if (header.headerType == HeaderType.PAGE) {
return getSubscriberCountFromPageChannelHeader(headerJson);
}
JsonObject textObject = null;
if (headerJson.has("subscriberCountText")) {
@ -292,6 +260,51 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
return UNKNOWN_SUBSCRIBER_COUNT;
}
/**
 * Get the subscriber count from a channel header of type {@link HeaderType#PAGE}.
 *
 * <p>
 * The count is looked up in the {@code contentMetadataViewModel} of the header's
 * {@code pageHeaderViewModel} metadata: the first {@code metadataParts} array containing
 * exactly two elements is used, and the text content of its first element is parsed with
 * {@link Utils#mixedNumberWordToLong(String)}.
 * </p>
 *
 * @param headerJson the JSON object of the page channel header
 * @return the parsed subscriber count, or {@code UNKNOWN_SUBSCRIBER_COUNT} if the header has no
 *         {@code contentMetadataViewModel} or no metadata parts array with two elements
 * @throws ParsingException if the text found cannot be converted to a number
 */
private long getSubscriberCountFromPageChannelHeader(@Nonnull final JsonObject headerJson)
throws ParsingException {
final JsonObject metadataObject = headerJson.getObject(CONTENT)
.getObject(PAGE_HEADER_VIEW_MODEL)
.getObject(METADATA);
if (metadataObject.has("contentMetadataViewModel")) {
final JsonArray metadataPart = metadataObject.getObject("contentMetadataViewModel")
.getArray("metadataRows")
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.map(metadataRow -> metadataRow.getArray("metadataParts"))
/*
Find metadata parts which have two elements: channel handle and subscriber
count.
On autogenerated music channels, the subscriber count is not shown with this
header.
Use the first metadata parts object found.
*/
.filter(metadataParts -> metadataParts.size() == 2)
.findFirst()
.orElse(null);
if (metadataPart == null) {
// As the parsing of the metadata parts object needed to get the subscriber count
// is fragile, return UNKNOWN_SUBSCRIBER_COUNT when it cannot be got
return UNKNOWN_SUBSCRIBER_COUNT;
}
try {
// The subscriber count is at the same position for all languages as of 02/03/2024
// NOTE(review): the comment above lists the channel handle before the subscriber
// count, yet the element at index 0 is parsed here - confirm the order against a
// real response
return Utils.mixedNumberWordToLong(metadataPart.getObject(0)
.getObject("text")
.getString(CONTENT));
} catch (final NumberFormatException e) {
throw new ParsingException("Could not get subscriber count", e);
}
}
// If the channel header has no contentMetadataViewModel (which is the case for system
// channels using this header), return UNKNOWN_SUBSCRIBER_COUNT
return UNKNOWN_SUBSCRIBER_COUNT;
}
@Override
public String getDescription() throws ParsingException {
assertPageFetched();
@ -302,12 +315,6 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
try {
if (channelHeader.isPresent()) {
final ChannelHeader header = channelHeader.get();
if (header.headerType == HeaderType.PAGE) {
// A pageHeaderRenderer doesn't contain a description
return null;
}
if (header.headerType == HeaderType.INTERACTIVE_TABBED) {
/*
In an interactiveTabbedHeaderRenderer, the real description, is only available
@ -322,7 +329,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
}
// The description is cut and the original one can be only accessed from the About tab
return jsonResponse.getObject("metadata")
// The channelMetadataRenderer is a child of the response's metadata object
return jsonResponse.getObject(METADATA)
.getObject("channelMetadataRenderer")
.getString("description");
} catch (final Exception e) {
@ -371,7 +378,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Nonnull
private List<ListLinkHandler> getTabsForNonAgeRestrictedChannels() throws ParsingException {
final JsonArray responseTabs = jsonResponse.getObject("contents")
final JsonArray responseTabs = jsonResponse.getObject(CONTENTS)
.getObject("twoColumnBrowseResultsRenderer")
.getArray("tabs");
@ -392,8 +399,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
responseTabs.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.filter(tab -> tab.has("tabRenderer"))
.map(tab -> tab.getObject("tabRenderer"))
.filter(tab -> tab.has(TAB_RENDERER))
.map(tab -> tab.getObject(TAB_RENDERER))
.forEach(tabRenderer -> {
final String tabUrl = tabRenderer.getObject("endpoint")
.getObject("commandMetadata")
@ -432,6 +439,9 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
case "playlists":
addNonVideosTab.accept(ChannelTabs.PLAYLISTS);
break;
default:
// Unsupported channel tab, ignore it
break;
}
}
});

View File

@ -29,7 +29,6 @@ import static org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.DISABLE_PRETTY_PRINT_PARAMETER;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.YOUTUBEI_V1_URL;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
@ -120,60 +119,13 @@ public class YoutubeChannelTabExtractor extends ChannelTabExtractor {
@Nonnull
@Override
public String getId() throws ParsingException {
final String id = jsonResponse.getObject("header")
.getObject("c4TabbedHeaderRenderer")
.getString("channelId", "");
if (!id.isEmpty()) {
return id;
}
final Optional<String> carouselHeaderId = jsonResponse.getObject("header")
.getObject("carouselHeaderRenderer")
.getArray("contents")
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.filter(item -> item.has("topicChannelDetailsRenderer"))
.findFirst()
.flatMap(item ->
Optional.ofNullable(item.getObject("topicChannelDetailsRenderer")
.getObject("navigationEndpoint")
.getObject("browseEndpoint")
.getString("browseId")));
if (carouselHeaderId.isPresent()) {
return carouselHeaderId.get();
}
if (!isNullOrEmpty(channelId)) {
return channelId;
} else {
throw new ParsingException("Could not get channel ID");
}
return YoutubeChannelHelper.getChannelId(channelHeader, jsonResponse, channelId);
}
protected String getChannelName() {
final String metadataName = jsonResponse.getObject("metadata")
.getObject("channelMetadataRenderer")
.getString("title");
if (!isNullOrEmpty(metadataName)) {
return metadataName;
}
return YoutubeChannelHelper.getChannelHeader(jsonResponse)
.map(header -> {
final Object title = header.json.get("title");
if (title instanceof String) {
return (String) title;
} else if (title instanceof JsonObject) {
final String headerName = getTextFromObject((JsonObject) title);
if (!isNullOrEmpty(headerName)) {
return headerName;
}
}
return "";
})
.orElse("");
protected String getChannelName() throws ParsingException {
return YoutubeChannelHelper.getChannelName(
channelHeader, jsonResponse,
YoutubeChannelHelper.getChannelAgeGateRenderer(jsonResponse));
}
@Nonnull