NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java

483 lines
19 KiB
Java

package org.schabi.newpipe.extractor.services.youtube.extractors;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.DISABLE_PRETTY_PRINT_PARAMETER;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.YOUTUBEI_V1_URL;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.addClientInfoHeaders;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getKey;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getValidJsonResponseBody;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING;
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonWriter;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
import org.schabi.newpipe.extractor.exceptions.ContentNotSupportedException;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
/*
* Created by Christian Schabesberger on 25.07.16.
*
* Copyright (C) Christian Schabesberger 2018 <chris.schabesberger@mailbox.org>
* YoutubeChannelExtractor.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public class YoutubeChannelExtractor extends ChannelExtractor {
private JsonObject initialData;
private JsonObject videoTab;
/**
* Some channels have response redirects and the only way to reliably get the id is by saving it
* <p>
* "Movies & Shows":
* <pre>
* UCuJcl0Ju-gPDoksRjK1ya-w ┐
* UChBfWrfBXL9wS6tQtgjt_OQ ├ UClgRkhTL3_hImCAmdLfDE4g
* UCok7UTQQEP1Rsctxiv3gwSQ ┘
* </pre>
*/
private String redirectedChannelId;
public YoutubeChannelExtractor(final StreamingService service,
final ListLinkHandler linkHandler) {
super(service, linkHandler);
}
@Override
public void onFetchPage(@Nonnull final Downloader downloader) throws IOException,
ExtractionException {
final String channelPath = super.getId();
final String[] channelId = channelPath.split("/");
String id = "";
// If the url is an URL which is not a /channel URL, we need to use the
// navigation/resolve_url endpoint of the InnerTube API to get the channel id. Otherwise,
// we couldn't get information about the channel associated with this URL, if there is one.
if (!channelId[0].equals("channel")) {
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(
getExtractorLocalization(), getExtractorContentCountry())
.value("url", "https://www.youtube.com/" + channelPath)
.done())
.getBytes(UTF_8);
final JsonObject jsonResponse = getJsonPostResponse("navigation/resolve_url",
body, getExtractorLocalization());
if (!isNullOrEmpty(jsonResponse.getObject("error"))) {
final JsonObject errorJsonObject = jsonResponse.getObject("error");
final int errorCode = errorJsonObject.getInt("code");
if (errorCode == 404) {
throw new ContentNotAvailableException("This channel doesn't exist.");
} else {
throw new ContentNotAvailableException("Got error:\""
+ errorJsonObject.getString("status") + "\": "
+ errorJsonObject.getString("message"));
}
}
final JsonObject endpoint = jsonResponse.getObject("endpoint");
final String webPageType = endpoint.getObject("commandMetadata")
.getObject("webCommandMetadata")
.getString("webPageType", EMPTY_STRING);
final JsonObject browseEndpoint = endpoint.getObject("browseEndpoint");
final String browseId = browseEndpoint.getString("browseId", EMPTY_STRING);
if (webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_BROWSE")
|| webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_CHANNEL")
&& !browseId.isEmpty()) {
if (!browseId.startsWith("UC")) {
throw new ExtractionException("Redirected id is not pointing to a channel");
}
id = browseId;
redirectedChannelId = browseId;
}
} else {
id = channelId[1];
}
JsonObject ajaxJson = null;
int level = 0;
while (level < 3) {
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(
getExtractorLocalization(), getExtractorContentCountry())
.value("browseId", id)
.value("params", "EgZ2aWRlb3M%3D") // Equal to videos
.done())
.getBytes(UTF_8);
final JsonObject jsonResponse = getJsonPostResponse("browse", body,
getExtractorLocalization());
if (!isNullOrEmpty(jsonResponse.getObject("error"))) {
final JsonObject errorJsonObject = jsonResponse.getObject("error");
final int errorCode = errorJsonObject.getInt("code");
if (errorCode == 404) {
throw new ContentNotAvailableException("This channel doesn't exist.");
} else {
throw new ContentNotAvailableException("Got error:\""
+ errorJsonObject.getString("status") + "\": "
+ errorJsonObject.getString("message"));
}
}
final JsonObject endpoint = jsonResponse.getArray("onResponseReceivedActions")
.getObject(0)
.getObject("navigateAction")
.getObject("endpoint");
final String webPageType = endpoint.getObject("commandMetadata")
.getObject("webCommandMetadata")
.getString("webPageType", EMPTY_STRING);
final String browseId = endpoint.getObject("browseEndpoint").getString("browseId",
EMPTY_STRING);
if (webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_BROWSE")
|| webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_CHANNEL")
&& !browseId.isEmpty()) {
if (!browseId.startsWith("UC")) {
throw new ExtractionException("Redirected id is not pointing to a channel");
}
id = browseId;
redirectedChannelId = browseId;
level++;
} else {
ajaxJson = jsonResponse;
break;
}
}
if (ajaxJson == null) {
throw new ExtractionException("Could not fetch initial JSON data");
}
initialData = ajaxJson;
YoutubeParsingHelper.defaultAlertsCheck(initialData);
}
@Nonnull
@Override
public String getUrl() throws ParsingException {
try {
return YoutubeChannelLinkHandlerFactory.getInstance().getUrl("channel/" + getId());
} catch (final ParsingException e) {
return super.getUrl();
}
}
@Nonnull
@Override
public String getId() throws ParsingException {
final String channelId = initialData.getObject("header")
.getObject("c4TabbedHeaderRenderer")
.getString("channelId", EMPTY_STRING);
if (!channelId.isEmpty()) {
return channelId;
} else if (!isNullOrEmpty(redirectedChannelId)) {
return redirectedChannelId;
} else {
throw new ParsingException("Could not get channel id");
}
}
@Nonnull
@Override
public String getName() throws ParsingException {
try {
return initialData.getObject("header").getObject("c4TabbedHeaderRenderer")
.getString("title");
} catch (final Exception e) {
throw new ParsingException("Could not get channel name", e);
}
}
@Override
public String getAvatarUrl() throws ParsingException {
try {
final String url = initialData.getObject("header")
.getObject("c4TabbedHeaderRenderer").getObject("avatar").getArray("thumbnails")
.getObject(0).getString("url");
return fixThumbnailUrl(url);
} catch (final Exception e) {
throw new ParsingException("Could not get avatar", e);
}
}
@Override
public String getBannerUrl() throws ParsingException {
try {
final String url = initialData.getObject("header")
.getObject("c4TabbedHeaderRenderer").getObject("banner").getArray("thumbnails")
.getObject(0).getString("url");
if (url == null || url.contains("s.ytimg.com") || url.contains("default_banner")) {
return null;
}
return fixThumbnailUrl(url);
} catch (final Exception e) {
throw new ParsingException("Could not get banner", e);
}
}
@Override
public String getFeedUrl() throws ParsingException {
try {
return YoutubeParsingHelper.getFeedUrlFrom(getId());
} catch (final Exception e) {
throw new ParsingException("Could not get feed url", e);
}
}
@Override
public long getSubscriberCount() throws ParsingException {
final JsonObject c4TabbedHeaderRenderer = initialData.getObject("header")
.getObject("c4TabbedHeaderRenderer");
if (!c4TabbedHeaderRenderer.has("subscriberCountText")) {
return UNKNOWN_SUBSCRIBER_COUNT;
}
try {
return Utils.mixedNumberWordToLong(getTextFromObject(c4TabbedHeaderRenderer
.getObject("subscriberCountText")));
} catch (final NumberFormatException e) {
throw new ParsingException("Could not get subscriber count", e);
}
}
@Override
public String getDescription() throws ParsingException {
try {
return initialData.getObject("metadata").getObject("channelMetadataRenderer")
.getString("description");
} catch (final Exception e) {
throw new ParsingException("Could not get channel description", e);
}
}
@Override
public String getParentChannelName() {
return "";
}
@Override
public String getParentChannelUrl() {
return "";
}
@Override
public String getParentChannelAvatarUrl() {
return "";
}
@Override
public boolean isVerified() throws ParsingException {
final JsonArray badges = initialData.getObject("header")
.getObject("c4TabbedHeaderRenderer")
.getArray("badges");
return YoutubeParsingHelper.isVerified(badges);
}
@Nonnull
@Override
public InfoItemsPage<StreamInfoItem> getInitialPage() throws IOException, ExtractionException {
final StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
Page nextPage = null;
if (getVideoTab() != null) {
final JsonObject gridRenderer = getVideoTab().getObject("content")
.getObject("sectionListRenderer")
.getArray("contents").getObject(0).getObject("itemSectionRenderer")
.getArray("contents").getObject(0).getObject("gridRenderer");
final List<String> channelIds = new ArrayList<>();
channelIds.add(getName());
channelIds.add(getUrl());
final JsonObject continuation = collectStreamsFrom(collector, gridRenderer
.getArray("items"), channelIds);
nextPage = getNextPageFrom(continuation, channelIds);
}
return new InfoItemsPage<>(collector, nextPage);
}
@Override
public InfoItemsPage<StreamInfoItem> getPage(final Page page) throws IOException,
ExtractionException {
if (page == null || isNullOrEmpty(page.getUrl())) {
throw new IllegalArgumentException("Page doesn't contain an URL");
}
final List<String> channelIds = page.getIds();
final StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
final Map<String, List<String>> headers = new HashMap<>();
addClientInfoHeaders(headers);
final Response response = getDownloader().post(page.getUrl(), null, page.getBody(),
getExtractorLocalization());
final JsonObject ajaxJson = JsonUtils.toJsonObject(getValidJsonResponseBody(response));
final JsonObject sectionListContinuation = ajaxJson.getArray("onResponseReceivedActions")
.getObject(0)
.getObject("appendContinuationItemsAction");
final JsonObject continuation = collectStreamsFrom(collector, sectionListContinuation
.getArray("continuationItems"), channelIds);
return new InfoItemsPage<>(collector, getNextPageFrom(continuation, channelIds));
}
@Nullable
private Page getNextPageFrom(final JsonObject continuations,
final List<String> channelIds) throws IOException,
ExtractionException {
if (isNullOrEmpty(continuations)) {
return null;
}
final JsonObject continuationEndpoint = continuations.getObject("continuationEndpoint");
final String continuation = continuationEndpoint.getObject("continuationCommand")
.getString("token");
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(getExtractorLocalization(),
getExtractorContentCountry())
.value("continuation", continuation)
.done())
.getBytes(UTF_8);
return new Page(YOUTUBEI_V1_URL + "browse?key=" + getKey()
+ DISABLE_PRETTY_PRINT_PARAMETER, null, channelIds, null, body);
}
/**
* Collect streams from an array of items
*
* @param collector the collector where videos will be committed
* @param videos the array to get videos from
* @param channelIds the ids of the channel, which are its name and its URL
* @return the continuation object
*/
private JsonObject collectStreamsFrom(@Nonnull final StreamInfoItemsCollector collector,
@Nonnull final JsonArray videos,
@Nonnull final List<String> channelIds) {
collector.reset();
final String uploaderName = channelIds.get(0);
final String uploaderUrl = channelIds.get(1);
final TimeAgoParser timeAgoParser = getTimeAgoParser();
JsonObject continuation = null;
for (final Object object : videos) {
final JsonObject video = (JsonObject) object;
if (video.has("gridVideoRenderer")) {
collector.commit(new YoutubeStreamInfoItemExtractor(
video.getObject("gridVideoRenderer"), timeAgoParser) {
@Override
public String getUploaderName() {
return uploaderName;
}
@Override
public String getUploaderUrl() {
return uploaderUrl;
}
});
} else if (video.has("continuationItemRenderer")) {
continuation = video.getObject("continuationItemRenderer");
}
}
return continuation;
}
@Nullable
private JsonObject getVideoTab() throws ParsingException {
if (this.videoTab != null) {
return this.videoTab;
}
final JsonArray tabs = initialData.getObject("contents")
.getObject("twoColumnBrowseResultsRenderer")
.getArray("tabs");
JsonObject foundVideoTab = null;
for (final Object tab : tabs) {
if (((JsonObject) tab).has("tabRenderer")) {
if (((JsonObject) tab).getObject("tabRenderer").getString("title",
EMPTY_STRING).equals("Videos")) {
foundVideoTab = ((JsonObject) tab).getObject("tabRenderer");
break;
}
}
}
if (foundVideoTab == null) {
throw new ContentNotSupportedException("This channel has no Videos tab");
}
final String messageRendererText = getTextFromObject(foundVideoTab.getObject("content")
.getObject("sectionListRenderer").getArray("contents").getObject(0)
.getObject("itemSectionRenderer").getArray("contents").getObject(0)
.getObject("messageRenderer").getObject("text"));
if (messageRendererText != null
&& messageRendererText.equals("This channel has no videos.")) {
return null;
}
this.videoTab = foundVideoTab;
return foundVideoTab;
}
}