NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java

296 lines
12 KiB
Java
Raw Normal View History

2018-05-08 21:19:03 +02:00
package org.schabi.newpipe.extractor.services.youtube.extractors;
2017-03-01 18:47:52 +01:00
2020-02-23 11:23:33 +01:00
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
2020-02-23 11:23:33 +01:00
2017-03-01 18:47:52 +01:00
import org.jsoup.nodes.Document;
import org.schabi.newpipe.extractor.StreamingService;
2017-08-11 03:23:09 +02:00
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.downloader.Response;
2017-03-01 18:47:52 +01:00
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
2018-03-01 01:02:43 +01:00
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
2018-02-24 22:20:50 +01:00
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.utils.Utils;
2017-04-21 23:31:40 +02:00
2017-03-01 18:47:52 +01:00
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
2017-03-01 18:47:52 +01:00
2020-02-23 11:23:33 +01:00
import javax.annotation.Nonnull;
2020-02-17 11:02:43 +01:00
import static org.schabi.newpipe.extractor.utils.Utils.HTTP;
import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
/*
 * Created by Christian Schabesberger on 25.07.16.
 *
 * Copyright (C) Christian Schabesberger 2018 <chris.schabesberger@mailbox.org>
 * YoutubeChannelExtractor.java is part of NewPipe.
 *
 * NewPipe is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * NewPipe is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
 */
@SuppressWarnings("WeakerAccess")
2017-08-11 03:23:09 +02:00
public class YoutubeChannelExtractor extends ChannelExtractor {
// Prefix that getUrl() prepends to the channel id to build the channel URL.
/*package-private*/ static final String CHANNEL_URL_BASE = "https://www.youtube.com/channel/";
// Path and query string that onFetchPage() appends to the channel URL before downloading it.
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";
// Result of YoutubeParsingHelper.parseAndCheckPage(); assigned in onFetchPage().
private Document doc;
// Result of YoutubeParsingHelper.getInitialData(); assigned in onFetchPage() and read by the getters.
private JsonObject initialData;
2017-09-26 22:46:21 +02:00
/**
 * Creates a channel extractor; both arguments are forwarded unchanged to the
 * {@link ChannelExtractor} constructor.
 *
 * @param service     the streaming service handed to the superclass
 * @param linkHandler the link handler handed to the superclass
 */
public YoutubeChannelExtractor(final StreamingService service, final ListLinkHandler linkHandler) {
    super(service, linkHandler);
}
@Override
2017-11-28 13:37:01 +01:00
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
String channelUrl = super.getUrl() + CHANNEL_URL_PARAMETERS;
final Response response = downloader.get(channelUrl, getExtractorLocalization());
doc = YoutubeParsingHelper.parseAndCheckPage(channelUrl, response);
initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
2020-02-17 09:55:06 +01:00
}
2018-02-26 15:55:27 +01:00
/**
 * Builds the URL of the page following the initial one from the continuations listed in the
 * section list of the "Videos" tab.
 *
 * @return the next page URL, or an empty string if the section list has no continuations
 * @throws ExtractionException if the "Videos" tab cannot be found
 */
@Override
public String getNextPageUrl() throws ExtractionException {
    final JsonObject sectionList = getVideoTab().getObject("content").getObject("sectionListRenderer");
    return getNextPageUrlFrom(sectionList.getArray("continuations"));
}
2017-03-01 18:47:52 +01:00
/**
 * Returns the channel URL built from {@link #CHANNEL_URL_BASE} and the extracted channel id.
 * If the id cannot be extracted, the URL known to the superclass is returned instead.
 *
 * @return the channel URL
 * @throws ParsingException if the fallback {@code super.getUrl()} fails
 */
@Nonnull
@Override
public String getUrl() throws ParsingException {
    final String channelId;
    try {
        channelId = getId();
    } catch (ParsingException ignored) {
        // No id available: fall back to the URL the superclass already knows.
        return super.getUrl();
    }
    return CHANNEL_URL_BASE + channelId;
}
@Nonnull
@Override
2017-08-11 03:23:09 +02:00
public String getId() throws ParsingException {
try {
return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getString("channelId");
} catch (Exception e) {
throw new ParsingException("Could not get channel id", e);
2017-03-01 18:47:52 +01:00
}
}
@Nonnull
2017-03-01 18:47:52 +01:00
@Override
2017-08-11 03:23:09 +02:00
public String getName() throws ParsingException {
2017-03-01 18:47:52 +01:00
try {
return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getString("title");
} catch (Exception e) {
throw new ParsingException("Could not get channel name", e);
2017-03-01 18:47:52 +01:00
}
}
@Override
public String getAvatarUrl() throws ParsingException {
try {
return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("avatar")
.getArray("thumbnails").getObject(0).getString("url");
} catch (Exception e) {
2017-03-01 18:47:52 +01:00
throw new ParsingException("Could not get avatar", e);
}
}
@Override
public String getBannerUrl() throws ParsingException {
try {
2020-02-24 13:01:13 +01:00
String url = null;
try {
url = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("banner")
.getArray("thumbnails").getObject(0).getString("url");
2020-02-24 13:01:13 +01:00
} catch (Exception ignored) {}
if (url == null || url.contains("s.ytimg.com") || url.contains("default_banner")) {
2020-02-17 11:02:43 +01:00
return null;
}
// the first characters of the banner URLs are different for each channel and some are not even valid URLs
if (url.startsWith("//")) {
url = url.substring(2);
}
if (url.startsWith(HTTP)) {
url = Utils.replaceHttpWithHttps(url);
} else if (!url.startsWith(HTTPS)) {
url = HTTPS + url;
}
2020-02-17 11:02:43 +01:00
return url;
} catch (Exception e) {
throw new ParsingException("Could not get banner", e);
2017-03-01 18:47:52 +01:00
}
}
2017-08-11 03:23:09 +02:00
@Override
public String getFeedUrl() throws ParsingException {
try {
return YoutubeParsingHelper.getFeedUrlFrom(getId());
2017-08-11 03:23:09 +02:00
} catch (Exception e) {
throw new ParsingException("Could not get feed url", e);
}
}
@Override
public long getSubscriberCount() throws ParsingException {
final JsonObject subscriberInfo = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("subscriberCountText");
if (subscriberInfo != null) {
try {
return Utils.mixedNumberWordToLong(subscriberInfo.getArray("runs").getObject(0).getString("text"));
} catch (NumberFormatException e) {
throw new ParsingException("Could not get subscriber count", e);
}
2017-08-06 22:20:15 +02:00
} else {
// If the element is null, the channel have the subscriber count disabled
return -1;
2017-03-01 18:47:52 +01:00
}
}
@Override
public String getDescription() throws ParsingException {
try {
return initialData.getObject("metadata").getObject("channelMetadataRenderer").getString("description");
} catch (Exception e) {
2017-08-11 03:23:09 +02:00
throw new ParsingException("Could not get channel description", e);
}
}
2017-11-25 02:03:30 +01:00
@Nonnull
@Override
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ExtractionException {
2018-02-24 22:20:50 +01:00
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
JsonArray videos = getVideoTab().getObject("content").getObject("sectionListRenderer").getArray("contents");
collectStreamsFrom(collector, videos);
return new InfoItemsPage<>(collector, getNextPageUrl());
2017-08-06 22:20:15 +02:00
}
@Override
public InfoItemsPage<StreamInfoItem> getPage(String pageUrl) throws IOException, ExtractionException {
2018-03-01 01:02:43 +01:00
if (pageUrl == null || pageUrl.isEmpty()) {
throw new ExtractionException(new IllegalArgumentException("Page url is empty or null"));
}
2018-03-01 01:02:43 +01:00
// Unfortunately, we have to fetch the page even if we are only getting next streams,
// as they don't deliver enough information on their own (the channel name, for example).
2020-02-24 15:51:08 +01:00
fetchPage();
2018-03-01 01:02:43 +01:00
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
JsonArray ajaxJson;
Map<String, List<String>> headers = new HashMap<>();
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
2018-03-01 01:02:43 +01:00
try {
// Use the hardcoded client version first to get JSON with a structure we know
headers.put("X-YouTube-Client-Version",
Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION));
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
if (response.length() < 50) { // ensure to have a valid response
throw new ParsingException("Could not parse json data for next streams");
}
ajaxJson = JsonParser.array().from(response);
} catch (Exception e) {
try {
headers.put("X-YouTube-Client-Version",
Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString())));
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
if (response.length() < 50) { // ensure to have a valid response
throw new ParsingException("Could not parse json data for next streams");
}
ajaxJson = JsonParser.array().from(response);
} catch (JsonParserException ignored) {
throw new ParsingException("Could not parse json data for next streams", e);
}
2018-02-26 15:55:27 +01:00
}
2018-03-01 01:02:43 +01:00
JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response")
.getObject("continuationContents").getObject("sectionListContinuation");
2018-03-01 01:02:43 +01:00
collectStreamsFrom(collector, sectionListContinuation.getArray("contents"));
2018-02-26 15:55:27 +01:00
return new InfoItemsPage<>(collector, getNextPageUrlFrom(sectionListContinuation.getArray("continuations")));
}
/**
 * Builds the {@code browse_ajax} URL of the next page from the first entry of a
 * {@code continuations} array.
 *
 * @param continuations the {@code continuations} array of a section list, may be {@code null}
 * @return the next page URL, or an empty string if no continuation token is available
 */
private String getNextPageUrlFrom(JsonArray continuations) {
    if (continuations == null || continuations.isEmpty()) {
        return "";
    }

    final JsonObject firstContinuation = continuations.getObject(0);
    final JsonObject nextContinuationData = firstContinuation == null
            ? null : firstContinuation.getObject("nextContinuationData");
    if (nextContinuationData == null) {
        // Same outcome as a missing array: there is nothing to continue with.
        return "";
    }

    final String continuation = nextContinuationData.getString("continuation");
    if (continuation == null) {
        // Without a token the URL would contain the literal text "null".
        return "";
    }
    final String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");

    return "https://www.youtube.com/browse_ajax?ctoken=" + continuation + "&continuation=" + continuation
            + "&itct=" + clickTrackingParams;
}
private void collectStreamsFrom(StreamInfoItemsCollector collector, JsonArray videos) throws ParsingException {
collector.reset();
final String uploaderName = getName();
final String uploaderUrl = getUrl();
final TimeAgoParser timeAgoParser = getTimeAgoParser();
2020-02-23 11:23:33 +01:00
for (Object video : videos) {
JsonObject videoInfo = ((JsonObject) video).getObject("itemSectionRenderer")
.getArray("contents").getObject(0);
if (videoInfo.getObject("videoRenderer") != null) {
collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo.getObject("videoRenderer"), timeAgoParser) {
2017-03-01 18:47:52 +01:00
@Override
2020-02-23 11:23:33 +01:00
public String getUploaderName() {
return uploaderName;
2017-03-01 18:47:52 +01:00
}
2017-11-25 02:38:46 +01:00
@Override
2020-02-23 11:23:33 +01:00
public String getUploaderUrl() {
2017-11-25 02:38:46 +01:00
return uploaderUrl;
}
2017-03-01 18:47:52 +01:00
});
}
}
}
/**
 * Finds the tab titled "Videos" among the tabs of the fetched channel page.
 *
 * @return the {@code tabRenderer} object of the first tab whose title is "Videos"
 * @throws ParsingException if the tab list cannot be read or contains no such tab
 */
private JsonObject getVideoTab() throws ParsingException {
    final JsonArray tabs;
    try {
        tabs = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
                .getArray("tabs");
    } catch (Exception e) {
        // A missing intermediate object yields null and would otherwise surface as an NPE.
        throw new ParsingException("Could not find Videos tab", e);
    }

    if (tabs != null) {
        for (Object tab : tabs) {
            final JsonObject tabRenderer = ((JsonObject) tab).getObject("tabRenderer");
            // Constant-first equals: a tab without a title must be skipped, not crash the lookup.
            if (tabRenderer != null && "Videos".equals(tabRenderer.getString("title"))) {
                return tabRenderer;
            }
        }
    }

    throw new ParsingException("Could not find Videos tab");
}
2017-03-01 18:47:52 +01:00
}