2018-05-08 21:19:03 +02:00
|
|
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
2017-03-01 18:47:52 +01:00
|
|
|
|
|
|
|
|
2017-08-16 04:40:03 +02:00
|
|
|
import com.grack.nanojson.JsonObject;
|
|
|
|
import com.grack.nanojson.JsonParser;
|
|
|
|
import com.grack.nanojson.JsonParserException;
|
2017-03-01 18:47:52 +01:00
|
|
|
import org.jsoup.Jsoup;
|
|
|
|
import org.jsoup.nodes.Document;
|
|
|
|
import org.jsoup.nodes.Element;
|
2019-10-29 06:00:29 +01:00
|
|
|
import org.schabi.newpipe.extractor.DownloadResponse;
|
2018-09-04 03:37:31 +02:00
|
|
|
import org.schabi.newpipe.extractor.Downloader;
|
|
|
|
import org.schabi.newpipe.extractor.NewPipe;
|
|
|
|
import org.schabi.newpipe.extractor.StreamingService;
|
2017-08-11 03:23:09 +02:00
|
|
|
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
|
2017-03-01 18:47:52 +01:00
|
|
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
2018-09-04 03:37:31 +02:00
|
|
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
2019-10-29 06:00:29 +01:00
|
|
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
2018-03-01 01:02:43 +01:00
|
|
|
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
2018-02-24 22:20:50 +01:00
|
|
|
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
2019-10-02 07:02:01 +02:00
|
|
|
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
|
2018-04-08 15:58:42 +02:00
|
|
|
import org.schabi.newpipe.extractor.utils.DonationLinkHelper;
|
2018-09-15 21:47:53 +02:00
|
|
|
import org.schabi.newpipe.extractor.utils.Localization;
|
2017-06-29 20:12:55 +02:00
|
|
|
import org.schabi.newpipe.extractor.utils.Parser;
|
2017-07-11 05:08:03 +02:00
|
|
|
import org.schabi.newpipe.extractor.utils.Utils;
|
2017-04-21 23:31:40 +02:00
|
|
|
|
2017-11-25 01:10:04 +01:00
|
|
|
import javax.annotation.Nonnull;
|
2017-03-01 18:47:52 +01:00
|
|
|
import java.io.IOException;
|
2018-04-07 16:45:07 +02:00
|
|
|
import java.util.ArrayList;
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2017-06-29 20:12:55 +02:00
|
|
|
/*
 * Created by Christian Schabesberger on 25.07.16.
 *
 * Copyright (C) Christian Schabesberger 2018 <chris.schabesberger@mailbox.org>
 * YoutubeChannelExtractor.java is part of NewPipe.
 *
 * NewPipe is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * NewPipe is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
|
2017-06-29 20:12:55 +02:00
|
|
|
@SuppressWarnings("WeakerAccess")
|
2017-08-11 03:23:09 +02:00
|
|
|
public class YoutubeChannelExtractor extends ChannelExtractor {
|
2019-08-12 11:57:29 +02:00
|
|
|
/*package-private*/ static final String CHANNEL_URL_BASE = "https://www.youtube.com/channel/";
|
2017-06-29 20:12:55 +02:00
|
|
|
private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
|
2019-09-16 23:15:54 +02:00
|
|
|
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000&gl=US&hl=en";
|
2017-06-29 20:12:55 +02:00
|
|
|
|
2019-10-02 07:02:01 +02:00
|
|
|
private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser();
|
|
|
|
|
2017-06-29 20:12:55 +02:00
|
|
|
private Document doc;
|
2017-09-26 22:46:21 +02:00
|
|
|
|
2018-09-15 21:47:53 +02:00
|
|
|
public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler, Localization localization) {
|
|
|
|
super(service, linkHandler, localization);
|
2017-08-06 22:20:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2017-11-28 13:37:01 +01:00
|
|
|
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
|
2018-05-06 14:08:50 +02:00
|
|
|
String channelUrl = super.getUrl() + CHANNEL_URL_PARAMETERS;
|
2019-10-29 06:00:29 +01:00
|
|
|
final DownloadResponse response = downloader.get(channelUrl);
|
|
|
|
doc = YoutubeParsingHelper.parseAndCheckPage(channelUrl, response);
|
2018-02-26 15:55:27 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getNextPageUrl() throws ExtractionException {
|
|
|
|
return getNextPageUrlFrom(doc);
|
2017-06-29 20:12:55 +02:00
|
|
|
}
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2017-11-25 01:10:04 +01:00
|
|
|
@Nonnull
|
2017-08-16 04:56:10 +02:00
|
|
|
@Override
|
2018-05-06 14:08:50 +02:00
|
|
|
public String getUrl() throws ParsingException {
|
2017-08-16 04:56:10 +02:00
|
|
|
try {
|
2019-08-12 11:57:29 +02:00
|
|
|
return CHANNEL_URL_BASE + getId();
|
2017-08-16 04:56:10 +02:00
|
|
|
} catch (ParsingException e) {
|
2018-05-06 14:08:50 +02:00
|
|
|
return super.getUrl();
|
2017-08-16 04:56:10 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-25 01:10:04 +01:00
|
|
|
@Nonnull
|
2017-06-29 20:12:55 +02:00
|
|
|
@Override
|
2017-08-11 03:23:09 +02:00
|
|
|
public String getId() throws ParsingException {
|
2017-06-29 20:12:55 +02:00
|
|
|
try {
|
2019-08-27 13:15:06 +02:00
|
|
|
return doc.select("meta[itemprop=\"channelId\"]").first().attr("content");
|
|
|
|
} catch (Exception ignored) {}
|
|
|
|
|
|
|
|
// fallback method; does not work with channels that have no "Subscribe" button (e.g. EminemVEVO)
|
|
|
|
try {
|
2017-08-16 04:56:10 +02:00
|
|
|
Element element = doc.getElementsByClass("yt-uix-subscription-button").first();
|
|
|
|
if (element == null) element = doc.getElementsByClass("yt-uix-subscription-preferences-button").first();
|
|
|
|
|
|
|
|
return element.attr("data-channel-external-id");
|
2017-06-29 20:12:55 +02:00
|
|
|
} catch (Exception e) {
|
2017-08-16 04:56:10 +02:00
|
|
|
throw new ParsingException("Could not get channel id", e);
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-25 01:10:04 +01:00
|
|
|
@Nonnull
|
2017-03-01 18:47:52 +01:00
|
|
|
@Override
|
2017-08-11 03:23:09 +02:00
|
|
|
public String getName() throws ParsingException {
|
2017-03-01 18:47:52 +01:00
|
|
|
try {
|
2017-09-11 15:19:16 +02:00
|
|
|
return doc.select("meta[property=\"og:title\"]").first().attr("content");
|
2017-06-29 20:12:55 +02:00
|
|
|
} catch (Exception e) {
|
2017-09-09 21:42:42 +02:00
|
|
|
throw new ParsingException("Could not get channel name", e);
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getAvatarUrl() throws ParsingException {
|
|
|
|
try {
|
2017-09-09 22:11:16 +02:00
|
|
|
return doc.select("img[class=\"channel-header-profile-image\"]").first().attr("abs:src");
|
2017-06-29 20:12:55 +02:00
|
|
|
} catch (Exception e) {
|
2017-03-01 18:47:52 +01:00
|
|
|
throw new ParsingException("Could not get avatar", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getBannerUrl() throws ParsingException {
|
|
|
|
try {
|
2017-08-06 22:20:15 +02:00
|
|
|
Element el = doc.select("div[id=\"gh-banner\"]").first().select("style").first();
|
|
|
|
String cssContent = el.html();
|
|
|
|
String url = "https:" + Parser.matchGroup1("url\\(([^)]+)\\)", cssContent);
|
2017-06-29 20:12:55 +02:00
|
|
|
|
2017-08-06 22:20:15 +02:00
|
|
|
return url.contains("s.ytimg.com") || url.contains("default_banner") ? null : url;
|
2017-06-29 20:12:55 +02:00
|
|
|
} catch (Exception e) {
|
2017-03-01 18:47:52 +01:00
|
|
|
throw new ParsingException("Could not get Banner", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-08-11 03:23:09 +02:00
|
|
|
@Override
|
|
|
|
public String getFeedUrl() throws ParsingException {
|
|
|
|
try {
|
2017-08-16 04:56:10 +02:00
|
|
|
return CHANNEL_FEED_BASE + getId();
|
2017-08-11 03:23:09 +02:00
|
|
|
} catch (Exception e) {
|
|
|
|
throw new ParsingException("Could not get feed url", e);
|
|
|
|
}
|
|
|
|
}
|
2017-06-29 20:12:55 +02:00
|
|
|
|
|
|
|
@Override
|
|
|
|
public long getSubscriberCount() throws ParsingException {
|
2019-09-25 08:56:39 +02:00
|
|
|
|
|
|
|
final Element el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]").first();
|
2017-08-06 22:20:15 +02:00
|
|
|
if (el != null) {
|
2019-09-25 08:56:39 +02:00
|
|
|
String elTitle = el.attr("title");
|
2018-09-04 03:37:31 +02:00
|
|
|
try {
|
2019-09-25 08:56:39 +02:00
|
|
|
return Utils.mixedNumberWordToLong(elTitle);
|
2018-09-04 03:37:31 +02:00
|
|
|
} catch (NumberFormatException e) {
|
|
|
|
throw new ParsingException("Could not get subscriber count", e);
|
|
|
|
}
|
2017-08-06 22:20:15 +02:00
|
|
|
} else {
|
2018-09-04 03:37:31 +02:00
|
|
|
// If the element is null, the channel have the subscriber count disabled
|
|
|
|
return -1;
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
2017-06-29 20:12:55 +02:00
|
|
|
}
|
|
|
|
|
2017-08-07 19:00:36 +02:00
|
|
|
@Override
|
|
|
|
public String getDescription() throws ParsingException {
|
|
|
|
try {
|
|
|
|
return doc.select("meta[name=\"description\"]").first().attr("content");
|
2017-06-29 20:12:55 +02:00
|
|
|
} catch (Exception e) {
|
2017-08-11 03:23:09 +02:00
|
|
|
throw new ParsingException("Could not get channel description", e);
|
2017-06-29 20:12:55 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-25 02:03:30 +01:00
|
|
|
@Nonnull
|
2017-06-29 20:12:55 +02:00
|
|
|
@Override
|
2018-03-11 21:54:41 +01:00
|
|
|
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ExtractionException {
|
2018-02-24 22:20:50 +01:00
|
|
|
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
2017-08-06 22:20:15 +02:00
|
|
|
Element ul = doc.select("ul[id=\"browse-items-primary\"]").first();
|
|
|
|
collectStreamsFrom(collector, ul);
|
2018-03-11 21:54:41 +01:00
|
|
|
return new InfoItemsPage<>(collector, getNextPageUrl());
|
2017-08-06 22:20:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2018-03-11 21:54:41 +01:00
|
|
|
public InfoItemsPage<StreamInfoItem> getPage(String pageUrl) throws IOException, ExtractionException {
|
2018-03-01 01:02:43 +01:00
|
|
|
if (pageUrl == null || pageUrl.isEmpty()) {
|
|
|
|
throw new ExtractionException(new IllegalArgumentException("Page url is empty or null"));
|
|
|
|
}
|
2017-06-29 20:12:55 +02:00
|
|
|
|
2018-03-01 01:02:43 +01:00
|
|
|
// Unfortunately, we have to fetch the page even if we are only getting next streams,
|
|
|
|
// as they don't deliver enough information on their own (the channel name, for example).
|
|
|
|
fetchPage();
|
2017-06-29 20:12:55 +02:00
|
|
|
|
2018-03-01 01:02:43 +01:00
|
|
|
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
|
|
|
JsonObject ajaxJson;
|
|
|
|
try {
|
|
|
|
ajaxJson = JsonParser.object().from(NewPipe.getDownloader().download(pageUrl));
|
2018-02-26 15:55:27 +01:00
|
|
|
} catch (JsonParserException pe) {
|
|
|
|
throw new ParsingException("Could not parse json data for next streams", pe);
|
|
|
|
}
|
2018-03-01 01:02:43 +01:00
|
|
|
|
2018-03-04 21:30:31 +01:00
|
|
|
final Document ajaxHtml = Jsoup.parse(ajaxJson.getString("content_html"), pageUrl);
|
2018-03-01 01:02:43 +01:00
|
|
|
collectStreamsFrom(collector, ajaxHtml.select("body").first());
|
|
|
|
|
2018-03-11 21:54:41 +01:00
|
|
|
return new InfoItemsPage<>(collector, getNextPageUrlFromAjaxPage(ajaxJson, pageUrl));
|
2018-02-26 15:55:27 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
private String getNextPageUrlFromAjaxPage(final JsonObject ajaxJson, final String pageUrl)
|
|
|
|
throws ParsingException {
|
|
|
|
String loadMoreHtmlDataRaw = ajaxJson.getString("load_more_widget_html");
|
|
|
|
if (!loadMoreHtmlDataRaw.isEmpty()) {
|
|
|
|
return getNextPageUrlFrom(Jsoup.parse(loadMoreHtmlDataRaw, pageUrl));
|
|
|
|
} else {
|
|
|
|
return "";
|
2017-06-29 20:12:55 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-24 22:20:50 +01:00
|
|
|
private String getNextPageUrlFrom(Document d) throws ParsingException {
|
2017-06-29 20:12:55 +02:00
|
|
|
try {
|
|
|
|
Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
|
|
|
|
if (button != null) {
|
|
|
|
return button.attr("abs:data-uix-load-more-href");
|
|
|
|
} else {
|
|
|
|
// Sometimes channels are simply so small, they don't have a more streams/videos
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
} catch (Exception e) {
|
2018-03-04 21:26:13 +01:00
|
|
|
throw new ParsingException("Could not get next page url", e);
|
2017-06-29 20:12:55 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-24 22:20:50 +01:00
|
|
|
private void collectStreamsFrom(StreamInfoItemsCollector collector, Element element) throws ParsingException {
|
2017-11-11 12:17:14 +01:00
|
|
|
collector.reset();
|
2017-06-29 20:12:55 +02:00
|
|
|
|
2017-09-11 15:19:16 +02:00
|
|
|
final String uploaderName = getName();
|
2018-05-06 14:08:50 +02:00
|
|
|
final String uploaderUrl = getUrl();
|
2017-06-29 20:12:55 +02:00
|
|
|
for (final Element li : element.children()) {
|
2017-03-01 18:47:52 +01:00
|
|
|
if (li.select("div[class=\"feed-item-dismissable\"]").first() != null) {
|
2019-10-02 07:02:01 +02:00
|
|
|
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
|
2017-03-01 18:47:52 +01:00
|
|
|
@Override
|
2017-08-11 20:21:49 +02:00
|
|
|
public String getUrl() throws ParsingException {
|
2017-03-01 18:47:52 +01:00
|
|
|
try {
|
|
|
|
Element el = li.select("div[class=\"feed-item-dismissable\"]").first();
|
|
|
|
Element dl = el.select("h3").first().select("a").first();
|
|
|
|
return dl.attr("abs:href");
|
|
|
|
} catch (Exception e) {
|
|
|
|
throw new ParsingException("Could not get web page url for the video", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2017-08-11 20:21:49 +02:00
|
|
|
public String getName() throws ParsingException {
|
2017-03-01 18:47:52 +01:00
|
|
|
try {
|
|
|
|
Element el = li.select("div[class=\"feed-item-dismissable\"]").first();
|
|
|
|
Element dl = el.select("h3").first().select("a").first();
|
|
|
|
return dl.text();
|
|
|
|
} catch (Exception e) {
|
|
|
|
throw new ParsingException("Could not get title", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2017-08-10 04:50:29 +02:00
|
|
|
public String getUploaderName() throws ParsingException {
|
2017-09-11 15:19:16 +02:00
|
|
|
return uploaderName;
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
2017-11-25 02:38:46 +01:00
|
|
|
@Override
|
|
|
|
public String getUploaderUrl() throws ParsingException {
|
|
|
|
return uploaderUrl;
|
|
|
|
}
|
|
|
|
|
2017-03-01 18:47:52 +01:00
|
|
|
@Override
|
|
|
|
public String getThumbnailUrl() throws ParsingException {
|
|
|
|
try {
|
|
|
|
String url;
|
|
|
|
Element te = li.select("span[class=\"yt-thumb-clip\"]").first()
|
|
|
|
.select("img").first();
|
|
|
|
url = te.attr("abs:src");
|
|
|
|
// Sometimes youtube sends links to gif files which somehow seem to not exist
|
|
|
|
// anymore. Items with such gif also offer a secondary image source. So we are going
|
|
|
|
// to use that if we've caught such an item.
|
|
|
|
if (url.contains(".gif")) {
|
|
|
|
url = te.attr("abs:data-thumb");
|
|
|
|
}
|
|
|
|
return url;
|
|
|
|
} catch (Exception e) {
|
|
|
|
throw new ParsingException("Could not get thumbnail url", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|