diff --git a/src/main/java/org/schabi/newpipe/extractor/channel/ChannelInfo.java b/src/main/java/org/schabi/newpipe/extractor/channel/ChannelInfo.java
index adfdd44cf..e566bb545 100644
--- a/src/main/java/org/schabi/newpipe/extractor/channel/ChannelInfo.java
+++ b/src/main/java/org/schabi/newpipe/extractor/channel/ChannelInfo.java
@@ -54,7 +54,7 @@ public class ChannelInfo extends ListInfo {
     }
 
     public static ChannelInfo getInfoFromFeed(StreamingService service, String url) throws IOException, ExtractionException {
-        ChannelExtractor extractor = service.getFeedExtractor(url);
+        FeedExtractor extractor = service.getFeedExtractor(url);
         extractor.fetchPage();
         return getInfoFromFeed(service.getFeedExtractor(url));
     }
diff --git a/src/main/java/org/schabi/newpipe/extractor/channel/FeedExtractor.java b/src/main/java/org/schabi/newpipe/extractor/channel/FeedExtractor.java
new file mode 100644
index 000000000..ef1689a87
--- /dev/null
+++ b/src/main/java/org/schabi/newpipe/extractor/channel/FeedExtractor.java
@@ -0,0 +1,30 @@
+package org.schabi.newpipe.extractor.channel;
+
+import org.schabi.newpipe.extractor.ListExtractor;
+import org.schabi.newpipe.extractor.StreamingService;
+import org.schabi.newpipe.extractor.UrlIdHandler;
+import org.schabi.newpipe.extractor.exceptions.ExtractionException;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+
+import java.io.IOException;
+
+import javax.annotation.Nonnull;
+
+public abstract class FeedExtractor extends ListExtractor {
+
+    public FeedExtractor(StreamingService service, String url, String nextStreamsUrl) throws IOException, ExtractionException {
+        super(service, url, nextStreamsUrl);
+    }
+
+    @Nonnull
+    @Override
+    protected UrlIdHandler getUrlIdHandler() throws ParsingException {
+        return getService().getFeedUrlIdHandler();
+    }
+
+    public abstract String getAvatarUrl() throws ParsingException;
+    public abstract String getBannerUrl() throws ParsingException;
+    public abstract String getFeedUrl() throws ParsingException;
+    public abstract long getSubscriberCount() throws ParsingException;
+    public abstract String getDescription() throws ParsingException;
+}
diff --git a/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudFeedExtractor.java b/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudFeedExtractor.java
new file mode 100644
index 000000000..eda2a1546
--- /dev/null
+++ b/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudFeedExtractor.java
@@ -0,0 +1,113 @@
+package org.schabi.newpipe.extractor.services.soundcloud;
+
+import com.grack.nanojson.JsonObject;
+import com.grack.nanojson.JsonParser;
+import com.grack.nanojson.JsonParserException;
+
+import org.schabi.newpipe.extractor.Downloader;
+import org.schabi.newpipe.extractor.StreamingService;
+import org.schabi.newpipe.extractor.channel.FeedExtractor;
+import org.schabi.newpipe.extractor.exceptions.ExtractionException;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
+
+import java.io.IOException;
+
+import javax.annotation.Nonnull;
+
+public class SoundcloudFeedExtractor extends FeedExtractor {
+    private String userId;
+    private JsonObject user;
+
+    public SoundcloudFeedExtractor(StreamingService service, String url, String nextStreamsUrl) throws IOException, ExtractionException {
+        super(service, url, nextStreamsUrl);
+    }
+
+    @Override
+    public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
+
+        userId = getUrlIdHandler().getId(getOriginalUrl());
+        String apiUrl = "https://api.soundcloud.com/users/" + userId
+                + "?client_id=" + SoundcloudParsingHelper.clientId();
+
+        String response = downloader.download(apiUrl);
+        try {
+            user = JsonParser.object().from(response);
+        } catch (JsonParserException e) {
+            throw new ParsingException("Could not parse json response", e);
+        }
+    }
+
+    @Nonnull
+    @Override
+    public String getCleanUrl() {
+        return user.isString("permalink_url") ? user.getString("permalink_url") : getOriginalUrl();
+    }
+
+    @Nonnull
+    @Override
+    public String getId() {
+        return userId;
+    }
+
+    @Nonnull
+    @Override
+    public String getName() {
+        return user.getString("username");
+    }
+
+    @Override
+    public String getAvatarUrl() {
+        return user.getString("avatar_url");
+    }
+
+    @Override
+    public String getBannerUrl() {
+        try {
+            return user.getObject("visuals").getArray("visuals").getObject(0).getString("visual_url", "");
+        } catch (NullPointerException e) {
+            return null;
+        }
+    }
+
+    @Override
+    public String getFeedUrl() {
+        return null;
+    }
+
+    @Override
+    public long getSubscriberCount() {
+        return user.getNumber("followers_count", 0).longValue();
+    }
+
+    @Override
+    public String getDescription() throws ParsingException {
+        return user.getString("description", "");
+    }
+
+    @Nonnull
+    @Override
+    public StreamInfoItemCollector getStreams() throws IOException, ExtractionException {
+        StreamInfoItemCollector collector = new StreamInfoItemCollector(getServiceId());
+
+        String apiUrl = "https://api-v2.soundcloud.com/users/" + getId() + "/tracks"
+                + "?client_id=" + SoundcloudParsingHelper.clientId()
+                + "&limit=20"
+                + "&linked_partitioning=1";
+
+        nextStreamsUrl = SoundcloudParsingHelper.getStreamsFromApiMinItems(15, collector, apiUrl);
+        return collector;
+    }
+
+    @Override
+    public NextItemsResult getNextStreams() throws IOException, ExtractionException {
+        if (!hasMoreStreams()) {
+            throw new ExtractionException("Channel doesn't have more streams");
+        }
+
+        StreamInfoItemCollector collector = new StreamInfoItemCollector(getServiceId());
+        nextStreamsUrl = SoundcloudParsingHelper.getStreamsFromApiMinItems(15, collector, nextStreamsUrl);
+
+        return new NextItemsResult(collector, nextStreamsUrl);
+    }
+}
diff --git a/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudFeedUrlIdHandler.java b/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudFeedUrlIdHandler.java
new file mode 100644
index 000000000..cbd7605e8
--- /dev/null
+++ b/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudFeedUrlIdHandler.java
@@ -0,0 +1,35 @@
+package org.schabi.newpipe.extractor.services.soundcloud;
+
+import org.schabi.newpipe.extractor.UrlIdHandler;
+import org.schabi.newpipe.extractor.utils.Parser;
+
+public class SoundcloudFeedUrlIdHandler implements UrlIdHandler {
+
+    private static final SoundcloudFeedUrlIdHandler instance = new SoundcloudFeedUrlIdHandler();
+    private static final String ID_PATTERN = "users:(\\d+)";
+
+    public static SoundcloudFeedUrlIdHandler getInstance() {
+        return instance;
+    }
+
+    @Override
+    public String getUrl(String id) {
+        return "https://feeds.soundcloud.com/users/soundcloud:users:" + id + "/sounds.rss";
+    }
+
+    @Override
+    public String getId(String url) throws Parser.RegexException {
+        return Parser.matchGroup1(ID_PATTERN, url);
+    }
+
+    @Override
+    public String cleanUrl(String complexUrl) throws Parser.RegexException {
+        return getUrl(getId(complexUrl));
+    }
+
+    @Override
+    public boolean acceptUrl(String url) {
+        return url.contains("feeds.soundcloud.com") && url.contains("sounds.rss");
+    }
+
+}
diff --git a/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeFeedExtractor.java b/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeFeedExtractor.java
new file mode 100644
index 000000000..e23a3d5c4
--- /dev/null
+++ b/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeFeedExtractor.java
@@ -0,0 +1,262 @@
+package org.schabi.newpipe.extractor.services.youtube;
+
+import com.grack.nanojson.JsonObject;
+import com.grack.nanojson.JsonParser;
+import com.grack.nanojson.JsonParserException;
+
+import javax.annotation.Nonnull;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.schabi.newpipe.extractor.Downloader;
+import org.schabi.newpipe.extractor.NewPipe;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
+import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
+import org.schabi.newpipe.extractor.utils.Parser;
+import org.schabi.newpipe.extractor.utils.Utils;
+import org.schabi.newpipe.extractor.StreamingService;
+import org.schabi.newpipe.extractor.channel.FeedExtractor;
+import org.schabi.newpipe.extractor.exceptions.ExtractionException;
+
+import java.io.IOException;
+
+public class YoutubeFeedExtractor extends FeedExtractor {
+
+    private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
+    private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";
+
+    private Document doc;
+
+    private Document nextStreamsAjax;
+
+    private boolean fetchingNextStreams;
+
+    public YoutubeFeedExtractor(StreamingService service, String url, String nextStreamsUrl) throws IOException, ExtractionException {
+        super(service, url, nextStreamsUrl);
+    }
+
+    @Override
+    public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
+        YoutubeChannelUrlIdHandler urlIdHandler = YoutubeChannelUrlIdHandler.getInstance();
+        YoutubeFeedUrlIdHandler feedIdHandler = YoutubeFeedUrlIdHandler.getInstance();
+        String channelUrl = urlIdHandler.getUrl(feedIdHandler.getId(super.getCleanUrl())) + CHANNEL_URL_PARAMETERS;
+        String pageContent = downloader.download(channelUrl);
+        doc = Jsoup.parse(pageContent, channelUrl);
+
+        if (!fetchingNextStreams) {
+            nextStreamsUrl = getNextStreamsUrlFrom(doc);
+        }
+        nextStreamsAjax = null;
+    }
+
+    @Override
+    protected boolean fetchPageUponCreation() {
+        // Unfortunately, we have to fetch the page even if we are getting only next streams,
+        // as they don't deliver enough information on their own (the channel name, for example).
+        fetchingNextStreams = nextStreamsUrl != null && !nextStreamsUrl.isEmpty();
+        return true;
+    }
+
+    @Nonnull
+    @Override
+    public String getCleanUrl() {
+        try {
+            return "https://www.youtube.com/channel/" + getId();
+        } catch (ParsingException e) {
+            return super.getCleanUrl();
+        }
+    }
+
+    @Nonnull
+    @Override
+    public String getId() throws ParsingException {
+        try {
+            Element element = doc.getElementsByClass("yt-uix-subscription-button").first();
+            if (element == null) element = doc.getElementsByClass("yt-uix-subscription-preferences-button").first();
+
+            return element.attr("data-channel-external-id");
+        } catch (Exception e) {
+            throw new ParsingException("Could not get channel id", e);
+        }
+    }
+
+    @Nonnull
+    @Override
+    public String getName() throws ParsingException {
+        try {
+            return doc.select("meta[property=\"og:title\"]").first().attr("content");
+        } catch (Exception e) {
+            throw new ParsingException("Could not get channel name", e);
+        }
+    }
+
+    @Override
+    public String getAvatarUrl() throws ParsingException {
+        try {
+            return doc.select("img[class=\"channel-header-profile-image\"]").first().attr("abs:src");
+        } catch (Exception e) {
+            throw new ParsingException("Could not get avatar", e);
+        }
+    }
+
+    @Override
+    public String getBannerUrl() throws ParsingException {
+        try {
+            Element el = doc.select("div[id=\"gh-banner\"]").first().select("style").first();
+            String cssContent = el.html();
+            String url = "https:" + Parser.matchGroup1("url\\(([^)]+)\\)", cssContent);
+
+            return url.contains("s.ytimg.com") || url.contains("default_banner") ? null : url;
+        } catch (Exception e) {
+            throw new ParsingException("Could not get Banner", e);
+        }
+    }
+
+    @Override
+    public String getFeedUrl() throws ParsingException {
+        try {
+            return CHANNEL_FEED_BASE + getId();
+        } catch (Exception e) {
+            throw new ParsingException("Could not get feed url", e);
+        }
+    }
+
+    @Override
+    public long getSubscriberCount() throws ParsingException {
+        Element el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]").first();
+        if (el != null) {
+            return Long.parseLong(Utils.removeNonDigitCharacters(el.text()));
+        } else {
+            throw new ParsingException("Could not get subscriber count");
+        }
+    }
+
+    @Override
+    public String getDescription() throws ParsingException {
+        try {
+            return doc.select("meta[name=\"description\"]").first().attr("content");
+        } catch (Exception e) {
+            throw new ParsingException("Could not get channel description", e);
+        }
+    }
+
+    @Nonnull
+    @Override
+    public StreamInfoItemCollector getStreams() throws IOException, ExtractionException {
+        StreamInfoItemCollector collector = new StreamInfoItemCollector(getServiceId());
+        Element ul = doc.select("ul[id=\"browse-items-primary\"]").first();
+        collectStreamsFrom(collector, ul);
+        return collector;
+    }
+
+    @Override
+    public NextItemsResult getNextStreams() throws IOException, ExtractionException {
+        if (!hasMoreStreams()) {
+            throw new ExtractionException("Channel doesn't have more streams");
+        }
+
+        StreamInfoItemCollector collector = new StreamInfoItemCollector(getServiceId());
+        setupNextStreamsAjax(NewPipe.getDownloader());
+        collectStreamsFrom(collector, nextStreamsAjax.select("body").first());
+
+        return new NextItemsResult(collector, nextStreamsUrl);
+    }
+
+    private void setupNextStreamsAjax(Downloader downloader) throws IOException, ReCaptchaException, ParsingException {
+        String ajaxDataRaw = downloader.download(nextStreamsUrl);
+        try {
+            JsonObject ajaxData = JsonParser.object().from(ajaxDataRaw);
+
+            String htmlDataRaw = ajaxData.getString("content_html");
+            nextStreamsAjax = Jsoup.parse(htmlDataRaw, nextStreamsUrl);
+
+            String nextStreamsHtmlDataRaw = ajaxData.getString("load_more_widget_html");
+            if (!nextStreamsHtmlDataRaw.isEmpty()) {
+                nextStreamsUrl = getNextStreamsUrlFrom(Jsoup.parse(nextStreamsHtmlDataRaw, nextStreamsUrl));
+            } else {
+                nextStreamsUrl = "";
+            }
+        } catch (JsonParserException e) {
+            throw new ParsingException("Could not parse json data for next streams", e);
+        }
+    }
+
+    private String getNextStreamsUrlFrom(org.jsoup.nodes.Document d) throws ParsingException {
+        try {
+            Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
+            if (button != null) {
+                return button.attr("abs:data-uix-load-more-href");
+            } else {
+                // Sometimes channels are simply so small that they don't have more streams/videos.
+                return "";
+            }
+        } catch (Exception e) {
+            throw new ParsingException("could not get next streams' url", e);
+        }
+    }
+
+    private void collectStreamsFrom(StreamInfoItemCollector collector, Element element) throws ParsingException {
+        collector.reset();
+
+        final String uploaderName = getName();
+        final String uploaderUrl = getCleanUrl();
+        for (final Element li : element.children()) {
+            if (li.select("div[class=\"feed-item-dismissable\"]").first() != null) {
+                collector.commit(new YoutubeStreamInfoItemExtractor(li) {
+                    @Override
+                    public String getUrl() throws ParsingException {
+                        try {
+                            Element el = li.select("div[class=\"feed-item-dismissable\"]").first();
+                            Element dl = el.select("h3").first().select("a").first();
+                            return dl.attr("abs:href");
+                        } catch (Exception e) {
+                            throw new ParsingException("Could not get web page url for the video", e);
+                        }
+                    }
+
+                    @Override
+                    public String getName() throws ParsingException {
+                        try {
+                            Element el = li.select("div[class=\"feed-item-dismissable\"]").first();
+                            Element dl = el.select("h3").first().select("a").first();
+                            return dl.text();
+                        } catch (Exception e) {
+                            throw new ParsingException("Could not get title", e);
+                        }
+                    }
+
+                    @Override
+                    public String getUploaderName() throws ParsingException {
+                        return uploaderName;
+                    }
+
+                    @Override
+                    public String getUploaderUrl() throws ParsingException {
+                        return uploaderUrl;
+                    }
+
+                    @Override
+                    public String getThumbnailUrl() throws ParsingException {
+                        try {
+                            String url;
+                            Element te = li.select("span[class=\"yt-thumb-clip\"]").first()
+                                    .select("img").first();
+                            url = te.attr("abs:src");
+                            // Sometimes youtube sends links to gif files which somehow seem to not exist
+                            // anymore. Items with such a gif also offer a secondary image source, so we are
+                            // going to use that if we've caught such an item.
+                            if (url.contains(".gif")) {
+                                url = te.attr("abs:data-thumb");
+                            }
+                            return url;
+                        } catch (Exception e) {
+                            throw new ParsingException("Could not get thumbnail url", e);
+                        }
+                    }
+                });
+            }
+        }
+    }
+}
diff --git a/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeFeedUrlIdHandler.java b/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeFeedUrlIdHandler.java
new file mode 100644
index 000000000..28e49e540
--- /dev/null
+++ b/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeFeedUrlIdHandler.java
@@ -0,0 +1,35 @@
+package org.schabi.newpipe.extractor.services.youtube;
+
+import org.schabi.newpipe.extractor.UrlIdHandler;
+import org.schabi.newpipe.extractor.utils.Parser;
+
+public class YoutubeFeedUrlIdHandler implements UrlIdHandler {
+
+    private static final YoutubeFeedUrlIdHandler instance = new YoutubeFeedUrlIdHandler();
+    private static final String ID_PATTERN = "channel_id=([A-Za-z0-9_-]+)";
+
+    public static YoutubeFeedUrlIdHandler getInstance() {
+        return instance;
+    }
+
+    @Override
+    public String getUrl(String id) {
+        return "https://www.youtube.com/feeds/videos.xml?channel_id=" + id;
+    }
+
+    @Override
+    public String getId(String url) throws Parser.RegexException {
+        return Parser.matchGroup1(ID_PATTERN, url);
+    }
+
+    @Override
+    public String cleanUrl(String complexUrl) throws Parser.RegexException {
+        return getUrl(getId(complexUrl));
+    }
+
+    @Override
+    public boolean acceptUrl(String url) {
+        return (url.contains("youtube") || url.contains("youtu.be"))
+                && url.contains("videos.xml?channel_id=");
+    }
+}
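
Reviewer note, not part of the patch: a minimal sketch of how the new FeedExtractor API introduced above might be driven by a caller. It assumes NewPipe has already been initialized with a Downloader and that the caller already holds the StreamingService instance matching the feed URL; apart from that it only uses members that appear in this diff or its base classes (getFeedExtractor, fetchPage as used by ChannelInfo.getInfoFromFeed, getStreams, hasMoreStreams, getNextStreams and the FeedExtractor getters). The helper class name is purely illustrative, and the visibility of hasMoreStreams() to external callers is an assumption based on the pagination pattern shown in the extractors.

    import java.io.IOException;

    import org.schabi.newpipe.extractor.StreamingService;
    import org.schabi.newpipe.extractor.channel.FeedExtractor;
    import org.schabi.newpipe.extractor.exceptions.ExtractionException;
    import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;

    // Hypothetical helper, for illustration only.
    public class FeedExtractorUsageSketch {

        // Prints basic feed metadata and walks the first two pages of stream items.
        public static void printFeed(StreamingService service, String feedUrl)
                throws IOException, ExtractionException {
            // getFeedExtractor(url) is the service hook used by ChannelInfo.getInfoFromFeed above.
            FeedExtractor extractor = service.getFeedExtractor(feedUrl);
            extractor.fetchPage();

            System.out.println(extractor.getName());
            System.out.println("subscribers: " + extractor.getSubscriberCount());
            System.out.println("feed url:    " + extractor.getFeedUrl());

            // First page of items, then the same pagination the extractors use internally
            // (assuming hasMoreStreams() is visible to callers).
            StreamInfoItemCollector firstPage = extractor.getStreams();
            if (extractor.hasMoreStreams()) {
                extractor.getNextStreams();
            }
        }
    }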