Adds files that were missing from the previous commit

Guilherme Calé 2017-12-29 18:32:34 +00:00
parent f3f069e135
commit ffd3056b60
6 changed files with 476 additions and 1 deletion

View File

@@ -54,7 +54,7 @@ public class ChannelInfo extends ListInfo {
     }

     public static ChannelInfo getInfoFromFeed(StreamingService service, String url) throws IOException, ExtractionException {
-        ChannelExtractor extractor = service.getFeedExtractor(url);
+        FeedExtractor extractor = service.getFeedExtractor(url);
         extractor.fetchPage();
         return getInfoFromFeed(extractor);
     }
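
A hypothetical caller sketch for the overload changed above (not part of the commit; `service` and `feedUrl` stand in for a concrete StreamingService and a feed URL it accepts):

    import org.schabi.newpipe.extractor.StreamingService;
    import org.schabi.newpipe.extractor.channel.ChannelInfo;
    import org.schabi.newpipe.extractor.exceptions.ExtractionException;

    import java.io.IOException;

    class ChannelInfoFromFeedSketch {
        static ChannelInfo load(StreamingService service, String feedUrl)
                throws IOException, ExtractionException {
            // Builds the service's FeedExtractor, fetches its page and maps the
            // result into a ChannelInfo, as shown in the hunk above.
            return ChannelInfo.getInfoFromFeed(service, feedUrl);
        }
    }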

View File

@@ -0,0 +1,30 @@
package org.schabi.newpipe.extractor.channel;

import org.schabi.newpipe.extractor.ListExtractor;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;

import java.io.IOException;

import javax.annotation.Nonnull;

public abstract class FeedExtractor extends ListExtractor {
    public FeedExtractor(StreamingService service, String url, String nextStreamsUrl) throws IOException, ExtractionException {
        super(service, url, nextStreamsUrl);
    }

    @Nonnull
    @Override
    protected UrlIdHandler getUrlIdHandler() throws ParsingException {
        return getService().getFeedUrlIdHandler();
    }

    public abstract String getAvatarUrl() throws ParsingException;

    public abstract String getBannerUrl() throws ParsingException;

    public abstract String getFeedUrl() throws ParsingException;

    public abstract long getSubscriberCount() throws ParsingException;

    public abstract String getDescription() throws ParsingException;
}
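
A minimal consumer sketch for this contract (hypothetical helper, not part of the commit; `service` is whichever StreamingService implements getFeedExtractor(), and `feedUrl` is a feed URL its handler accepts):

    import org.schabi.newpipe.extractor.StreamingService;
    import org.schabi.newpipe.extractor.channel.FeedExtractor;
    import org.schabi.newpipe.extractor.exceptions.ExtractionException;

    import java.io.IOException;

    class FeedExtractorUsageSketch {
        static void printFeedHeader(StreamingService service, String feedUrl)
                throws IOException, ExtractionException {
            FeedExtractor extractor = service.getFeedExtractor(feedUrl);
            extractor.fetchPage();                              // triggers onFetchPage() in the concrete service
            System.out.println(extractor.getName());            // from the Extractor base class
            System.out.println(extractor.getAvatarUrl());       // declared in FeedExtractor above
            System.out.println(extractor.getSubscriberCount()); // declared in FeedExtractor above
        }
    }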

View File

@@ -0,0 +1,113 @@
package org.schabi.newpipe.extractor.services.soundcloud;

import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;

import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.channel.FeedExtractor;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;

import java.io.IOException;

import javax.annotation.Nonnull;

public class SoundcloudFeedExtractor extends FeedExtractor {
    private String userId;
    private JsonObject user;

    public SoundcloudFeedExtractor(StreamingService service, String url, String nextStreamsUrl) throws IOException, ExtractionException {
        super(service, url, nextStreamsUrl);
    }

    @Override
    public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
        userId = getUrlIdHandler().getId(getOriginalUrl());
        String apiUrl = "https://api.soundcloud.com/users/" + userId +
                "?client_id=" + SoundcloudParsingHelper.clientId();

        String response = downloader.download(apiUrl);
        try {
            user = JsonParser.object().from(response);
        } catch (JsonParserException e) {
            throw new ParsingException("Could not parse json response", e);
        }
    }

    @Nonnull
    @Override
    public String getCleanUrl() {
        return user.isString("permalink_url") ? user.getString("permalink_url") : getOriginalUrl();
    }

    @Nonnull
    @Override
    public String getId() {
        return userId;
    }

    @Nonnull
    @Override
    public String getName() {
        return user.getString("username");
    }

    @Override
    public String getAvatarUrl() {
        return user.getString("avatar_url");
    }

    @Override
    public String getBannerUrl() {
        try {
            return user.getObject("visuals").getArray("visuals").getObject(0).getString("visual_url", "");
        } catch (NullPointerException e) {
            return null;
        }
    }

    @Override
    public String getFeedUrl() {
        return null;
    }

    @Override
    public long getSubscriberCount() {
        return user.getNumber("followers_count", 0).longValue();
    }

    @Override
    public String getDescription() throws ParsingException {
        return user.getString("description", "");
    }

    @Nonnull
    @Override
    public StreamInfoItemCollector getStreams() throws IOException, ExtractionException {
        StreamInfoItemCollector collector = new StreamInfoItemCollector(getServiceId());
        String apiUrl = "https://api-v2.soundcloud.com/users/" + getId() + "/tracks"
                + "?client_id=" + SoundcloudParsingHelper.clientId()
                + "&limit=20"
                + "&linked_partitioning=1";

        nextStreamsUrl = SoundcloudParsingHelper.getStreamsFromApiMinItems(15, collector, apiUrl);

        return collector;
    }

    @Override
    public NextItemsResult getNextStreams() throws IOException, ExtractionException {
        if (!hasMoreStreams()) {
            throw new ExtractionException("Channel doesn't have more streams");
        }

        StreamInfoItemCollector collector = new StreamInfoItemCollector(getServiceId());
        nextStreamsUrl = SoundcloudParsingHelper.getStreamsFromApiMinItems(15, collector, nextStreamsUrl);

        return new NextItemsResult(collector, nextStreamsUrl);
    }
}
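
The paging contract implemented by getStreams()/getNextStreams() above, as a hedged caller loop (a sketch only; `service` and `feedUrl` are placeholders, and it assumes NextItemsResult is the nested result type on ListExtractor, as its unqualified use above suggests):

    import org.schabi.newpipe.extractor.ListExtractor;
    import org.schabi.newpipe.extractor.StreamingService;
    import org.schabi.newpipe.extractor.channel.FeedExtractor;
    import org.schabi.newpipe.extractor.exceptions.ExtractionException;

    import java.io.IOException;

    class FeedPagingSketch {
        static void walkFeed(StreamingService service, String feedUrl)
                throws IOException, ExtractionException {
            FeedExtractor extractor = service.getFeedExtractor(feedUrl);
            extractor.fetchPage();
            extractor.getStreams();                // first batch; also records nextStreamsUrl
            while (extractor.hasMoreStreams()) {   // same guard getNextStreams() enforces
                ListExtractor.NextItemsResult next = extractor.getNextStreams();
                // consume `next` here; each call advances nextStreamsUrl until it is empty
            }
        }
    }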

View File

@@ -0,0 +1,35 @@
package org.schabi.newpipe.extractor.services.soundcloud;

import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.utils.Parser;

public class SoundcloudFeedUrlIdHandler implements UrlIdHandler {
    private static final SoundcloudFeedUrlIdHandler instance = new SoundcloudFeedUrlIdHandler();
    private static final String ID_PATTERN = "users:(\\d+)";

    public static SoundcloudFeedUrlIdHandler getInstance() {
        return instance;
    }

    @Override
    public String getUrl(String id) {
        return "https://feeds.soundcloud.com/users/soundcloud:users:" + id + "/sounds.rss";
    }

    @Override
    public String getId(String url) throws Parser.RegexException {
        return Parser.matchGroup1(ID_PATTERN, url);
    }

    @Override
    public String cleanUrl(String complexUrl) throws Parser.RegexException {
        return getUrl(getId(complexUrl));
    }

    @Override
    public boolean acceptUrl(String url) {
        return url.contains("feeds.soundcloud.com") && url.contains("sounds.rss");
    }
}
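
Round-trip sketch for the handler above (illustrative only; the numeric id is a made-up value):

    import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudFeedUrlIdHandler;
    import org.schabi.newpipe.extractor.utils.Parser;

    class SoundcloudFeedUrlIdHandlerSketch {
        public static void main(String[] args) throws Parser.RegexException {
            SoundcloudFeedUrlIdHandler handler = SoundcloudFeedUrlIdHandler.getInstance();

            String feedUrl = "https://feeds.soundcloud.com/users/soundcloud:users:12345/sounds.rss";
            String id = handler.getId(feedUrl);            // "12345", matched by ID_PATTERN
            String rebuilt = handler.getUrl(id);           // canonical sounds.rss feed URL again
            boolean accepted = handler.acceptUrl(rebuilt); // true: host and "sounds.rss" both present

            System.out.println(id + " " + rebuilt + " " + accepted);
        }
    }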

View File

@@ -0,0 +1,262 @@
package org.schabi.newpipe.extractor.services.youtube;

import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.channel.FeedExtractor;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils;

import java.io.IOException;

import javax.annotation.Nonnull;

public class YoutubeFeedExtractor extends FeedExtractor {
    private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
    private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";

    private Document doc;
    private Document nextStreamsAjax;
    private boolean fetchingNextStreams;

    public YoutubeFeedExtractor(StreamingService service, String url, String nextStreamsUrl) throws IOException, ExtractionException {
        super(service, url, nextStreamsUrl);
    }

    @Override
    public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
        YoutubeChannelUrlIdHandler urlIdHandler = YoutubeChannelUrlIdHandler.getInstance();
        YoutubeFeedUrlIdHandler feedIdHandler = YoutubeFeedUrlIdHandler.getInstance();

        String channelUrl = urlIdHandler.getUrl(feedIdHandler.getId(super.getCleanUrl())) + CHANNEL_URL_PARAMETERS;
        String pageContent = downloader.download(channelUrl);
        doc = Jsoup.parse(pageContent, channelUrl);

        if (!fetchingNextStreams) {
            nextStreamsUrl = getNextStreamsUrlFrom(doc);
        }
        nextStreamsAjax = null;
    }

    @Override
    protected boolean fetchPageUponCreation() {
        // Unfortunately, we have to fetch the page even if we are getting only next streams,
        // as they don't deliver enough information on their own (the channel name, for example).
        fetchingNextStreams = nextStreamsUrl != null && !nextStreamsUrl.isEmpty();
        return true;
    }

    @Nonnull
    @Override
    public String getCleanUrl() {
        try {
            return "https://www.youtube.com/channel/" + getId();
        } catch (ParsingException e) {
            return super.getCleanUrl();
        }
    }

    @Nonnull
    @Override
    public String getId() throws ParsingException {
        try {
            Element element = doc.getElementsByClass("yt-uix-subscription-button").first();
            if (element == null) element = doc.getElementsByClass("yt-uix-subscription-preferences-button").first();

            return element.attr("data-channel-external-id");
        } catch (Exception e) {
            throw new ParsingException("Could not get channel id", e);
        }
    }

    @Nonnull
    @Override
    public String getName() throws ParsingException {
        try {
            return doc.select("meta[property=\"og:title\"]").first().attr("content");
        } catch (Exception e) {
            throw new ParsingException("Could not get channel name", e);
        }
    }

    @Override
    public String getAvatarUrl() throws ParsingException {
        try {
            return doc.select("img[class=\"channel-header-profile-image\"]").first().attr("abs:src");
        } catch (Exception e) {
            throw new ParsingException("Could not get avatar", e);
        }
    }

    @Override
    public String getBannerUrl() throws ParsingException {
        try {
            Element el = doc.select("div[id=\"gh-banner\"]").first().select("style").first();
            String cssContent = el.html();
            String url = "https:" + Parser.matchGroup1("url\\(([^)]+)\\)", cssContent);

            return url.contains("s.ytimg.com") || url.contains("default_banner") ? null : url;
        } catch (Exception e) {
            throw new ParsingException("Could not get banner", e);
        }
    }

    @Override
    public String getFeedUrl() throws ParsingException {
        try {
            return CHANNEL_FEED_BASE + getId();
        } catch (Exception e) {
            throw new ParsingException("Could not get feed url", e);
        }
    }

    @Override
    public long getSubscriberCount() throws ParsingException {
        Element el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]").first();
        if (el != null) {
            return Long.parseLong(Utils.removeNonDigitCharacters(el.text()));
        } else {
            throw new ParsingException("Could not get subscriber count");
        }
    }

    @Override
    public String getDescription() throws ParsingException {
        try {
            return doc.select("meta[name=\"description\"]").first().attr("content");
        } catch (Exception e) {
            throw new ParsingException("Could not get channel description", e);
        }
    }

    @Nonnull
    @Override
    public StreamInfoItemCollector getStreams() throws IOException, ExtractionException {
        StreamInfoItemCollector collector = new StreamInfoItemCollector(getServiceId());
        Element ul = doc.select("ul[id=\"browse-items-primary\"]").first();
        collectStreamsFrom(collector, ul);
        return collector;
    }

    @Override
    public NextItemsResult getNextStreams() throws IOException, ExtractionException {
        if (!hasMoreStreams()) {
            throw new ExtractionException("Channel doesn't have more streams");
        }

        StreamInfoItemCollector collector = new StreamInfoItemCollector(getServiceId());
        setupNextStreamsAjax(NewPipe.getDownloader());
        collectStreamsFrom(collector, nextStreamsAjax.select("body").first());

        return new NextItemsResult(collector, nextStreamsUrl);
    }

    private void setupNextStreamsAjax(Downloader downloader) throws IOException, ReCaptchaException, ParsingException {
        String ajaxDataRaw = downloader.download(nextStreamsUrl);
        try {
            JsonObject ajaxData = JsonParser.object().from(ajaxDataRaw);

            String htmlDataRaw = ajaxData.getString("content_html");
            nextStreamsAjax = Jsoup.parse(htmlDataRaw, nextStreamsUrl);

            String nextStreamsHtmlDataRaw = ajaxData.getString("load_more_widget_html");
            if (!nextStreamsHtmlDataRaw.isEmpty()) {
                nextStreamsUrl = getNextStreamsUrlFrom(Jsoup.parse(nextStreamsHtmlDataRaw, nextStreamsUrl));
            } else {
                nextStreamsUrl = "";
            }
        } catch (JsonParserException e) {
            throw new ParsingException("Could not parse json data for next streams", e);
        }
    }

    private String getNextStreamsUrlFrom(Document d) throws ParsingException {
        try {
            Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
            if (button != null) {
                return button.attr("abs:data-uix-load-more-href");
            } else {
                // Sometimes channels are simply so small that they don't have a
                // "load more" button for further streams/videos.
                return "";
            }
        } catch (Exception e) {
            throw new ParsingException("Could not get next streams' url", e);
        }
    }

    private void collectStreamsFrom(StreamInfoItemCollector collector, Element element) throws ParsingException {
        collector.reset();

        final String uploaderName = getName();
        final String uploaderUrl = getCleanUrl();

        for (final Element li : element.children()) {
            if (li.select("div[class=\"feed-item-dismissable\"]").first() != null) {
                collector.commit(new YoutubeStreamInfoItemExtractor(li) {
                    @Override
                    public String getUrl() throws ParsingException {
                        try {
                            Element el = li.select("div[class=\"feed-item-dismissable\"]").first();
                            Element dl = el.select("h3").first().select("a").first();
                            return dl.attr("abs:href");
                        } catch (Exception e) {
                            throw new ParsingException("Could not get web page url for the video", e);
                        }
                    }

                    @Override
                    public String getName() throws ParsingException {
                        try {
                            Element el = li.select("div[class=\"feed-item-dismissable\"]").first();
                            Element dl = el.select("h3").first().select("a").first();
                            return dl.text();
                        } catch (Exception e) {
                            throw new ParsingException("Could not get title", e);
                        }
                    }

                    @Override
                    public String getUploaderName() throws ParsingException {
                        return uploaderName;
                    }

                    @Override
                    public String getUploaderUrl() throws ParsingException {
                        return uploaderUrl;
                    }

                    @Override
                    public String getThumbnailUrl() throws ParsingException {
                        try {
                            String url;
                            Element te = li.select("span[class=\"yt-thumb-clip\"]").first()
                                    .select("img").first();
                            url = te.attr("abs:src");
                            // Sometimes YouTube sends links to gif files which somehow seem to not exist
                            // anymore. Items with such a gif also offer a secondary image source, so we
                            // use that one if we've caught such an item.
                            if (url.contains(".gif")) {
                                url = te.attr("abs:data-thumb");
                            }
                            return url;
                        } catch (Exception e) {
                            throw new ParsingException("Could not get thumbnail url", e);
                        }
                    }
                });
            }
        }
    }
}

View File

@@ -0,0 +1,35 @@
package org.schabi.newpipe.extractor.services.youtube;

import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.utils.Parser;

public class YoutubeFeedUrlIdHandler implements UrlIdHandler {
    private static final YoutubeFeedUrlIdHandler instance = new YoutubeFeedUrlIdHandler();
    private static final String ID_PATTERN = "channel_id=([A-Za-z0-9_-]+)";

    public static YoutubeFeedUrlIdHandler getInstance() {
        return instance;
    }

    @Override
    public String getUrl(String id) {
        return "https://www.youtube.com/feeds/videos.xml?channel_id=" + id;
    }

    @Override
    public String getId(String url) throws Parser.RegexException {
        return Parser.matchGroup1(ID_PATTERN, url);
    }

    @Override
    public String cleanUrl(String complexUrl) throws Parser.RegexException {
        return getUrl(getId(complexUrl));
    }

    @Override
    public boolean acceptUrl(String url) {
        return (url.contains("youtube") || url.contains("youtu.be"))
                && url.contains("videos.xml?channel_id=");
    }
}