Merge pull request #652 from litetex/fixYTCommentsAndAddDisabledComments

Fix yt comments and add disabled comments functionality
2024-12-12 20:10:00 +01:00 · 2021-07-12 16:31:50 +02:00 · 2021-07-12 16:31:50 +02:00 · b45bb411e8
commit b45bb411e8
parent 6fd93cdb31 fdebf3c6cd
3 changed files with 162 additions and 48 deletions
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java
@ -9,9 +9,16 @@ import javax.annotation.Nonnull;

 public abstract class CommentsExtractor extends ListExtractor<CommentsInfoItem> {

-    public CommentsExtractor(StreamingService service, ListLinkHandler uiHandler) {
+    public CommentsExtractor(final StreamingService service, final ListLinkHandler uiHandler) {
        super(service, uiHandler);
-        // TODO Auto-generated constructor stub
+    }
+
+    /**
+     * @apiNote Warning: This method is experimental and may get removed in a future release.
+     * @return <code>true</code> if the comments are disabled, otherwise <code>false</code> (default)
+     */
+    public boolean isCommentsDisabled() {
+        return false;
    }

    @Nonnull
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java
@ -13,45 +13,56 @@ import java.io.IOException;

 public class CommentsInfo extends ListInfo<CommentsInfoItem> {

-    private CommentsInfo(int serviceId, ListLinkHandler listUrlIdHandler, String name) {
+    private CommentsInfo(
+            final int serviceId,
+            final ListLinkHandler listUrlIdHandler,
+            final String name) {
        super(serviceId, listUrlIdHandler, name);
    }

-    public static CommentsInfo getInfo(String url) throws IOException, ExtractionException {
+    public static CommentsInfo getInfo(final String url) throws IOException, ExtractionException {
        return getInfo(NewPipe.getServiceByUrl(url), url);
    }

-    public static CommentsInfo getInfo(StreamingService serviceByUrl, String url) throws ExtractionException, IOException {
+    public static CommentsInfo getInfo(final StreamingService serviceByUrl, final String url)
+            throws ExtractionException, IOException {
        return getInfo(serviceByUrl.getCommentsExtractor(url));
    }

-    public static CommentsInfo getInfo(CommentsExtractor commentsExtractor) throws IOException, ExtractionException {
+    public static CommentsInfo getInfo(final CommentsExtractor commentsExtractor)
+            throws IOException, ExtractionException {
        // for services which do not have a comments extractor
-        if (null == commentsExtractor) {
+        if (commentsExtractor == null) {
            return null;
        }

        commentsExtractor.fetchPage();
-        String name = commentsExtractor.getName();
-        int serviceId = commentsExtractor.getServiceId();
-        ListLinkHandler listUrlIdHandler = commentsExtractor.getLinkHandler();
-        CommentsInfo commentsInfo = new CommentsInfo(serviceId, listUrlIdHandler, name);
+
+        final String name = commentsExtractor.getName();
+        final int serviceId = commentsExtractor.getServiceId();
+        final ListLinkHandler listUrlIdHandler = commentsExtractor.getLinkHandler();
+
+        final CommentsInfo commentsInfo = new CommentsInfo(serviceId, listUrlIdHandler, name);
        commentsInfo.setCommentsExtractor(commentsExtractor);
-        InfoItemsPage<CommentsInfoItem> initialCommentsPage = ExtractorHelper.getItemsPageOrLogError(commentsInfo,
-                commentsExtractor);
+        final InfoItemsPage<CommentsInfoItem> initialCommentsPage =
+                ExtractorHelper.getItemsPageOrLogError(commentsInfo, commentsExtractor);
+        commentsInfo.setCommentsDisabled(commentsExtractor.isCommentsDisabled());
        commentsInfo.setRelatedItems(initialCommentsPage.getItems());
        commentsInfo.setNextPage(initialCommentsPage.getNextPage());

        return commentsInfo;
    }

-    public static InfoItemsPage<CommentsInfoItem> getMoreItems(CommentsInfo commentsInfo, Page page)
-            throws ExtractionException, IOException {
+    public static InfoItemsPage<CommentsInfoItem> getMoreItems(
+            final CommentsInfo commentsInfo,
+            final Page page) throws ExtractionException, IOException {
        return getMoreItems(NewPipe.getService(commentsInfo.getServiceId()), commentsInfo, page);
    }

-    public static InfoItemsPage<CommentsInfoItem> getMoreItems(StreamingService service, CommentsInfo commentsInfo,
-                                                               Page page) throws IOException, ExtractionException {
+    public static InfoItemsPage<CommentsInfoItem> getMoreItems(
+            final StreamingService service,
+            final CommentsInfo commentsInfo,
+            final Page page) throws IOException, ExtractionException {
        if (null == commentsInfo.getCommentsExtractor()) {
            commentsInfo.setCommentsExtractor(service.getCommentsExtractor(commentsInfo.getUrl()));
            commentsInfo.getCommentsExtractor().fetchPage();
@ -60,13 +71,30 @@ public class CommentsInfo extends ListInfo<CommentsInfoItem> {
    }

    private transient CommentsExtractor commentsExtractor;
+    private boolean commentsDisabled = false;

    public CommentsExtractor getCommentsExtractor() {
        return commentsExtractor;
    }

-    public void setCommentsExtractor(CommentsExtractor commentsExtractor) {
+    public void setCommentsExtractor(final CommentsExtractor commentsExtractor) {
        this.commentsExtractor = commentsExtractor;
    }

+    /**
+     * @apiNote Warning: This method is experimental and may get removed in a future release.
+     * @return <code>true</code> if the comments are disabled, otherwise <code>false</code> (default)
+     * @see CommentsExtractor#isCommentsDisabled()
+     */
+    public boolean isCommentsDisabled() {
+        return commentsDisabled;
+    }
+
+    /**
+     * @apiNote Warning: This method is experimental and may get removed in a future release.
+     * @param commentsDisabled <code>true</code> if the comments are disabled, otherwise <code>false</code>
+     */
+    public void setCommentsDisabled(final boolean commentsDisabled) {
+        this.commentsDisabled = commentsDisabled;
+    }
 }
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java
@ -23,9 +23,11 @@ import javax.annotation.Nonnull;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.net.URLEncoder;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 import java.util.regex.Pattern;

 import static java.util.Collections.singletonList;
@ -41,53 +43,108 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
    private String ytClientName;
    private String responseBody;

-    public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) {
+    /**
+     * Caching mechanism and holder of the commentsDisabled value.
+     * <br>
+     * Initial value = empty: it is unknown whether comments are disabled or not.<br>
+     * Once some method calls {@link YoutubeCommentsExtractor#findInitialCommentsToken()},
+     * the value is set.<br>
+     * If that method, or another one that depends on the disabled-comments state,
+     * is called again afterwards, the method execution can avoid unnecessary calls.
+     */
+    private Optional<Boolean> optCommentsDisabled = Optional.empty();
+
+    public YoutubeCommentsExtractor(
+            final StreamingService service,
+            final ListLinkHandler uiHandler) {
        super(service, uiHandler);
    }

    @Override
-    public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException {
-        String commentsTokenInside = findValue(responseBody, "sectionListRenderer", "}");
-        if (!commentsTokenInside.contains("continuation\":\"")) {
-            commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
+    public InfoItemsPage<CommentsInfoItem> getInitialPage()
+            throws IOException, ExtractionException {
+
+        // Check if findInitialCommentsToken was already called and optCommentsDisabled initialized
+        if (optCommentsDisabled.orElse(false)) {
+            return getInfoItemsPageForDisabledComments();
        }
-        final String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\"");
+
+        // Get the token
+        final String commentsToken = findInitialCommentsToken();
+        // Check if the comments have been disabled
+        if (optCommentsDisabled.get()) {
+            return getInfoItemsPageForDisabledComments();
+        }
+
        return getPage(getNextPage(commentsToken));
    }

-    private Page getNextPage(JsonObject ajaxJson) throws ParsingException {
+    /**
+     * Finds the initial comments token and initializes commentsDisabled.
+     * @return the continuation token or null if none was found
+     */
+    private String findInitialCommentsToken() {
+        final String continuationStartPattern = "continuation\":\"";
+
+        String commentsTokenInside = findValue(responseBody, "sectionListRenderer", "}");
+        if (commentsTokenInside == null || !commentsTokenInside.contains(continuationStartPattern)) {
+            commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
+        }
+
+        // If no continuation token is found the comments are disabled
+        if (commentsTokenInside == null || !commentsTokenInside.contains(continuationStartPattern)) {
+            optCommentsDisabled = Optional.of(true);
+            return null;
+        }
+
+        // If a continuation token is found there are >= 0 comments
+        final String commentsToken = findValue(commentsTokenInside, continuationStartPattern, "\"");
+
+        optCommentsDisabled = Optional.of(false);
+
+        return commentsToken;
+    }
+
+    private InfoItemsPage<CommentsInfoItem> getInfoItemsPageForDisabledComments() {
+        return new InfoItemsPage<>(Collections.emptyList(), null, Collections.emptyList());
+    }
+
+    private Page getNextPage(final JsonObject ajaxJson) throws ParsingException {
        final JsonArray arr;
        try {
            arr = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations");
-        } catch (Exception e) {
+        } catch (final Exception e) {
            return null;
        }
        if (arr.isEmpty()) {
            return null;
        }
-        String continuation;
+        final String continuation;
        try {
            continuation = JsonUtils.getString(arr.getObject(0), "nextContinuationData.continuation");
-        } catch (Exception e) {
+        } catch (final Exception e) {
            return null;
        }
        return getNextPage(continuation);
    }

-    private Page getNextPage(String continuation) throws ParsingException {
-        Map<String, String> params = new HashMap<>();
+    private Page getNextPage(final String continuation) throws ParsingException {
+        final Map<String, String> params = new HashMap<>();
        params.put("action_get_comments", "1");
        params.put("pbj", "1");
        params.put("ctoken", continuation);
        try {
            return new Page("https://m.youtube.com/watch_comment?" + getDataString(params));
-        } catch (UnsupportedEncodingException e) {
+        } catch (final UnsupportedEncodingException e) {
            throw new ParsingException("Could not get next page url", e);
        }
    }

    @Override
    public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws IOException, ExtractionException {
+        if (optCommentsDisabled.orElse(false)) {
+            return getInfoItemsPageForDisabledComments();
+        }
        if (page == null || isNullOrEmpty(page.getUrl())) {
            throw new IllegalArgumentException("Page doesn't contain an URL");
        }
@ -96,7 +153,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
        final JsonObject ajaxJson;
        try {
            ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1);
-        } catch (Exception e) {
+        } catch (final Exception e) {
            throw new ParsingException("Could not parse json data for comments", e);
        }
        final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
@ -104,31 +161,32 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
        return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
    }

-    private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonObject ajaxJson) throws ParsingException {
-        JsonArray contents;
+    private void collectCommentsFrom(final CommentsInfoItemsCollector collector, final JsonObject ajaxJson) throws ParsingException {
+        final JsonArray contents;
        try {
            contents = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.items");
-        } catch (Exception e) {
+        } catch (final Exception e) {
            //no comments
            return;
        }
-        List<Object> comments;
+        final List<Object> comments;
        try {
            comments = JsonUtils.getValues(contents, "commentThreadRenderer.comment.commentRenderer");
-        } catch (Exception e) {
+        } catch (final Exception e) {
            throw new ParsingException("unable to get parse youtube comments", e);
        }

-        for (Object c : comments) {
+        for (final Object c : comments) {
            if (c instanceof JsonObject) {
-                CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor((JsonObject) c, getUrl(), getTimeAgoParser());
+                final CommentsInfoItemExtractor extractor =
+                        new YoutubeCommentsInfoItemExtractor((JsonObject) c, getUrl(), getTimeAgoParser());
                collector.commit(extractor);
            }
        }
    }

    @Override
-    public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
+    public void onFetchPage(@Nonnull final Downloader downloader) throws IOException, ExtractionException {
        final Map<String, List<String>> requestHeaders = new HashMap<>();
        requestHeaders.put("User-Agent", singletonList(USER_AGENT));
        final Response response = downloader.get(getUrl(), requestHeaders, getExtractorLocalization());
@ -138,8 +196,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
    }


-    private String makeAjaxRequest(String siteUrl) throws IOException, ReCaptchaException {
-        Map<String, List<String>> requestHeaders = new HashMap<>();
+    private String makeAjaxRequest(final String siteUrl) throws IOException, ReCaptchaException {
+        final Map<String, List<String>> requestHeaders = new HashMap<>();
        requestHeaders.put("Accept", singletonList("*/*"));
        requestHeaders.put("User-Agent", singletonList(USER_AGENT));
        requestHeaders.put("X-YouTube-Client-Version", singletonList(ytClientVersion));
@ -147,14 +205,15 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
        return getDownloader().get(siteUrl, requestHeaders, getExtractorLocalization()).responseBody();
    }

-    private String getDataString(Map<String, String> params) throws UnsupportedEncodingException {
-        StringBuilder result = new StringBuilder();
+    private String getDataString(final Map<String, String> params) throws UnsupportedEncodingException {
+        final StringBuilder result = new StringBuilder();
        boolean first = true;
-        for (Map.Entry<String, String> entry : params.entrySet()) {
-            if (first)
+        for (final Map.Entry<String, String> entry : params.entrySet()) {
+            if (first) {
                first = false;
-            else
+            } else {
                result.append("&");
+            }
            result.append(URLEncoder.encode(entry.getKey(), UTF_8));
            result.append("=");
            result.append(URLEncoder.encode(entry.getValue(), UTF_8));
@ -163,8 +222,28 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
    }

    private String findValue(final String doc, final String start, final String end) {
-        final int beginIndex = doc.indexOf(start) + start.length();
+        int beginIndex = doc.indexOf(start);
+        // Start string was not found
+        if (beginIndex == -1) {
+            return null;
+        }
+        beginIndex = beginIndex + start.length();
        final int endIndex = doc.indexOf(end, beginIndex);
+        // End string was not found
+        if (endIndex == -1) {
+            return null;
+        }
        return doc.substring(beginIndex, endIndex);
    }
+
+    @Override
+    public boolean isCommentsDisabled() {
+        // Check if commentsDisabled has to be initialized
+        if (!optCommentsDisabled.isPresent()) {
+            // Initialize commentsDisabled
+            this.findInitialCommentsToken();
+        }
+
+        return optCommentsDisabled.get();
+    }
 }