From 688a1c316bb40c3536c2a003a07ad7e8462eef26 Mon Sep 17 00:00:00 2001 From: litetex <40789489+litetex@users.noreply.github.com> Date: Sun, 13 Jun 2021 21:11:11 +0200 Subject: [PATCH] Fixed Exception when YT comments are disabled and added ``commentsDisabled`` field * Fixed code: Added missing finals (according to NewPipes Checkstyle guide) * Fixed ``findValue`` method in ``YoutubeCommentsExtractor`` --- .../extractor/comments/CommentsInfo.java | 51 ++++--- .../extractors/YoutubeCommentsExtractor.java | 130 ++++++++++++++---- 2 files changed, 135 insertions(+), 46 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java index 49bbaf090..8803aa575 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java @@ -13,45 +13,56 @@ import java.io.IOException; public class CommentsInfo extends ListInfo { - private CommentsInfo(int serviceId, ListLinkHandler listUrlIdHandler, String name) { + private CommentsInfo( + final int serviceId, + final ListLinkHandler listUrlIdHandler, + final String name) { super(serviceId, listUrlIdHandler, name); } - public static CommentsInfo getInfo(String url) throws IOException, ExtractionException { + public static CommentsInfo getInfo(final String url) throws IOException, ExtractionException { return getInfo(NewPipe.getServiceByUrl(url), url); } - public static CommentsInfo getInfo(StreamingService serviceByUrl, String url) throws ExtractionException, IOException { + public static CommentsInfo getInfo(final StreamingService serviceByUrl, final String url) + throws ExtractionException, IOException { return getInfo(serviceByUrl.getCommentsExtractor(url)); } - public static CommentsInfo getInfo(CommentsExtractor commentsExtractor) throws IOException, ExtractionException { + public static 
CommentsInfo getInfo(final CommentsExtractor commentsExtractor) + throws IOException, ExtractionException { // for services which do not have a comments extractor - if (null == commentsExtractor) { + if (commentsExtractor == null) { return null; } commentsExtractor.fetchPage(); - String name = commentsExtractor.getName(); - int serviceId = commentsExtractor.getServiceId(); - ListLinkHandler listUrlIdHandler = commentsExtractor.getLinkHandler(); - CommentsInfo commentsInfo = new CommentsInfo(serviceId, listUrlIdHandler, name); + + final String name = commentsExtractor.getName(); + final int serviceId = commentsExtractor.getServiceId(); + final ListLinkHandler listUrlIdHandler = commentsExtractor.getLinkHandler(); + + final CommentsInfo commentsInfo = new CommentsInfo(serviceId, listUrlIdHandler, name); commentsInfo.setCommentsExtractor(commentsExtractor); - InfoItemsPage initialCommentsPage = ExtractorHelper.getItemsPageOrLogError(commentsInfo, - commentsExtractor); + final InfoItemsPage initialCommentsPage = + ExtractorHelper.getItemsPageOrLogError(commentsInfo, commentsExtractor); + commentsInfo.setCommentsDisabled(commentsExtractor.isCommentsDisabled()); commentsInfo.setRelatedItems(initialCommentsPage.getItems()); commentsInfo.setNextPage(initialCommentsPage.getNextPage()); return commentsInfo; } - public static InfoItemsPage getMoreItems(CommentsInfo commentsInfo, Page page) - throws ExtractionException, IOException { + public static InfoItemsPage getMoreItems( + final CommentsInfo commentsInfo, + final Page page) throws ExtractionException, IOException { return getMoreItems(NewPipe.getService(commentsInfo.getServiceId()), commentsInfo, page); } - public static InfoItemsPage getMoreItems(StreamingService service, CommentsInfo commentsInfo, - Page page) throws IOException, ExtractionException { + public static InfoItemsPage getMoreItems( + final StreamingService service, + final CommentsInfo commentsInfo, + final Page page) throws IOException, 
ExtractionException { if (null == commentsInfo.getCommentsExtractor()) { commentsInfo.setCommentsExtractor(service.getCommentsExtractor(commentsInfo.getUrl())); commentsInfo.getCommentsExtractor().fetchPage(); @@ -60,13 +71,21 @@ public class CommentsInfo extends ListInfo { } private transient CommentsExtractor commentsExtractor; + private boolean commentsDisabled = false; public CommentsExtractor getCommentsExtractor() { return commentsExtractor; } - public void setCommentsExtractor(CommentsExtractor commentsExtractor) { + public void setCommentsExtractor(final CommentsExtractor commentsExtractor) { this.commentsExtractor = commentsExtractor; } + public boolean isCommentsDisabled() { + return commentsDisabled; + } + + public void setCommentsDisabled(final boolean commentsDisabled) { + this.commentsDisabled = commentsDisabled; + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index 6c4a3c938..a4b225242 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -23,9 +23,11 @@ import javax.annotation.Nonnull; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.regex.Pattern; import static java.util.Collections.singletonList; @@ -41,53 +43,99 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { private String ytClientName; private String responseBody; - public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) { + private Optional optCommentsDisabled = 
Optional.empty(); + + public YoutubeCommentsExtractor( + final StreamingService service, + final ListLinkHandler uiHandler) { super(service, uiHandler); } @Override - public InfoItemsPage getInitialPage() throws IOException, ExtractionException { - String commentsTokenInside = findValue(responseBody, "sectionListRenderer", "}"); - if (!commentsTokenInside.contains("continuation\":\"")) { - commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}"); + public InfoItemsPage getInitialPage() + throws IOException, ExtractionException { + + // Check if findInitialCommentsToken was already called and found the comments to be disabled + if (optCommentsDisabled.orElse(false)) { + return getInfoItemsPageForDisabledComments(); } - final String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\""); + + // Get the token + final String commentsToken = findInitialCommentsToken(); + // Check if the comments have been disabled + if (optCommentsDisabled.get()) { + return getInfoItemsPageForDisabledComments(); + } + return getPage(getNextPage(commentsToken)); } - private Page getNextPage(JsonObject ajaxJson) throws ParsingException { + /** + * Finds the initial comments token and initializes commentsDisabled. 
+ * @return the initial continuation token, or {@code null} if none could be found + */ + private String findInitialCommentsToken() { + final String continuationStartPattern = "continuation\":\""; + + String commentsTokenInside = findValue(responseBody, "sectionListRenderer", "}"); + if (commentsTokenInside == null || !commentsTokenInside.contains(continuationStartPattern)) { + commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}"); + } + + // If no continuation token is found the comments are disabled + if (commentsTokenInside == null || !commentsTokenInside.contains(continuationStartPattern)) { + optCommentsDisabled = Optional.of(true); + return null; + } + + // If a continuation token is found there are >= 0 comments + final String commentsToken = findValue(commentsTokenInside, continuationStartPattern, "\""); + + optCommentsDisabled = Optional.of(false); + + return commentsToken; + } + + private InfoItemsPage getInfoItemsPageForDisabledComments() { + return new InfoItemsPage<>(Collections.emptyList(), null, Collections.emptyList()); + } + + private Page getNextPage(final JsonObject ajaxJson) throws ParsingException { final JsonArray arr; try { arr = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations"); - } catch (Exception e) { + } catch (final Exception e) { return null; } if (arr.isEmpty()) { return null; } - String continuation; + final String continuation; try { continuation = JsonUtils.getString(arr.getObject(0), "nextContinuationData.continuation"); - } catch (Exception e) { + } catch (final Exception e) { return null; } return getNextPage(continuation); } - private Page getNextPage(String continuation) throws ParsingException { - Map params = new HashMap<>(); + private Page getNextPage(final String continuation) throws ParsingException { + final Map params = new HashMap<>(); params.put("action_get_comments", "1"); params.put("pbj", "1"); params.put("ctoken", continuation); try { return new Page("https://m.youtube.com/watch_comment?" 
+ getDataString(params)); - } catch (UnsupportedEncodingException e) { + } catch (final UnsupportedEncodingException e) { throw new ParsingException("Could not get next page url", e); } } @Override public InfoItemsPage getPage(final Page page) throws IOException, ExtractionException { + if (optCommentsDisabled.orElse(false)) { + return getInfoItemsPageForDisabledComments(); + } if (page == null || isNullOrEmpty(page.getUrl())) { throw new IllegalArgumentException("Page doesn't contain an URL"); } @@ -96,7 +144,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { final JsonObject ajaxJson; try { ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1); - } catch (Exception e) { + } catch (final Exception e) { throw new ParsingException("Could not parse json data for comments", e); } final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId()); @@ -104,31 +152,32 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { return new InfoItemsPage<>(collector, getNextPage(ajaxJson)); } - private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonObject ajaxJson) throws ParsingException { - JsonArray contents; + private void collectCommentsFrom(final CommentsInfoItemsCollector collector, final JsonObject ajaxJson) throws ParsingException { + final JsonArray contents; try { contents = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.items"); - } catch (Exception e) { + } catch (final Exception e) { //no comments return; } - List comments; + final List comments; try { comments = JsonUtils.getValues(contents, "commentThreadRenderer.comment.commentRenderer"); - } catch (Exception e) { + } catch (final Exception e) { throw new ParsingException("unable to get parse youtube comments", e); } - for (Object c : comments) { + for (final Object c : comments) { if (c instanceof JsonObject) { - CommentsInfoItemExtractor extractor = new 
YoutubeCommentsInfoItemExtractor((JsonObject) c, getUrl(), getTimeAgoParser()); + final CommentsInfoItemExtractor extractor = + new YoutubeCommentsInfoItemExtractor((JsonObject) c, getUrl(), getTimeAgoParser()); collector.commit(extractor); } } } @Override - public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException { + public void onFetchPage(@Nonnull final Downloader downloader) throws IOException, ExtractionException { final Map> requestHeaders = new HashMap<>(); requestHeaders.put("User-Agent", singletonList(USER_AGENT)); final Response response = downloader.get(getUrl(), requestHeaders, getExtractorLocalization()); @@ -138,8 +187,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { } - private String makeAjaxRequest(String siteUrl) throws IOException, ReCaptchaException { - Map> requestHeaders = new HashMap<>(); + private String makeAjaxRequest(final String siteUrl) throws IOException, ReCaptchaException { + final Map> requestHeaders = new HashMap<>(); requestHeaders.put("Accept", singletonList("*/*")); requestHeaders.put("User-Agent", singletonList(USER_AGENT)); requestHeaders.put("X-YouTube-Client-Version", singletonList(ytClientVersion)); @@ -147,14 +196,15 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { return getDownloader().get(siteUrl, requestHeaders, getExtractorLocalization()).responseBody(); } - private String getDataString(Map params) throws UnsupportedEncodingException { - StringBuilder result = new StringBuilder(); + private String getDataString(final Map params) throws UnsupportedEncodingException { + final StringBuilder result = new StringBuilder(); boolean first = true; - for (Map.Entry entry : params.entrySet()) { - if (first) + for (final Map.Entry entry : params.entrySet()) { + if (first) { first = false; - else + } else { result.append("&"); + } result.append(URLEncoder.encode(entry.getKey(), UTF_8)); result.append("="); 
result.append(URLEncoder.encode(entry.getValue(), UTF_8)); @@ -163,8 +213,28 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { } private String findValue(final String doc, final String start, final String end) { - final int beginIndex = doc.indexOf(start) + start.length(); + int beginIndex = doc.indexOf(start); + // Start string was not found + if (beginIndex == -1) { + return null; + } + beginIndex = beginIndex + start.length(); final int endIndex = doc.indexOf(end, beginIndex); + // End string was not found + if (endIndex == -1) { + return null; + } return doc.substring(beginIndex, endIndex); } + + @Override + public boolean isCommentsDisabled() { + // Check if commentsDisabled has to be initialized + if (!optCommentsDisabled.isPresent()) { + // Initialize commentsDisabled + this.findInitialCommentsToken(); + } + + return optCommentsDisabled.get(); + } }