From aff3e795f849b0f74347a38cebac61e2bde6dbb1 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Sun, 16 Apr 2023 23:13:25 +0200 Subject: [PATCH] [PeerTube] Fix multi level comment replies --- .../org/schabi/newpipe/extractor/Page.java | 4 + .../extractors/PeertubeCommentsExtractor.java | 79 +++++++++++-------- .../PeertubeCommentsInfoItemExtractor.java | 55 ++++++++++--- .../PeertubeCommentsExtractorTest.java | 53 ++++++++++++- 4 files changed, 148 insertions(+), 43 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java b/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java index e1b19e7fb..50b6cdd6b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java @@ -37,6 +37,10 @@ public class Page implements Serializable { this(url, id, null, null, null); } + public Page(final String url, final String id, final byte[] body) { + this(url, id, null, null, body); + } + public Page(final String url, final byte[] body) { this(url, null, null, null, body); } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsExtractor.java index 4b342c21d..220b5a7e5 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsExtractor.java @@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.peertube.extractors; import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; +import com.grack.nanojson.JsonParserException; import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.comments.CommentsExtractor; @@ -17,6 +18,7 @@ import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper; import org.schabi.newpipe.extractor.utils.Utils; import java.io.IOException; +import java.nio.charset.StandardCharsets; import static org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper.COUNT_KEY; import static org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper.ITEMS_PER_PAGE; @@ -26,6 +28,9 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; import javax.annotation.Nonnull; public class PeertubeCommentsExtractor extends CommentsExtractor { + static final String CHILDREN = "children"; + private static final String IS_DELETED = "isDeleted"; + private static final String TOTAL = "total"; /** * Use {@link #isReply()} to access this variable. @@ -49,7 +54,7 @@ public class PeertubeCommentsExtractor extends CommentsExtractor { } } - private boolean isReply() throws ParsingException { + boolean isReply() throws ParsingException { if (isReply == null) { if (getOriginalUrl().contains("/videos/watch/")) { isReply = false; @@ -67,8 +72,8 @@ public class PeertubeCommentsExtractor extends CommentsExtractor { for (final Object c : contents) { if (c instanceof JsonObject) { final JsonObject item = (JsonObject) c; - if (!item.getBoolean("isDeleted")) { - collector.commit(new PeertubeCommentsInfoItemExtractor(item, this)); + if (!item.getBoolean(IS_DELETED)) { + collector.commit(new PeertubeCommentsInfoItemExtractor(item, null, this)); } } } @@ -76,13 +81,15 @@ public class PeertubeCommentsExtractor extends CommentsExtractor { private void collectRepliesFrom(@Nonnull final CommentsInfoItemsCollector collector, @Nonnull final JsonObject json) throws ParsingException { - final JsonArray contents = json.getArray("children"); + final JsonArray contents = json.getArray(CHILDREN); for (final Object c : contents) { if (c instanceof JsonObject) { - final JsonObject item = ((JsonObject) c).getObject("comment"); - if (!item.getBoolean("isDeleted")) { - collector.commit(new PeertubeCommentsInfoItemExtractor(item, this)); + final JsonObject content = (JsonObject) c; + final JsonObject item = content.getObject("comment"); + final JsonArray children = content.getArray(CHILDREN); + if (!item.getBoolean(IS_DELETED)) { + collector.commit(new PeertubeCommentsInfoItemExtractor(item, children, this)); } } } @@ -95,36 +102,46 @@ public class PeertubeCommentsExtractor extends CommentsExtractor { throw new IllegalArgumentException("Page doesn't contain an URL"); } - final Response response = getDownloader().get(page.getUrl()); - JsonObject json = null; - if (response != null && !Utils.isBlank(response.responseBody())) { - try { - json = JsonParser.object().from(response.responseBody()); - } catch (final Exception e) { - throw new ParsingException("Could not parse json data for comments info", e); + final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId()); + final long total; + if (page.getBody() == null) { + final Response response = getDownloader().get(page.getUrl()); + if (response != null && !Utils.isBlank(response.responseBody())) { + try { + json = JsonParser.object().from(response.responseBody()); + } catch (final Exception e) { + throw new ParsingException("Could not parse json data for comments info", e); + } } - } - - if (json != null) { - PeertubeParsingHelper.validate(json); - final long total; - final CommentsInfoItemsCollector collector - = new CommentsInfoItemsCollector(getServiceId()); - - if (isReply() || json.has("children")) { - total = json.getArray("children").size(); - collectRepliesFrom(collector, json); + if (json != null) { + PeertubeParsingHelper.validate(json); + if (isReply() || json.has(CHILDREN)) { + total = json.getArray(CHILDREN).size(); + collectRepliesFrom(collector, json); + } else { + total = json.getLong(TOTAL); + collectCommentsFrom(collector, json); + } } else { - total = json.getLong("total"); - collectCommentsFrom(collector, json); + throw new ExtractionException("Unable to get PeerTube kiosk info"); } - - return new InfoItemsPage<>(collector, - PeertubeParsingHelper.getNextPage(page.getUrl(), total)); } else { - throw new ExtractionException("Unable to get PeerTube kiosk info"); + try { + json = JsonParser.object().from(new String(page.getBody(), StandardCharsets.UTF_8)); + isReply = true; + total = json.getArray(CHILDREN).size(); + collectRepliesFrom(collector, json); + } catch (final JsonParserException e) { + throw new ParsingException( + "Could not parse json data for nested comments info", e); + } } + + return new InfoItemsPage<>(collector, + PeertubeParsingHelper.getNextPage(page.getUrl(), total)); + + } @Override diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsInfoItemExtractor.java index 42cb02030..9b1de3dd7 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsInfoItemExtractor.java @@ -1,7 +1,9 @@ package org.schabi.newpipe.extractor.services.peertube.extractors; +import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; +import com.grack.nanojson.JsonWriter; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.schabi.newpipe.extractor.Page; @@ -13,20 +15,36 @@ import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper; import org.schabi.newpipe.extractor.stream.Description; import org.schabi.newpipe.extractor.utils.JsonUtils; +import javax.annotation.Nonnull; import javax.annotation.Nullable; +import java.nio.charset.StandardCharsets; import java.util.Objects; -public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor { - private final JsonObject item; - private final String url; - private final String baseUrl; +import static org.schabi.newpipe.extractor.services.peertube.extractors.PeertubeCommentsExtractor.CHILDREN; - public PeertubeCommentsInfoItemExtractor(final JsonObject item, - final PeertubeCommentsExtractor extractor) +public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor { + @Nonnull + private final JsonObject item; + @Nullable + private final JsonArray children; + @Nonnull + private final String url; + @Nonnull + private final String baseUrl; + @Nonnull + private final PeertubeCommentsExtractor superCommentExtractor; + + private Integer replyCount; + + public PeertubeCommentsInfoItemExtractor(@Nonnull final JsonObject item, + @Nullable final JsonArray children, + @Nonnull final PeertubeCommentsExtractor extractor) throws ParsingException { this.item = item; + this.children = children; this.url = extractor.getUrl(); this.baseUrl = extractor.getBaseUrl(); + this.superCommentExtractor = extractor; } @Override @@ -107,15 +125,34 @@ public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtrac @Override @Nullable public Page getReplies() throws ParsingException { - if (JsonUtils.getNumber(item, "totalReplies").intValue() == 0) { + if (getReplyCount() == 0) { return null; } final String threadId = JsonUtils.getNumber(item, "threadId").toString(); - return new Page(url + "/" + threadId, threadId); + final String repliesUrl = url + "/" + threadId; + if (superCommentExtractor.isReply() && children != null && !children.isEmpty()) { + // Nested replies are already included in the original thread's request. + // Wrap the replies into a JsonObject, because the original thread's request body + // is also structured like a JsonObject. + final JsonObject pageContent = new JsonObject(); + pageContent.put(CHILDREN, children); + return new Page(repliesUrl, threadId, + JsonWriter.string(pageContent).getBytes(StandardCharsets.UTF_8)); + } + return new Page(repliesUrl, threadId); } @Override public int getReplyCount() throws ParsingException { - return JsonUtils.getNumber(item, "totalReplies").intValue(); + if (replyCount == null) { + if (children != null && !children.isEmpty()) { + // The totalReplies field is inaccurate for nested replies and sometimes returns 0 + // although there are replies to that reply stored in children. + replyCount = children.size(); + } else { + replyCount = JsonUtils.getNumber(item, "totalReplies").intValue(); + } + } + return replyCount; } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeCommentsExtractorTest.java index f7f045a26..353e00482 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeCommentsExtractorTest.java @@ -14,10 +14,9 @@ import org.schabi.newpipe.extractor.utils.Utils; import java.io.IOException; import java.util.List; +import java.util.Optional; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import static org.schabi.newpipe.extractor.ServiceList.PeerTube; public class PeertubeCommentsExtractorTest { @@ -121,4 +120,52 @@ public class PeertubeCommentsExtractorTest { assertTrue(commentsInfo.getErrors().isEmpty()); } } + + /** + * Test a video that has comments with nested replies. + */ + public static class NestedComments { + private static PeertubeCommentsExtractor extractor; + + @BeforeAll + public static void setUp() throws Exception { + NewPipe.init(DownloaderTestImpl.getInstance()); + extractor = (PeertubeCommentsExtractor) PeerTube + .getCommentsExtractor("https://share.tube/w/vxu4uTstUBAUromWwXGHrq"); + } + + @Test + void testGetComments() throws IOException, ExtractionException { + final InfoItemsPage comments = extractor.getInitialPage(); + assertFalse(comments.getItems().isEmpty()); + final Optional nestedCommentHeadOpt = + comments.getItems() + .stream() + .filter(c -> c.getCommentId().equals("9770")) + .findFirst(); + assertTrue(nestedCommentHeadOpt.isPresent()); + assertTrue(findNestedCommentWithId("9773", nestedCommentHeadOpt.get()), "The nested comment replies were not found"); + } + } + + private static boolean findNestedCommentWithId(final String id, final CommentsInfoItem comment) + throws IOException, ExtractionException { + if (comment.getCommentId().equals(id)) { + return true; + } + return PeerTube + .getCommentsExtractor(comment.getUrl()) + .getPage(comment.getReplies()) + .getItems() + .stream() + .map(c -> { + try { + return findNestedCommentWithId(id, c); + } catch (final Exception ignored) { + return false; + } + }) + .reduce((a, b) -> a || b) + .orElse(false); + } }