diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampCommentsExtractor.java
index 50797395a..5d38ea87d 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampCommentsExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampCommentsExtractor.java
@@ -1,9 +1,13 @@
 package org.schabi.newpipe.extractor.services.bandcamp.extractors;
 
+import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.BASE_API_URL;
+
+import com.grack.nanojson.JsonArray;
+import com.grack.nanojson.JsonObject;
+import com.grack.nanojson.JsonWriter;
+
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
 import org.schabi.newpipe.extractor.Page;
 import org.schabi.newpipe.extractor.StreamingService;
 import org.schabi.newpipe.extractor.comments.CommentsExtractor;
@@ -11,13 +15,22 @@ import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
 import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
 import org.schabi.newpipe.extractor.downloader.Downloader;
 import org.schabi.newpipe.extractor.exceptions.ExtractionException;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
 import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
+import org.schabi.newpipe.extractor.utils.JsonUtils;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.List;
 
 import javax.annotation.Nonnull;
-import java.io.IOException;
 
 public class BandcampCommentsExtractor extends CommentsExtractor {
 
+    private static final String REVIEWS_API_URL = BASE_API_URL + "/tralbumcollectors/2/reviews";
+
     private Document document;
 
 
@@ -39,19 +52,106 @@ public class BandcampCommentsExtractor extends CommentsExtractor {
 
         final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
 
-        final Elements writings = document.getElementsByClass("writing");
+        final JsonObject collectorsData = JsonUtils.toJsonObject(
+                document.getElementById("collectors-data").attr("data-blob"));
+        final JsonArray reviews = collectorsData.getArray("reviews");
 
-        for (final Element writing : writings) {
-            collector.commit(new BandcampCommentsInfoItemExtractor(writing, getUrl()));
+        for (final Object review : reviews) {
+            collector.commit(
+                    new BandcampCommentsInfoItemExtractor((JsonObject) review, getUrl()));
         }
 
-        return new InfoItemsPage<>(collector, null);
+        if (!collectorsData.getBoolean("more_reviews_available")) {
+            return new InfoItemsPage<>(collector, null);
+        }
+
+        final String trackId = getTrackId();
+        final String token = getNextPageToken(reviews);
+        return new InfoItemsPage<>(collector, new Page(List.of(trackId, token)));
     }
 
     @Override
     public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
             throws IOException, ExtractionException {
-        return null;
+        if (page == null || page.getIds() == null || page.getIds().size() < 2) {
+            throw new IllegalArgumentException("Page doesn't contain a track id and a token");
+        }
+
+        final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
+
+        final List<String> pageIds = page.getIds();
+        final String trackId = pageIds.get(0);
+        final String token = pageIds.get(1);
+        final JsonObject reviewsData = fetchReviewsData(trackId, token);
+        final JsonArray reviews = reviewsData.getArray("results");
+
+        for (final Object review : reviews) {
+            collector.commit(
+                    new BandcampCommentsInfoItemExtractor((JsonObject) review, getUrl()));
+        }
+
+        if (!reviewsData.getBoolean("more_available")) {
+            return new InfoItemsPage<>(collector, null);
+        }
+
+        return new InfoItemsPage<>(collector,
+                new Page(List.of(trackId, getNextPageToken(reviews))));
+    }
+
+    /**
+     * Requests one further batch of reviews by POSTing a JSON body to {@link #REVIEWS_API_URL}.
+     *
+     * @param trackId the id of the track, sent as {@code tralbum_id}
+     * @param token   the pagination token, sent as {@code token}
+     * @return the response body parsed as a JSON object
+     * @throws ParsingException if the request fails with an I/O or reCAPTCHA error
+     */
+    private JsonObject fetchReviewsData(final String trackId, final String token)
+            throws ParsingException {
+        try {
+            return JsonUtils.toJsonObject(getDownloader().postWithContentTypeJson(
+                    REVIEWS_API_URL,
+                    Collections.emptyMap(),
+                    JsonWriter.string().object()
+                            .value("tralbum_type", "t")
+                            .value("tralbum_id", trackId)
+                            .value("token", token)
+                            .value("count", 7)
+                            .array("exclude_fan_ids").end()
+                            .end().done().getBytes(StandardCharsets.UTF_8)).responseBody());
+        } catch (final IOException | ReCaptchaException e) {
+            throw new ParsingException("Could not fetch reviews", e);
+        }
+    }
+
+    /**
+     * Reads the pagination token from the last review of a batch.
+     *
+     * @param reviews the reviews of the batch that was just extracted
+     * @return the {@code token} string of the last JSON object in {@code reviews}
+     * @throws ParsingException if there is no review or the last review yields no token
+     */
+    private String getNextPageToken(final JsonArray reviews) throws ParsingException {
+        return reviews.stream()
+                .filter(JsonObject.class::isInstance)
+                .map(JsonObject.class::cast)
+                .reduce((a, b) -> b) // keep only the last element
+                // Optional.map turns a null token into an empty Optional instead of an NPE
+                .map(review -> review.getString("token"))
+                .orElseThrow(() -> new ParsingException("Could not get token"));
+    }
+
+    /**
+     * Reads the numeric {@code item_id} from the JSON stored in the {@code content} attribute
+     * of the page's {@code bc-page-properties} meta element.
+     *
+     * @return the item id as a decimal string
+     */
+    private String getTrackId() throws ParsingException {
+        final JsonObject pageProperties = JsonUtils.toJsonObject(
+                document.selectFirst("meta[name=bc-page-properties]")
+                        .attr("content"));
+        return Long.toString(pageProperties.getLong("item_id"));
     }
 
     @Override
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampCommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampCommentsInfoItemExtractor.java
index d931738c9..e65b2cc4d 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampCommentsInfoItemExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampCommentsInfoItemExtractor.java
@@ -1,19 +1,20 @@
 package org.schabi.newpipe.extractor.services.bandcamp.extractors;
 
-import org.jsoup.nodes.Element;
+import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.getImageUrl;
+
+import com.grack.nanojson.JsonObject;
+
 import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.stream.Description;
 
-import java.util.Objects;
-
 public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
 
-    private final Element writing;
+    private final JsonObject review;
     private final String url;
 
-    public BandcampCommentsInfoItemExtractor(final Element writing, final String url) {
-        this.writing = writing;
+    public BandcampCommentsInfoItemExtractor(final JsonObject review, final String url) {
+        this.review = review;
         this.url = url;
     }
 
@@ -29,31 +30,21 @@ public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtrac
 
     @Override
     public String getThumbnailUrl() throws ParsingException {
-        return writing.getElementsByClass("thumb").attr("src");
+        return getUploaderAvatarUrl();
     }
 
     @Override
     public Description getCommentText() throws ParsingException {
-        final var text = writing.getElementsByClass("text").stream()
-                .filter(Objects::nonNull)
-                .map(Element::ownText)
-                .findFirst()
-                .orElseThrow(() -> new ParsingException("Could not get comment text"));
-
-        return new Description(text, Description.PLAIN_TEXT);
+        return new Description(review.getString("why"), Description.PLAIN_TEXT);
     }
 
     @Override
     public String getUploaderName() throws ParsingException {
-        return writing.getElementsByClass("name").stream()
-                .filter(Objects::nonNull)
-                .map(Element::text)
-                .findFirst()
-                .orElseThrow(() -> new ParsingException("Could not get uploader name"));
+        return review.getString("name");
     }
 
     @Override
     public String getUploaderAvatarUrl() {
-        return writing.getElementsByClass("thumb").attr("src");
+        return getImageUrl(review.getLong("image_id"), false);
     }
 }
diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampCommentsExtractorTest.java
index 3d40d934f..2fd98e255 100644
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampCommentsExtractorTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampCommentsExtractorTest.java
@@ -37,6 +37,7 @@ public class BandcampCommentsExtractorTest {
     @Test
     public void testGetCommentsAllData() throws IOException, ExtractionException {
         ListExtractor.InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
+        assertTrue(comments.hasNextPage());
 
         DefaultTests.defaultTestListOfItems(Bandcamp, comments.getItems(), comments.getErrors());
         for (CommentsInfoItem c : comments.getItems()) {