mirror of
https://github.com/TeamNewPipe/NewPipeExtractor
synced 2024-11-30 05:51:39 +01:00
[Bandcamp] Support loading additional comments (#1030)
This commit is contained in:
parent
6bdd698c25
commit
5a9b6ed2e3
@ -1,9 +1,13 @@
|
||||
package org.schabi.newpipe.extractor.services.bandcamp.extractors;
|
||||
|
||||
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.BASE_API_URL;
|
||||
|
||||
import com.grack.nanojson.JsonArray;
|
||||
import com.grack.nanojson.JsonObject;
|
||||
import com.grack.nanojson.JsonWriter;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.schabi.newpipe.extractor.Page;
|
||||
import org.schabi.newpipe.extractor.StreamingService;
|
||||
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
||||
@ -11,13 +15,22 @@ import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
|
||||
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
|
||||
import org.schabi.newpipe.extractor.downloader.Downloader;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.io.IOException;
|
||||
|
||||
public class BandcampCommentsExtractor extends CommentsExtractor {
|
||||
|
||||
private static final String REVIEWS_API_URL = BASE_API_URL + "/tralbumcollectors/2/reviews";
|
||||
|
||||
private Document document;
|
||||
|
||||
|
||||
@ -39,19 +52,81 @@ public class BandcampCommentsExtractor extends CommentsExtractor {
|
||||
|
||||
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
|
||||
|
||||
final Elements writings = document.getElementsByClass("writing");
|
||||
final JsonObject collectorsData = JsonUtils.toJsonObject(
|
||||
document.getElementById("collectors-data").attr("data-blob"));
|
||||
final JsonArray reviews = collectorsData.getArray("reviews");
|
||||
|
||||
for (final Element writing : writings) {
|
||||
collector.commit(new BandcampCommentsInfoItemExtractor(writing, getUrl()));
|
||||
for (final Object review : reviews) {
|
||||
collector.commit(
|
||||
new BandcampCommentsInfoItemExtractor((JsonObject) review, getUrl()));
|
||||
}
|
||||
|
||||
return new InfoItemsPage<>(collector, null);
|
||||
if (!collectorsData.getBoolean("more_reviews_available")) {
|
||||
return new InfoItemsPage<>(collector, null);
|
||||
}
|
||||
|
||||
final String trackId = getTrackId();
|
||||
final String token = getNextPageToken(reviews);
|
||||
return new InfoItemsPage<>(collector, new Page(List.of(trackId, token)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
|
||||
throws IOException, ExtractionException {
|
||||
return null;
|
||||
|
||||
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
|
||||
|
||||
final List<String> pageIds = page.getIds();
|
||||
final String trackId = pageIds.get(0);
|
||||
final String token = pageIds.get(1);
|
||||
final JsonObject reviewsData = fetchReviewsData(trackId, token);
|
||||
final JsonArray reviews = reviewsData.getArray("results");
|
||||
|
||||
for (final Object review : reviews) {
|
||||
collector.commit(
|
||||
new BandcampCommentsInfoItemExtractor((JsonObject) review, getUrl()));
|
||||
}
|
||||
|
||||
if (!reviewsData.getBoolean("more_available")) {
|
||||
return new InfoItemsPage<>(collector, null);
|
||||
}
|
||||
|
||||
return new InfoItemsPage<>(collector,
|
||||
new Page(List.of(trackId, getNextPageToken(reviews))));
|
||||
}
|
||||
|
||||
private JsonObject fetchReviewsData(final String trackId, final String token)
|
||||
throws ParsingException {
|
||||
try {
|
||||
return JsonUtils.toJsonObject(getDownloader().postWithContentTypeJson(
|
||||
REVIEWS_API_URL,
|
||||
Collections.emptyMap(),
|
||||
JsonWriter.string().object()
|
||||
.value("tralbum_type", "t")
|
||||
.value("tralbum_id", trackId)
|
||||
.value("token", token)
|
||||
.value("count", 7)
|
||||
.array("exclude_fan_ids").end()
|
||||
.end().done().getBytes(StandardCharsets.UTF_8)).responseBody());
|
||||
} catch (final IOException | ReCaptchaException e) {
|
||||
throw new ParsingException("Could not fetch reviews", e);
|
||||
}
|
||||
}
|
||||
|
||||
private String getNextPageToken(final JsonArray reviews) throws ParsingException {
|
||||
return reviews.stream()
|
||||
.filter(JsonObject.class::isInstance)
|
||||
.map(JsonObject.class::cast)
|
||||
.map(review -> review.getString("token"))
|
||||
.reduce((a, b) -> b) // keep only the last element
|
||||
.orElseThrow(() -> new ParsingException("Could not get token"));
|
||||
}
|
||||
|
||||
private String getTrackId() throws ParsingException {
|
||||
final JsonObject pageProperties = JsonUtils.toJsonObject(
|
||||
document.selectFirst("meta[name=bc-page-properties]")
|
||||
.attr("content"));
|
||||
return Long.toString(pageProperties.getLong("item_id"));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -1,19 +1,20 @@
|
||||
package org.schabi.newpipe.extractor.services.bandcamp.extractors;
|
||||
|
||||
import org.jsoup.nodes.Element;
|
||||
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.getImageUrl;
|
||||
|
||||
import com.grack.nanojson.JsonObject;
|
||||
|
||||
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.stream.Description;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
|
||||
|
||||
/** JSON object of a single review, as delivered by Bandcamp. */
private final JsonObject review;
/** URL passed in by the comments extractor that created this item. */
private final String url;

/**
 * Creates an extractor for a single review.
 *
 * @param review the JSON object describing the review
 * @param url    the URL to associate with this comment
 */
public BandcampCommentsInfoItemExtractor(final JsonObject review, final String url) {
    this.review = review;
    this.url = url;
}
|
||||
|
||||
@ -29,31 +30,21 @@ public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtrac
|
||||
|
||||
@Override
|
||||
public String getThumbnailUrl() throws ParsingException {
|
||||
return writing.getElementsByClass("thumb").attr("src");
|
||||
return getUploaderAvatarUrl();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Description getCommentText() throws ParsingException {
|
||||
final var text = writing.getElementsByClass("text").stream()
|
||||
.filter(Objects::nonNull)
|
||||
.map(Element::ownText)
|
||||
.findFirst()
|
||||
.orElseThrow(() -> new ParsingException("Could not get comment text"));
|
||||
|
||||
return new Description(text, Description.PLAIN_TEXT);
|
||||
return new Description(review.getString("why"), Description.PLAIN_TEXT);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderName() throws ParsingException {
|
||||
return writing.getElementsByClass("name").stream()
|
||||
.filter(Objects::nonNull)
|
||||
.map(Element::text)
|
||||
.findFirst()
|
||||
.orElseThrow(() -> new ParsingException("Could not get uploader name"));
|
||||
return review.getString("name");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderAvatarUrl() {
|
||||
return writing.getElementsByClass("thumb").attr("src");
|
||||
return getImageUrl(review.getLong("image_id"), false);
|
||||
}
|
||||
}
|
||||
|
@ -37,6 +37,7 @@ public class BandcampCommentsExtractorTest {
|
||||
@Test
|
||||
public void testGetCommentsAllData() throws IOException, ExtractionException {
|
||||
ListExtractor.InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
|
||||
assertTrue(comments.hasNextPage());
|
||||
|
||||
DefaultTests.defaultTestListOfItems(Bandcamp, comments.getItems(), comments.getErrors());
|
||||
for (CommentsInfoItem c : comments.getItems()) {
|
||||
|
Loading…
Reference in New Issue
Block a user