2021-04-26 17:25:04 +02:00
|
|
|
package org.schabi.newpipe.extractor.services.bandcamp.extractors;
|
|
|
|
|
2023-02-23 16:56:40 +01:00
|
|
|
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.BASE_API_URL;
|
|
|
|
|
|
|
|
import com.grack.nanojson.JsonArray;
|
|
|
|
import com.grack.nanojson.JsonObject;
|
|
|
|
import com.grack.nanojson.JsonWriter;
|
|
|
|
|
2021-04-26 17:25:04 +02:00
|
|
|
import org.jsoup.Jsoup;
|
|
|
|
import org.jsoup.nodes.Document;
|
|
|
|
import org.schabi.newpipe.extractor.Page;
|
|
|
|
import org.schabi.newpipe.extractor.StreamingService;
|
|
|
|
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
|
|
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
|
|
|
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
|
|
|
|
import org.schabi.newpipe.extractor.downloader.Downloader;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
2023-02-23 16:56:40 +01:00
|
|
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
2021-04-26 17:25:04 +02:00
|
|
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
2023-02-23 16:56:40 +01:00
|
|
|
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
2021-04-26 17:25:04 +02:00
|
|
|
|
|
|
|
import java.io.IOException;
|
2023-02-23 16:56:40 +01:00
|
|
|
import java.nio.charset.StandardCharsets;
|
|
|
|
import java.util.Collections;
|
2023-02-28 17:24:40 +01:00
|
|
|
import java.util.List;
|
2023-02-23 16:56:40 +01:00
|
|
|
|
|
|
|
import javax.annotation.Nonnull;
|
2021-04-26 17:25:04 +02:00
|
|
|
|
|
|
|
public class BandcampCommentsExtractor extends CommentsExtractor {
|
|
|
|
|
2023-02-23 16:56:40 +01:00
|
|
|
private static final String REVIEWS_API_URL = BASE_API_URL + "/tralbumcollectors/2/reviews";
|
|
|
|
|
2021-04-26 17:25:04 +02:00
|
|
|
private Document document;
|
|
|
|
|
|
|
|
|
2022-03-18 17:50:25 +01:00
|
|
|
public BandcampCommentsExtractor(final StreamingService service,
|
|
|
|
final ListLinkHandler linkHandler) {
|
2021-04-26 17:25:04 +02:00
|
|
|
super(service, linkHandler);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2022-03-18 17:50:25 +01:00
|
|
|
public void onFetchPage(@Nonnull final Downloader downloader)
|
|
|
|
throws IOException, ExtractionException {
|
|
|
|
document = Jsoup.parse(downloader.get(getLinkHandler().getUrl()).responseBody());
|
2021-04-26 17:25:04 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Nonnull
|
|
|
|
@Override
|
2022-03-18 17:50:25 +01:00
|
|
|
public InfoItemsPage<CommentsInfoItem> getInitialPage()
|
|
|
|
throws IOException, ExtractionException {
|
2021-04-26 17:25:04 +02:00
|
|
|
|
2022-03-18 17:50:25 +01:00
|
|
|
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
|
2021-04-26 17:25:04 +02:00
|
|
|
|
2023-02-23 16:56:40 +01:00
|
|
|
final JsonObject collectorsData = JsonUtils.toJsonObject(
|
|
|
|
document.getElementById("collectors-data").attr("data-blob"));
|
|
|
|
final JsonArray reviews = collectorsData.getArray("reviews");
|
2021-04-26 17:25:04 +02:00
|
|
|
|
2023-02-23 16:56:40 +01:00
|
|
|
for (final Object review : reviews) {
|
|
|
|
collector.commit(
|
|
|
|
new BandcampCommentsInfoItemExtractor((JsonObject) review, getUrl()));
|
2021-04-26 17:25:04 +02:00
|
|
|
}
|
|
|
|
|
2023-02-23 16:56:40 +01:00
|
|
|
if (!collectorsData.getBoolean("more_reviews_available")) {
|
|
|
|
return new InfoItemsPage<>(collector, null);
|
|
|
|
}
|
|
|
|
|
|
|
|
final String trackId = getTrackId();
|
|
|
|
final String token = getNextPageToken(reviews);
|
2023-02-28 17:24:40 +01:00
|
|
|
return new InfoItemsPage<>(collector, new Page(List.of(trackId, token)));
|
2021-04-26 17:25:04 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2022-03-18 17:50:25 +01:00
|
|
|
public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
|
|
|
|
throws IOException, ExtractionException {
|
2023-02-23 16:56:40 +01:00
|
|
|
|
|
|
|
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
|
|
|
|
|
2023-02-28 17:24:40 +01:00
|
|
|
final List<String> pageIds = page.getIds();
|
|
|
|
final String trackId = pageIds.get(0);
|
|
|
|
final String token = pageIds.get(1);
|
2023-02-23 16:56:40 +01:00
|
|
|
final JsonObject reviewsData = fetchReviewsData(trackId, token);
|
|
|
|
final JsonArray reviews = reviewsData.getArray("results");
|
|
|
|
|
|
|
|
for (final Object review : reviews) {
|
|
|
|
collector.commit(
|
|
|
|
new BandcampCommentsInfoItemExtractor((JsonObject) review, getUrl()));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!reviewsData.getBoolean("more_available")) {
|
|
|
|
return new InfoItemsPage<>(collector, null);
|
|
|
|
}
|
|
|
|
|
2023-02-28 17:24:40 +01:00
|
|
|
return new InfoItemsPage<>(collector,
|
|
|
|
new Page(List.of(trackId, getNextPageToken(reviews))));
|
2023-02-23 16:56:40 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
private JsonObject fetchReviewsData(final String trackId, final String token)
|
|
|
|
throws ParsingException {
|
|
|
|
try {
|
|
|
|
return JsonUtils.toJsonObject(getDownloader().postWithContentTypeJson(
|
|
|
|
REVIEWS_API_URL,
|
|
|
|
Collections.emptyMap(),
|
|
|
|
JsonWriter.string().object()
|
|
|
|
.value("tralbum_type", "t")
|
|
|
|
.value("tralbum_id", trackId)
|
|
|
|
.value("token", token)
|
|
|
|
.value("count", 7)
|
|
|
|
.array("exclude_fan_ids").end()
|
|
|
|
.end().done().getBytes(StandardCharsets.UTF_8)).responseBody());
|
|
|
|
} catch (final IOException | ReCaptchaException e) {
|
|
|
|
throw new ParsingException("Could not fetch reviews", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private String getNextPageToken(final JsonArray reviews) throws ParsingException {
|
2023-02-28 17:24:40 +01:00
|
|
|
return reviews.stream()
|
|
|
|
.filter(JsonObject.class::isInstance)
|
|
|
|
.map(JsonObject.class::cast)
|
|
|
|
.map(review -> review.getString("token"))
|
|
|
|
.reduce((a, b) -> b) // keep only the last element
|
2023-02-23 16:56:40 +01:00
|
|
|
.orElseThrow(() -> new ParsingException("Could not get token"));
|
|
|
|
}
|
|
|
|
|
|
|
|
private String getTrackId() throws ParsingException {
|
|
|
|
final JsonObject pageProperties = JsonUtils.toJsonObject(
|
|
|
|
document.selectFirst("meta[name=bc-page-properties]")
|
|
|
|
.attr("content"));
|
|
|
|
return Long.toString(pageProperties.getLong("item_id"));
|
2021-04-26 17:25:04 +02:00
|
|
|
}
|
|
|
|
}
|