2018-08-20 00:52:19 +02:00
|
|
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
|
|
|
|
2022-12-08 01:41:49 +01:00
|
|
|
import com.grack.nanojson.JsonArray;
|
|
|
|
import com.grack.nanojson.JsonObject;
|
|
|
|
import com.grack.nanojson.JsonWriter;
|
2020-04-15 14:09:46 +02:00
|
|
|
import org.schabi.newpipe.extractor.Page;
|
2018-08-20 00:52:19 +02:00
|
|
|
import org.schabi.newpipe.extractor.StreamingService;
|
|
|
|
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
|
|
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
|
|
|
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
|
2019-04-28 22:03:16 +02:00
|
|
|
import org.schabi.newpipe.extractor.downloader.Downloader;
|
2018-08-20 00:52:19 +02:00
|
|
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
|
|
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
2021-07-29 19:53:43 +02:00
|
|
|
import org.schabi.newpipe.extractor.localization.Localization;
|
2018-09-25 23:50:29 +02:00
|
|
|
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
2022-02-08 11:44:55 +01:00
|
|
|
import org.schabi.newpipe.extractor.utils.Utils;
|
2018-08-20 00:52:19 +02:00
|
|
|
|
2022-12-08 01:41:49 +01:00
|
|
|
import javax.annotation.Nonnull;
|
|
|
|
import javax.annotation.Nullable;
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.nio.charset.StandardCharsets;
|
|
|
|
import java.util.Collections;
|
|
|
|
import java.util.List;
|
|
|
|
|
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
|
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
|
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
|
|
|
|
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
|
|
|
|
2018-08-20 00:52:19 +02:00
|
|
|
public class YoutubeCommentsExtractor extends CommentsExtractor {
|
2018-09-02 01:36:26 +02:00
|
|
|
|
2022-12-08 01:41:49 +01:00
|
|
|
/**
|
2022-12-08 12:44:47 +01:00
|
|
|
* Whether comments are disabled on video.
|
2022-12-08 01:41:49 +01:00
|
|
|
*/
|
2022-12-08 13:10:33 +01:00
|
|
|
private boolean commentsDisabled;
|
2022-12-08 01:41:49 +01:00
|
|
|
|
2022-02-10 16:43:02 +01:00
|
|
|
/**
|
|
|
|
* The second ajax <b>/next</b> response.
|
|
|
|
*/
|
2022-02-08 11:44:55 +01:00
|
|
|
private JsonObject ajaxJson;
|
2021-06-13 21:11:11 +02:00
|
|
|
|
|
|
|
public YoutubeCommentsExtractor(
|
|
|
|
final StreamingService service,
|
|
|
|
final ListLinkHandler uiHandler) {
|
2019-04-28 22:03:16 +02:00
|
|
|
super(service, uiHandler);
|
2018-09-02 01:36:26 +02:00
|
|
|
}
|
|
|
|
|
2021-07-29 19:53:43 +02:00
|
|
|
@Nonnull
|
2018-09-02 01:36:26 +02:00
|
|
|
@Override
|
2021-06-13 21:11:11 +02:00
|
|
|
public InfoItemsPage<CommentsInfoItem> getInitialPage()
|
|
|
|
throws IOException, ExtractionException {
|
|
|
|
|
2022-12-08 01:41:49 +01:00
|
|
|
if (commentsDisabled) {
|
2021-06-13 21:11:11 +02:00
|
|
|
return getInfoItemsPageForDisabledComments();
|
|
|
|
}
|
|
|
|
|
2022-12-08 12:44:47 +01:00
|
|
|
return extractComments(ajaxJson);
|
2021-06-13 21:11:11 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Finds the initial comments token and initializes commentsDisabled.
|
2022-02-21 19:55:33 +01:00
|
|
|
* <br/>
|
2022-12-08 01:41:49 +01:00
|
|
|
* Also sets {@link #commentsDisabled}.
|
2021-07-29 19:53:43 +02:00
|
|
|
*
|
2021-06-13 22:06:09 +02:00
|
|
|
* @return the continuation token or null if none was found
|
2021-06-13 21:11:11 +02:00
|
|
|
*/
|
2021-07-29 19:53:43 +02:00
|
|
|
@Nullable
|
2022-12-08 01:41:49 +01:00
|
|
|
private String findInitialCommentsToken(final JsonObject nextResponse)
|
|
|
|
throws ExtractionException {
|
2022-02-12 15:49:33 +01:00
|
|
|
final String token = JsonUtils.getArray(nextResponse,
|
2022-12-08 01:41:49 +01:00
|
|
|
"contents.twoColumnWatchNextResults.results.results.contents")
|
2022-02-12 15:49:33 +01:00
|
|
|
.stream()
|
|
|
|
// Only use JsonObjects
|
|
|
|
.filter(JsonObject.class::isInstance)
|
|
|
|
.map(JsonObject.class::cast)
|
|
|
|
// Check if the comment-section is present
|
|
|
|
.filter(jObj -> {
|
|
|
|
try {
|
|
|
|
return "comments-section".equals(
|
|
|
|
JsonUtils.getString(jObj, "itemSectionRenderer.targetId"));
|
2022-02-21 19:55:33 +01:00
|
|
|
} catch (final ParsingException ignored) {
|
2022-02-12 15:49:33 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
})
|
|
|
|
.findFirst()
|
|
|
|
// Extract the token (or null in case of error)
|
|
|
|
.map(itemSectionRenderer -> {
|
|
|
|
try {
|
|
|
|
return JsonUtils.getString(
|
|
|
|
itemSectionRenderer
|
|
|
|
.getObject("itemSectionRenderer")
|
|
|
|
.getArray("contents").getObject(0),
|
2022-03-18 15:09:06 +01:00
|
|
|
"continuationItemRenderer.continuationEndpoint"
|
|
|
|
+ ".continuationCommand.token");
|
2022-02-21 19:55:33 +01:00
|
|
|
} catch (final ParsingException ignored) {
|
2022-02-12 15:49:33 +01:00
|
|
|
return null;
|
|
|
|
}
|
|
|
|
})
|
|
|
|
.orElse(null);
|
|
|
|
|
|
|
|
// The comments are disabled if we couldn't get a token
|
2022-12-08 01:41:49 +01:00
|
|
|
commentsDisabled = token == null;
|
2021-06-13 21:11:11 +02:00
|
|
|
|
2021-07-29 19:53:43 +02:00
|
|
|
return token;
|
2021-06-13 21:11:11 +02:00
|
|
|
}
|
|
|
|
|
2021-07-29 19:53:43 +02:00
|
|
|
@Nonnull
|
2021-06-13 21:11:11 +02:00
|
|
|
private InfoItemsPage<CommentsInfoItem> getInfoItemsPageForDisabledComments() {
|
|
|
|
return new InfoItemsPage<>(Collections.emptyList(), null, Collections.emptyList());
|
2018-09-02 01:36:26 +02:00
|
|
|
}
|
|
|
|
|
2021-07-29 19:53:43 +02:00
|
|
|
@Nullable
|
2022-12-08 01:41:49 +01:00
|
|
|
private Page getNextPage(@Nonnull final JsonObject jsonObject) throws ExtractionException {
|
2022-02-12 15:49:33 +01:00
|
|
|
final JsonArray onResponseReceivedEndpoints =
|
2022-12-08 01:41:49 +01:00
|
|
|
jsonObject.getArray("onResponseReceivedEndpoints");
|
2021-07-29 19:53:43 +02:00
|
|
|
|
2022-02-12 15:49:33 +01:00
|
|
|
// Prevent ArrayIndexOutOfBoundsException
|
|
|
|
if (onResponseReceivedEndpoints.isEmpty()) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
|
|
|
final JsonArray continuationItemsArray;
|
2018-09-25 23:50:29 +02:00
|
|
|
try {
|
2022-02-12 15:49:33 +01:00
|
|
|
final JsonObject endpoint = onResponseReceivedEndpoints
|
|
|
|
.getObject(onResponseReceivedEndpoints.size() - 1);
|
|
|
|
continuationItemsArray = endpoint
|
|
|
|
.getObject("reloadContinuationItemsCommand",
|
|
|
|
endpoint.getObject("appendContinuationItemsAction"))
|
|
|
|
.getArray("continuationItems");
|
2021-06-13 21:11:11 +02:00
|
|
|
} catch (final Exception e) {
|
2020-04-15 14:09:46 +02:00
|
|
|
return null;
|
2018-09-25 23:50:29 +02:00
|
|
|
}
|
2022-02-12 15:49:33 +01:00
|
|
|
// Prevent ArrayIndexOutOfBoundsException
|
|
|
|
if (continuationItemsArray.isEmpty()) {
|
2020-04-15 14:09:46 +02:00
|
|
|
return null;
|
2018-09-25 23:50:29 +02:00
|
|
|
}
|
2021-07-29 19:53:43 +02:00
|
|
|
|
2022-02-12 15:49:33 +01:00
|
|
|
final JsonObject continuationItemRenderer = continuationItemsArray
|
|
|
|
.getObject(continuationItemsArray.size() - 1)
|
|
|
|
.getObject("continuationItemRenderer");
|
2021-08-04 18:44:04 +02:00
|
|
|
|
2022-02-12 15:49:33 +01:00
|
|
|
final String jsonPath = continuationItemRenderer.has("button")
|
|
|
|
? "button.buttonRenderer.command.continuationCommand.token"
|
|
|
|
: "continuationEndpoint.continuationCommand.token";
|
2021-08-04 18:44:04 +02:00
|
|
|
|
2021-06-13 21:11:11 +02:00
|
|
|
final String continuation;
|
2018-09-25 23:50:29 +02:00
|
|
|
try {
|
2021-08-04 18:44:04 +02:00
|
|
|
continuation = JsonUtils.getString(continuationItemRenderer, jsonPath);
|
2021-06-13 21:11:11 +02:00
|
|
|
} catch (final Exception e) {
|
2020-04-15 14:09:46 +02:00
|
|
|
return null;
|
2018-09-02 01:36:26 +02:00
|
|
|
}
|
2020-04-15 14:09:46 +02:00
|
|
|
return getNextPage(continuation);
|
2018-09-02 01:36:26 +02:00
|
|
|
}
|
|
|
|
|
2021-07-29 19:53:43 +02:00
|
|
|
@Nonnull
|
2021-06-13 21:11:11 +02:00
|
|
|
private Page getNextPage(final String continuation) throws ParsingException {
|
2021-07-29 19:53:43 +02:00
|
|
|
return new Page(getUrl(), continuation); // URL is ignored tho
|
2018-09-02 01:36:26 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2021-07-29 19:53:43 +02:00
|
|
|
public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
|
|
|
|
throws IOException, ExtractionException {
|
2022-12-08 01:41:49 +01:00
|
|
|
|
|
|
|
if (commentsDisabled) {
|
2021-06-13 21:11:11 +02:00
|
|
|
return getInfoItemsPageForDisabledComments();
|
|
|
|
}
|
2022-12-08 01:41:49 +01:00
|
|
|
|
2021-07-29 19:53:43 +02:00
|
|
|
if (page == null || isNullOrEmpty(page.getId())) {
|
|
|
|
throw new IllegalArgumentException("Page doesn't have the continuation.");
|
2020-05-11 15:25:18 +02:00
|
|
|
}
|
|
|
|
|
2021-07-29 19:53:43 +02:00
|
|
|
final Localization localization = getExtractorLocalization();
|
2022-12-08 01:41:49 +01:00
|
|
|
// @formatter:off
|
2022-02-12 15:49:33 +01:00
|
|
|
final byte[] body = JsonWriter.string(
|
|
|
|
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
|
|
|
|
.value("continuation", page.getId())
|
|
|
|
.done())
|
|
|
|
.getBytes(StandardCharsets.UTF_8);
|
2022-12-08 01:41:49 +01:00
|
|
|
// @formatter:on
|
2021-07-29 19:53:43 +02:00
|
|
|
|
2023-01-24 22:39:08 +01:00
|
|
|
final JsonObject jsonObject = getJsonPostResponse("next", body, localization);
|
2021-07-29 19:53:43 +02:00
|
|
|
|
2022-12-08 12:44:47 +01:00
|
|
|
return extractComments(jsonObject);
|
|
|
|
}
|
|
|
|
|
|
|
|
private InfoItemsPage<CommentsInfoItem> extractComments(final JsonObject jsonObject)
|
|
|
|
throws ExtractionException {
|
2021-07-29 19:53:43 +02:00
|
|
|
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
|
|
|
|
getServiceId());
|
2023-01-24 22:39:08 +01:00
|
|
|
collectCommentsFrom(collector, jsonObject);
|
2022-12-08 12:44:47 +01:00
|
|
|
return new InfoItemsPage<>(collector, getNextPage(jsonObject));
|
2018-09-02 01:36:26 +02:00
|
|
|
}
|
|
|
|
|
2023-01-24 22:39:08 +01:00
|
|
|
private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
|
|
|
|
final JsonObject jsonObject)
|
2022-12-08 01:41:49 +01:00
|
|
|
throws ParsingException {
|
2021-07-29 19:53:43 +02:00
|
|
|
|
2022-02-12 15:49:33 +01:00
|
|
|
final JsonArray onResponseReceivedEndpoints =
|
2023-01-24 22:39:08 +01:00
|
|
|
jsonObject.getArray("onResponseReceivedEndpoints");
|
2022-02-12 15:49:33 +01:00
|
|
|
// Prevent ArrayIndexOutOfBoundsException
|
|
|
|
if (onResponseReceivedEndpoints.isEmpty()) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
final JsonObject commentsEndpoint =
|
|
|
|
onResponseReceivedEndpoints.getObject(onResponseReceivedEndpoints.size() - 1);
|
2021-07-29 19:53:43 +02:00
|
|
|
|
|
|
|
final String path;
|
|
|
|
|
|
|
|
if (commentsEndpoint.has("reloadContinuationItemsCommand")) {
|
|
|
|
path = "reloadContinuationItemsCommand.continuationItems";
|
|
|
|
} else if (commentsEndpoint.has("appendContinuationItemsAction")) {
|
|
|
|
path = "appendContinuationItemsAction.continuationItems";
|
|
|
|
} else {
|
|
|
|
// No comments
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2021-06-13 21:11:11 +02:00
|
|
|
final JsonArray contents;
|
2018-09-26 00:51:58 +02:00
|
|
|
try {
|
2022-02-12 15:49:33 +01:00
|
|
|
contents = new JsonArray(JsonUtils.getArray(commentsEndpoint, path));
|
2021-06-13 21:11:11 +02:00
|
|
|
} catch (final Exception e) {
|
2021-07-29 19:53:43 +02:00
|
|
|
// No comments
|
2018-09-27 19:31:27 +02:00
|
|
|
return;
|
2018-09-26 00:51:58 +02:00
|
|
|
}
|
2021-07-29 19:53:43 +02:00
|
|
|
|
|
|
|
final int index = contents.size() - 1;
|
2022-02-12 15:49:33 +01:00
|
|
|
if (!contents.isEmpty() && contents.getObject(index).has("continuationItemRenderer")) {
|
2021-07-29 19:53:43 +02:00
|
|
|
contents.remove(index);
|
|
|
|
}
|
|
|
|
|
2022-02-12 15:49:33 +01:00
|
|
|
final String jsonKey = contents.getObject(0).has("commentThreadRenderer")
|
|
|
|
? "commentThreadRenderer"
|
|
|
|
: "commentRenderer";
|
2021-08-04 15:05:14 +02:00
|
|
|
|
2021-06-13 21:11:11 +02:00
|
|
|
final List<Object> comments;
|
2018-09-26 00:51:58 +02:00
|
|
|
try {
|
2021-09-11 21:39:10 +02:00
|
|
|
comments = JsonUtils.getValues(contents, jsonKey);
|
2021-06-13 21:11:11 +02:00
|
|
|
} catch (final Exception e) {
|
2021-07-29 19:53:43 +02:00
|
|
|
throw new ParsingException("Unable to get parse youtube comments", e);
|
2018-09-26 00:51:58 +02:00
|
|
|
}
|
2020-02-08 23:58:46 +01:00
|
|
|
|
2022-02-12 15:49:33 +01:00
|
|
|
final String url = getUrl();
|
|
|
|
comments.stream()
|
|
|
|
.filter(JsonObject.class::isInstance)
|
|
|
|
.map(JsonObject.class::cast)
|
|
|
|
.map(jObj -> new YoutubeCommentsInfoItemExtractor(jObj, url, getTimeAgoParser()))
|
|
|
|
.forEach(collector::commit);
|
2018-09-02 01:36:26 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2021-07-29 19:53:43 +02:00
|
|
|
public void onFetchPage(@Nonnull final Downloader downloader)
|
|
|
|
throws IOException, ExtractionException {
|
|
|
|
final Localization localization = getExtractorLocalization();
|
2022-12-08 01:41:49 +01:00
|
|
|
// @formatter:off
|
2022-02-12 15:49:33 +01:00
|
|
|
final byte[] body = JsonWriter.string(
|
|
|
|
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
|
|
|
|
.value("videoId", getId())
|
|
|
|
.done())
|
|
|
|
.getBytes(StandardCharsets.UTF_8);
|
2022-12-08 01:41:49 +01:00
|
|
|
// @formatter:on
|
2018-09-02 01:36:26 +02:00
|
|
|
|
2022-12-08 12:44:47 +01:00
|
|
|
final String initialToken =
|
|
|
|
findInitialCommentsToken(getJsonPostResponse("next", body, localization));
|
|
|
|
|
2022-12-08 13:10:33 +01:00
|
|
|
if (initialToken == null) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2022-12-08 12:44:47 +01:00
|
|
|
// @formatter:off
|
|
|
|
final byte[] ajaxBody = JsonWriter.string(
|
|
|
|
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
|
|
|
|
.value("continuation", initialToken)
|
|
|
|
.done())
|
|
|
|
.getBytes(StandardCharsets.UTF_8);
|
|
|
|
// @formatter:on
|
|
|
|
|
|
|
|
ajaxJson = getJsonPostResponse("next", ajaxBody, localization);
|
2018-09-02 01:36:26 +02:00
|
|
|
}
|
|
|
|
|
2021-06-13 21:11:11 +02:00
|
|
|
|
|
|
|
@Override
|
2022-12-08 01:41:49 +01:00
|
|
|
public boolean isCommentsDisabled() {
|
|
|
|
return commentsDisabled;
|
2021-06-13 21:11:11 +02:00
|
|
|
}
|
2022-02-08 11:44:55 +01:00
|
|
|
|
|
|
|
@Override
|
|
|
|
public int getCommentsCount() throws ExtractionException {
|
2022-12-08 12:44:47 +01:00
|
|
|
assertPageFetched();
|
|
|
|
|
2022-12-08 13:10:33 +01:00
|
|
|
if (commentsDisabled) {
|
|
|
|
return -1;
|
2022-02-10 16:43:02 +01:00
|
|
|
}
|
2022-12-08 12:44:47 +01:00
|
|
|
|
2022-12-08 13:10:33 +01:00
|
|
|
final JsonObject countText = ajaxJson
|
|
|
|
.getArray("onResponseReceivedEndpoints").getObject(0)
|
|
|
|
.getObject("reloadContinuationItemsCommand")
|
|
|
|
.getArray("continuationItems").getObject(0)
|
|
|
|
.getObject("commentsHeaderRenderer")
|
|
|
|
.getObject("countText");
|
|
|
|
|
|
|
|
try {
|
|
|
|
return Integer.parseInt(
|
|
|
|
Utils.removeNonDigitCharacters(getTextFromObject(countText))
|
|
|
|
);
|
|
|
|
} catch (final Exception e) {
|
|
|
|
throw new ExtractionException("Unable to get comments count", e);
|
|
|
|
}
|
2022-02-08 11:44:55 +01:00
|
|
|
}
|
2018-08-20 00:52:19 +02:00
|
|
|
}
|