// NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java

package org.schabi.newpipe.extractor.services.youtube.extractors;

import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonWriter;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Utils;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;

import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;

/**
 * {@link CommentsExtractor} for the comments of a YouTube video.
 * <p>
 * {@link #onFetchPage(Downloader)} posts up to two requests to the <b>/next</b> endpoint:
 * the first one (built from the video ID) is searched for the continuation token of the
 * comments section, the second one (built from that token) is stored in {@link #ajaxJson}
 * and serves as the source of the initial page and of the comments count. Subsequent pages
 * are requested by {@link #getPage(Page)} with the continuation token kept as the page ID.
 * </p>
 */
public class YoutubeCommentsExtractor extends CommentsExtractor {

    /**
     * Whether comments are disabled on video.
     * <p>
     * Set by {@link #findInitialCommentsToken(JsonObject)} when the watch-next contents were
     * found but no continuation token could be read from them.
     * </p>
     */
    private boolean commentsDisabled;

    /**
     * The second ajax <b>/next</b> response.
     * <p>
     * Stays {@code null} when {@link #onFetchPage(Downloader)} could not find an initial
     * continuation token, so every reader has to handle the {@code null} case.
     * </p>
     */
    private JsonObject ajaxJson;

    public YoutubeCommentsExtractor(
            final StreamingService service,
            final ListLinkHandler uiHandler) {
        super(service, uiHandler);
    }

    /**
     * Returns the first page of comments, built from the response fetched in
     * {@link #onFetchPage(Downloader)}.
     *
     * @return the first page of comments, or an empty page without a next page when comments
     *         are disabled or no comments response could be fetched
     */
    @Nonnull
    @Override
    public InfoItemsPage<CommentsInfoItem> getInitialPage()
            throws IOException, ExtractionException {
        // Same precondition as getCommentsCount(): without it, the null check below would
        // silently turn "fetchPage() was never called" into an empty result.
        assertPageFetched();

        // ajaxJson is null when no initial token was found but commentsDisabled was not set
        // (the watch-next contents were missing altogether); dereferencing it would throw a
        // NullPointerException, so treat this case like disabled comments.
        if (commentsDisabled || ajaxJson == null) {
            return getInfoItemsPageForDisabledComments();
        }

        return extractComments(ajaxJson);
    }

    /**
     * Finds the initial comments continuation token in the first <b>/next</b> response.
     * <br/>
     * Also sets {@link #commentsDisabled}, unless the watch-next contents array is missing
     * from the response, in which case the flag is left untouched.
     *
     * @param nextResponse the response of the <b>/next</b> request made with the video ID
     * @return the continuation token or null if none was found
     */
    @Nullable
    private String findInitialCommentsToken(final JsonObject nextResponse) {
        final JsonArray contents = getJsonContents(nextResponse);

        // For videos where comments are unavailable, this would be null
        if (contents == null) {
            return null;
        }

        final String token = contents
                .stream()
                // Only use JsonObjects
                .filter(JsonObject.class::isInstance)
                .map(JsonObject.class::cast)
                // Check if the comment-section is present
                .filter(jObj -> {
                    try {
                        return "comments-section".equals(
                                JsonUtils.getString(jObj, "itemSectionRenderer.targetId"));
                    } catch (final ParsingException ignored) {
                        // Not an item section (or one without a target ID): skip it
                        return false;
                    }
                })
                .findFirst()
                // Extract the token (or null in case of error)
                .map(itemSectionRenderer -> {
                    try {
                        return JsonUtils.getString(
                                itemSectionRenderer
                                        .getObject("itemSectionRenderer")
                                        .getArray("contents").getObject(0),
                                "continuationItemRenderer.continuationEndpoint"
                                        + ".continuationCommand.token");
                    } catch (final ParsingException ignored) {
                        // Comments section without a continuation: handled as "no token"
                        return null;
                    }
                })
                .orElse(null);

        // The comments are disabled if we couldn't get a token
        commentsDisabled = token == null;

        return token;
    }

    /**
     * Reads the watch-next contents array of a <b>/next</b> response.
     *
     * @param nextResponse the response to read from
     * @return the array found at
     *         {@code contents.twoColumnWatchNextResults.results.results.contents}, or null if
     *         it could not be read
     */
    @Nullable
    private JsonArray getJsonContents(final JsonObject nextResponse) {
        try {
            return JsonUtils.getArray(nextResponse,
                    "contents.twoColumnWatchNextResults.results.results.contents");
        } catch (final ParsingException e) {
            return null;
        }
    }

    /**
     * Creates the page returned when there are no comments to show.
     *
     * @return a page with no items, no next page and no errors
     */
    @Nonnull
    private InfoItemsPage<CommentsInfoItem> getInfoItemsPageForDisabledComments() {
        return new InfoItemsPage<>(Collections.emptyList(), null, Collections.emptyList());
    }

    /**
     * Builds the next page from the continuation item found at the end of a comments
     * response.
     *
     * @param jsonObject a comments response of the <b>/next</b> endpoint
     * @return the next page, or null if the response holds no usable continuation token
     */
    @Nullable
    private Page getNextPage(@Nonnull final JsonObject jsonObject) throws ExtractionException {
        final JsonArray onResponseReceivedEndpoints =
                jsonObject.getArray("onResponseReceivedEndpoints");

        // Prevent ArrayIndexOutOfBoundsException
        if (onResponseReceivedEndpoints.isEmpty()) {
            return null;
        }

        final JsonArray continuationItemsArray;
        try {
            // The continuation items are read from the last endpoint; they live under
            // "reloadContinuationItemsCommand", with "appendContinuationItemsAction" as the
            // fallback when that key holds no object.
            final JsonObject endpoint = onResponseReceivedEndpoints
                    .getObject(onResponseReceivedEndpoints.size() - 1);
            continuationItemsArray = endpoint
                    .getObject("reloadContinuationItemsCommand",
                            endpoint.getObject("appendContinuationItemsAction"))
                    .getArray("continuationItems");
        } catch (final Exception e) {
            return null;
        }
        // Prevent ArrayIndexOutOfBoundsException
        if (continuationItemsArray.isEmpty()) {
            return null;
        }

        final JsonObject continuationItemRenderer = continuationItemsArray
                .getObject(continuationItemsArray.size() - 1)
                .getObject("continuationItemRenderer");

        // The token is nested under a button renderer when a "button" key is present and
        // directly under the continuation endpoint otherwise.
        final String jsonPath = continuationItemRenderer.has("button")
                ? "button.buttonRenderer.command.continuationCommand.token"
                : "continuationEndpoint.continuationCommand.token";

        final String continuation;
        try {
            continuation = JsonUtils.getString(continuationItemRenderer, jsonPath);
        } catch (final Exception e) {
            // No token means that there is no further page
            return null;
        }
        return getNextPage(continuation);
    }

    /**
     * Wraps a continuation token into a {@link Page}.
     *
     * @param continuation the continuation token, stored as the ID of the page
     * @return a page carrying the token; {@link #getPage(Page)} only reads its ID, not its URL
     */
    @Nonnull
    private Page getNextPage(final String continuation) throws ParsingException {
        return new Page(getUrl(), continuation); // the URL is not used to fetch the page
    }

    /**
     * Fetches and extracts the comments page identified by the continuation token stored as
     * the ID of the given page.
     *
     * @param page the page to fetch; its ID must be a continuation token
     * @return the extracted page, or an empty page when comments are disabled
     * @throws IllegalArgumentException if comments are not disabled and the page is null or
     *                                  has no ID
     */
    @Override
    public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
            throws IOException, ExtractionException {

        if (commentsDisabled) {
            return getInfoItemsPageForDisabledComments();
        }

        if (page == null || isNullOrEmpty(page.getId())) {
            throw new IllegalArgumentException("Page doesn't have the continuation.");
        }

        final Localization localization = getExtractorLocalization();
        // @formatter:off
        final byte[] body = JsonWriter.string(
                prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
                    .value("continuation", page.getId())
                    .done())
                .getBytes(StandardCharsets.UTF_8);
        // @formatter:on

        final JsonObject jsonObject = getJsonPostResponse("next", body, localization);

        return extractComments(jsonObject);
    }

    /**
     * Collects the comments of a response and pairs them with the page that follows it.
     *
     * @param jsonObject a comments response of the <b>/next</b> endpoint, must not be null
     * @return the page holding the collected comments and the next page (if any)
     */
    private InfoItemsPage<CommentsInfoItem> extractComments(final JsonObject jsonObject)
            throws ExtractionException {
        final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
                getServiceId());
        collectCommentsFrom(collector, jsonObject);
        return new InfoItemsPage<>(collector, getNextPage(jsonObject));
    }

    /**
     * Commits every comment found in the last received endpoint of a response to the given
     * collector.
     *
     * @param collector  the collector receiving the comments
     * @param jsonObject a comments response of the <b>/next</b> endpoint, must not be null
     * @throws ParsingException if the comment renderers could not be read from the
     *                          continuation items
     */
    private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
                                     final JsonObject jsonObject)
            throws ParsingException {

        final JsonArray onResponseReceivedEndpoints =
                jsonObject.getArray("onResponseReceivedEndpoints");
        // Prevent ArrayIndexOutOfBoundsException
        if (onResponseReceivedEndpoints.isEmpty()) {
            return;
        }
        final JsonObject commentsEndpoint =
                onResponseReceivedEndpoints.getObject(onResponseReceivedEndpoints.size() - 1);

        final String path;

        if (commentsEndpoint.has("reloadContinuationItemsCommand")) {
            path = "reloadContinuationItemsCommand.continuationItems";
        } else if (commentsEndpoint.has("appendContinuationItemsAction")) {
            path = "appendContinuationItemsAction.continuationItems";
        } else {
            // No comments
            return;
        }

        final JsonArray contents;
        try {
            // Work on a copy, as the trailing continuation item is removed below
            contents = new JsonArray(JsonUtils.getArray(commentsEndpoint, path));
        } catch (final Exception e) {
            // No comments
            return;
        }

        // The last item may be the continuation of the list instead of a comment
        final int index = contents.size() - 1;
        if (!contents.isEmpty() && contents.getObject(index).has("continuationItemRenderer")) {
            contents.remove(index);
        }

        // Nothing left to collect
        if (contents.isEmpty()) {
            return;
        }

        // The renderer key is chosen from the first item and applied to all items
        final String jsonKey = contents.getObject(0).has("commentThreadRenderer")
                ? "commentThreadRenderer"
                : "commentRenderer";

        final List<Object> comments;
        try {
            comments = JsonUtils.getValues(contents, jsonKey);
        } catch (final Exception e) {
            throw new ParsingException("Unable to get parse youtube comments", e);
        }

        final String url = getUrl();
        comments.stream()
                .filter(JsonObject.class::isInstance)
                .map(JsonObject.class::cast)
                .map(jObj -> new YoutubeCommentsInfoItemExtractor(jObj, url, getTimeAgoParser()))
                .forEach(collector::commit);
    }

    /**
     * Requests the <b>/next</b> response of the video, looks up the initial comments token in
     * it and, if one was found, requests the first comments response and stores it in
     * {@link #ajaxJson}.
     *
     * @param downloader not used directly by this method
     */
    @Override
    public void onFetchPage(@Nonnull final Downloader downloader)
            throws IOException, ExtractionException {
        final Localization localization = getExtractorLocalization();
        // @formatter:off
        final byte[] body = JsonWriter.string(
                prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
                    .value("videoId", getId())
                    .done())
                .getBytes(StandardCharsets.UTF_8);
        // @formatter:on

        final String initialToken =
                findInitialCommentsToken(getJsonPostResponse("next", body, localization));

        // Without a token there is nothing to request: ajaxJson stays null
        if (initialToken == null) {
            return;
        }

        // @formatter:off
        final byte[] ajaxBody = JsonWriter.string(
                        prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
                                .value("continuation", initialToken)
                                .done())
                .getBytes(StandardCharsets.UTF_8);
        // @formatter:on

        ajaxJson = getJsonPostResponse("next", ajaxBody, localization);
    }


    /**
     * @return the value of {@link #commentsDisabled}, which is only meaningful once the page
     *         has been fetched
     */
    @Override
    public boolean isCommentsDisabled() {
        return commentsDisabled;
    }

    /**
     * Reads the comments count from the header of the first comments response.
     *
     * @return the number of comments, or -1 when comments are disabled or no comments
     *         response could be fetched
     * @throws ExtractionException if the count text could not be read or parsed
     */
    @Override
    public int getCommentsCount() throws ExtractionException {
        assertPageFetched();

        // ajaxJson is null when no initial token was found without commentsDisabled being
        // set; there is no header to read the count from in that case either.
        if (commentsDisabled || ajaxJson == null) {
            return -1;
        }

        final JsonObject countText = ajaxJson
                .getArray("onResponseReceivedEndpoints").getObject(0)
                .getObject("reloadContinuationItemsCommand")
                .getArray("continuationItems").getObject(0)
                .getObject("commentsHeaderRenderer")
                .getObject("countText");

        try {
            // Only the digits of the count text are kept before parsing
            return Integer.parseInt(
                    Utils.removeNonDigitCharacters(getTextFromObject(countText))
            );
        } catch (final Exception e) {
            throw new ExtractionException("Unable to get comments count", e);
        }
    }
}