2018-09-25 23:50:29 +02:00
|
|
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
|
|
|
|
2019-04-28 22:03:16 +02:00
|
|
|
import com.grack.nanojson.JsonObject;
|
2022-07-22 21:34:12 +02:00
|
|
|
|
|
|
|
import org.schabi.newpipe.extractor.Image;
|
2021-08-04 14:24:47 +02:00
|
|
|
import org.schabi.newpipe.extractor.Page;
|
2018-09-25 23:50:29 +02:00
|
|
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
2019-11-03 19:45:25 +01:00
|
|
|
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
2020-02-08 23:58:46 +01:00
|
|
|
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
2022-11-28 01:22:10 +01:00
|
|
|
import org.schabi.newpipe.extractor.stream.Description;
|
2018-09-25 23:50:29 +02:00
|
|
|
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
2019-02-28 19:40:29 +01:00
|
|
|
import org.schabi.newpipe.extractor.utils.Utils;
|
2018-09-25 23:50:29 +02:00
|
|
|
|
2022-07-22 21:34:12 +02:00
|
|
|
import javax.annotation.Nonnull;
|
2019-11-03 19:45:25 +01:00
|
|
|
import javax.annotation.Nullable;
|
2022-07-22 21:34:12 +02:00
|
|
|
import java.util.List;
|
2018-09-25 23:50:29 +02:00
|
|
|
|
2022-11-28 01:22:10 +01:00
|
|
|
import static org.schabi.newpipe.extractor.comments.CommentsInfoItem.UNKNOWN_REPLY_COUNT;
|
2022-07-22 21:34:12 +02:00
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getImagesFromThumbnailsArray;
|
2022-11-28 01:22:10 +01:00
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
|
|
|
|
|
2018-09-26 00:51:58 +02:00
|
|
|
public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
|
|
|
|
|
2018-09-25 23:50:29 +02:00
|
|
|
private final JsonObject json;
|
2021-08-04 15:05:14 +02:00
|
|
|
private JsonObject commentRenderer;
|
2018-09-25 23:50:29 +02:00
|
|
|
private final String url;
|
2019-04-28 22:03:16 +02:00
|
|
|
private final TimeAgoParser timeAgoParser;
|
2018-09-26 00:51:58 +02:00
|
|
|
|
2021-07-29 19:53:43 +02:00
|
|
|
public YoutubeCommentsInfoItemExtractor(final JsonObject json,
|
|
|
|
final String url,
|
|
|
|
final TimeAgoParser timeAgoParser) {
|
2018-09-25 23:50:29 +02:00
|
|
|
this.json = json;
|
|
|
|
this.url = url;
|
2019-04-28 22:03:16 +02:00
|
|
|
this.timeAgoParser = timeAgoParser;
|
2018-09-25 23:50:29 +02:00
|
|
|
}
|
|
|
|
|
2021-08-04 15:05:14 +02:00
|
|
|
private JsonObject getCommentRenderer() throws ParsingException {
|
2022-03-18 15:09:06 +01:00
|
|
|
if (commentRenderer == null) {
|
|
|
|
if (json.has("comment")) {
|
2021-08-04 17:33:00 +02:00
|
|
|
commentRenderer = JsonUtils.getObject(json, "comment.commentRenderer");
|
2022-03-18 15:09:06 +01:00
|
|
|
} else {
|
|
|
|
commentRenderer = json;
|
|
|
|
}
|
2021-08-04 17:33:00 +02:00
|
|
|
}
|
|
|
|
return commentRenderer;
|
2021-08-04 15:05:14 +02:00
|
|
|
}
|
|
|
|
|
2022-07-22 21:34:12 +02:00
|
|
|
@Nonnull
|
|
|
|
private List<Image> getAuthorThumbnails() throws ParsingException {
|
|
|
|
try {
|
|
|
|
return getImagesFromThumbnailsArray(JsonUtils.getArray(getCommentRenderer(),
|
|
|
|
"authorThumbnail.thumbnails"));
|
|
|
|
} catch (final Exception e) {
|
|
|
|
throw new ParsingException("Could not get author thumbnails", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-25 23:50:29 +02:00
|
|
|
@Override
|
|
|
|
public String getUrl() throws ParsingException {
|
|
|
|
return url;
|
|
|
|
}
|
|
|
|
|
2022-07-22 21:34:12 +02:00
|
|
|
@Nonnull
|
2018-09-25 23:50:29 +02:00
|
|
|
@Override
|
2022-07-22 21:34:12 +02:00
|
|
|
public List<Image> getThumbnails() throws ParsingException {
|
|
|
|
return getAuthorThumbnails();
|
2018-09-25 23:50:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getName() throws ParsingException {
|
2018-09-26 00:51:58 +02:00
|
|
|
try {
|
2021-08-04 15:05:14 +02:00
|
|
|
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText"));
|
2021-07-29 19:53:43 +02:00
|
|
|
} catch (final Exception e) {
|
2022-08-15 05:49:40 +02:00
|
|
|
return "";
|
2018-09-26 00:51:58 +02:00
|
|
|
}
|
2018-09-25 23:50:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2020-04-21 19:49:17 +02:00
|
|
|
public String getTextualUploadDate() throws ParsingException {
|
2018-09-26 00:51:58 +02:00
|
|
|
try {
|
2022-03-18 15:09:06 +01:00
|
|
|
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(),
|
|
|
|
"publishedTimeText"));
|
2021-07-29 19:53:43 +02:00
|
|
|
} catch (final Exception e) {
|
2018-09-26 00:51:58 +02:00
|
|
|
throw new ParsingException("Could not get publishedTimeText", e);
|
|
|
|
}
|
2018-09-25 23:50:29 +02:00
|
|
|
}
|
|
|
|
|
2019-11-03 19:45:25 +01:00
|
|
|
@Nullable
|
2018-09-25 23:50:29 +02:00
|
|
|
@Override
|
2020-04-21 19:49:17 +02:00
|
|
|
public DateWrapper getUploadDate() throws ParsingException {
|
2022-03-18 15:09:06 +01:00
|
|
|
final String textualPublishedTime = getTextualUploadDate();
|
2021-07-29 19:53:43 +02:00
|
|
|
if (timeAgoParser != null && textualPublishedTime != null
|
|
|
|
&& !textualPublishedTime.isEmpty()) {
|
2019-04-28 22:03:16 +02:00
|
|
|
return timeAgoParser.parse(textualPublishedTime);
|
|
|
|
} else {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-05-21 19:40:51 +02:00
|
|
|
/**
|
2021-07-29 19:53:43 +02:00
|
|
|
* @implNote The method tries first to get the exact like count by using the accessibility data
|
|
|
|
* returned. But if the parsing of this accessibility data fails, the method parses internally
|
|
|
|
* a localized string.
|
|
|
|
* <br>
|
2021-05-21 20:58:35 +02:00
|
|
|
* <ul>
|
2021-07-29 19:53:43 +02:00
|
|
|
* <li>More than 1k likes will result in an inaccurate number</li>
|
|
|
|
* <li>This will fail for other languages than English. However as long as the Extractor
|
|
|
|
* only uses "en-GB" (as seen in {@link
|
|
|
|
* org.schabi.newpipe.extractor.services.youtube.YoutubeService#getSupportedLocalizations})
|
|
|
|
* , everything will work fine.</li>
|
2021-05-21 20:58:35 +02:00
|
|
|
* </ul>
|
2021-06-08 09:26:01 +02:00
|
|
|
* <br>
|
2021-05-27 19:48:31 +02:00
|
|
|
* Consider using {@link #getTextualLikeCount()}
|
2021-05-21 19:40:51 +02:00
|
|
|
*/
|
|
|
|
@Override
|
|
|
|
public int getLikeCount() throws ParsingException {
|
2021-07-29 19:53:43 +02:00
|
|
|
// Try first to get the exact like count by using the accessibility data
|
|
|
|
final String likeCount;
|
|
|
|
try {
|
2021-08-04 15:05:14 +02:00
|
|
|
likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(getCommentRenderer(),
|
2022-03-18 15:09:06 +01:00
|
|
|
"actionButtons.commentActionButtonsRenderer.likeButton.toggleButtonRenderer"
|
|
|
|
+ ".accessibilityData.accessibilityData.label"));
|
2021-07-29 19:53:43 +02:00
|
|
|
} catch (final Exception e) {
|
|
|
|
// Use the approximate like count returned into the voteCount object
|
|
|
|
// This may return a language dependent version, e.g. in German: 3,3 Mio
|
|
|
|
final String textualLikeCount = getTextualLikeCount();
|
|
|
|
try {
|
|
|
|
if (Utils.isBlank(textualLikeCount)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return (int) Utils.mixedNumberWordToLong(textualLikeCount);
|
|
|
|
} catch (final Exception i) {
|
|
|
|
throw new ParsingException(
|
|
|
|
"Unexpected error while converting textual like count to like count", i);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-05-21 19:40:51 +02:00
|
|
|
try {
|
2021-07-29 19:53:43 +02:00
|
|
|
if (Utils.isBlank(likeCount)) {
|
2021-05-21 19:40:51 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-07-29 19:53:43 +02:00
|
|
|
return Integer.parseInt(likeCount);
|
|
|
|
} catch (final Exception e) {
|
|
|
|
throw new ParsingException("Unexpected error while parsing like count as Integer", e);
|
2021-05-21 19:40:51 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-04-28 22:03:16 +02:00
|
|
|
@Override
|
2021-05-27 19:48:31 +02:00
|
|
|
public String getTextualLikeCount() throws ParsingException {
|
2021-05-20 21:09:45 +02:00
|
|
|
/*
|
|
|
|
* Example results as of 2021-05-20:
|
2021-05-20 20:10:14 +02:00
|
|
|
* Language = English
|
|
|
|
* 3.3M
|
|
|
|
* 48K
|
|
|
|
* 1.4K
|
|
|
|
* 270K
|
|
|
|
* 19
|
|
|
|
* 6
|
|
|
|
*
|
|
|
|
* Language = German
|
|
|
|
* 3,3 Mio
|
|
|
|
* 48.189
|
|
|
|
* 1419
|
|
|
|
* 270.984
|
|
|
|
* 19
|
|
|
|
* 6
|
|
|
|
*/
|
2018-09-26 00:51:58 +02:00
|
|
|
try {
|
2021-05-20 22:25:14 +02:00
|
|
|
// If a comment has no likes voteCount is not set
|
2021-08-04 15:05:14 +02:00
|
|
|
if (!getCommentRenderer().has("voteCount")) {
|
2022-08-15 05:49:40 +02:00
|
|
|
return "";
|
2021-05-20 21:23:31 +02:00
|
|
|
}
|
|
|
|
|
2021-08-04 15:05:14 +02:00
|
|
|
final JsonObject voteCountObj = JsonUtils.getObject(getCommentRenderer(), "voteCount");
|
2021-05-21 19:51:56 +02:00
|
|
|
if (voteCountObj.isEmpty()) {
|
2022-08-15 05:49:40 +02:00
|
|
|
return "";
|
2021-05-20 20:10:14 +02:00
|
|
|
}
|
|
|
|
return getTextFromObject(voteCountObj);
|
2021-07-29 19:53:43 +02:00
|
|
|
} catch (final Exception e) {
|
|
|
|
throw new ParsingException("Could not get the vote count", e);
|
2018-09-26 00:51:58 +02:00
|
|
|
}
|
2018-09-25 23:50:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2022-11-28 01:22:10 +01:00
|
|
|
public Description getCommentText() throws ParsingException {
|
2018-09-25 23:50:29 +02:00
|
|
|
try {
|
2021-08-04 15:05:14 +02:00
|
|
|
final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText");
|
2020-08-05 18:25:35 +02:00
|
|
|
if (contentText.isEmpty()) {
|
|
|
|
// completely empty comments as described in
|
|
|
|
// https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
|
2022-11-28 01:22:10 +01:00
|
|
|
return Description.EMPTY_DESCRIPTION;
|
2020-08-05 18:25:35 +02:00
|
|
|
}
|
2022-09-17 12:33:39 +02:00
|
|
|
final String commentText = getTextFromObject(contentText, true);
|
2021-07-29 19:53:43 +02:00
|
|
|
// YouTube adds U+FEFF in some comments.
|
|
|
|
// eg. https://www.youtube.com/watch?v=Nj4F63E59io<feff>
|
2022-11-28 01:22:10 +01:00
|
|
|
final String commentTextBomRemoved = Utils.removeUTF8BOM(commentText);
|
|
|
|
|
|
|
|
return new Description(commentTextBomRemoved, Description.HTML);
|
2021-07-29 19:53:43 +02:00
|
|
|
} catch (final Exception e) {
|
2018-09-28 01:24:57 +02:00
|
|
|
throw new ParsingException("Could not get comment text", e);
|
2018-09-25 23:50:29 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getCommentId() throws ParsingException {
|
2018-09-26 00:51:58 +02:00
|
|
|
try {
|
2021-08-04 15:05:14 +02:00
|
|
|
return JsonUtils.getString(getCommentRenderer(), "commentId");
|
2021-07-29 19:53:43 +02:00
|
|
|
} catch (final Exception e) {
|
2018-09-26 00:51:58 +02:00
|
|
|
throw new ParsingException("Could not get comment id", e);
|
|
|
|
}
|
2018-09-25 23:50:29 +02:00
|
|
|
}
|
|
|
|
|
2022-07-22 21:34:12 +02:00
|
|
|
@Nonnull
|
2018-09-25 23:50:29 +02:00
|
|
|
@Override
|
2022-07-22 21:34:12 +02:00
|
|
|
public List<Image> getUploaderAvatars() throws ParsingException {
|
|
|
|
return getAuthorThumbnails();
|
2018-09-25 23:50:29 +02:00
|
|
|
}
|
|
|
|
|
2021-01-11 18:38:54 +01:00
|
|
|
@Override
|
2021-01-23 13:06:07 +01:00
|
|
|
public boolean isHeartedByUploader() throws ParsingException {
|
2022-03-18 15:09:06 +01:00
|
|
|
final JsonObject commentActionButtonsRenderer = getCommentRenderer()
|
|
|
|
.getObject("actionButtons")
|
2021-07-29 19:53:43 +02:00
|
|
|
.getObject("commentActionButtonsRenderer");
|
|
|
|
return commentActionButtonsRenderer.has("creatorHeart");
|
2021-01-11 18:38:54 +01:00
|
|
|
}
|
|
|
|
|
2021-01-22 23:59:22 +01:00
|
|
|
@Override
|
2021-08-04 15:05:14 +02:00
|
|
|
public boolean isPinned() throws ParsingException {
|
|
|
|
return getCommentRenderer().has("pinnedCommentBadge");
|
2021-01-22 23:59:22 +01:00
|
|
|
}
|
|
|
|
|
2022-07-22 21:34:12 +02:00
|
|
|
@Override
|
2021-08-04 15:05:14 +02:00
|
|
|
public boolean isUploaderVerified() throws ParsingException {
|
|
|
|
return getCommentRenderer().has("authorCommentBadge");
|
2021-01-22 01:44:58 +01:00
|
|
|
}
|
|
|
|
|
2018-09-25 23:50:29 +02:00
|
|
|
@Override
|
2020-04-21 19:49:17 +02:00
|
|
|
public String getUploaderName() throws ParsingException {
|
2018-09-26 00:51:58 +02:00
|
|
|
try {
|
2021-08-04 15:05:14 +02:00
|
|
|
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText"));
|
2021-07-29 19:53:43 +02:00
|
|
|
} catch (final Exception e) {
|
2022-08-15 05:49:40 +02:00
|
|
|
return "";
|
2018-09-26 00:51:58 +02:00
|
|
|
}
|
2018-09-25 23:50:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2020-04-21 19:49:17 +02:00
|
|
|
public String getUploaderUrl() throws ParsingException {
|
2018-09-26 00:51:58 +02:00
|
|
|
try {
|
2021-08-04 15:05:14 +02:00
|
|
|
return "https://www.youtube.com/channel/" + JsonUtils.getString(getCommentRenderer(),
|
2021-07-29 19:53:43 +02:00
|
|
|
"authorEndpoint.browseEndpoint.browseId");
|
|
|
|
} catch (final Exception e) {
|
2022-08-15 05:49:40 +02:00
|
|
|
return "";
|
2018-09-26 00:51:58 +02:00
|
|
|
}
|
2018-09-25 23:50:29 +02:00
|
|
|
}
|
2021-08-04 14:24:47 +02:00
|
|
|
|
2022-10-15 12:40:06 +02:00
|
|
|
@Override
|
|
|
|
public int getReplyCount() throws ParsingException {
|
2022-10-29 13:24:19 +02:00
|
|
|
final JsonObject commentRendererJsonObject = getCommentRenderer();
|
|
|
|
if (commentRendererJsonObject.has("replyCount")) {
|
|
|
|
return commentRendererJsonObject.getInt("replyCount");
|
2022-10-15 12:40:06 +02:00
|
|
|
}
|
|
|
|
return UNKNOWN_REPLY_COUNT;
|
|
|
|
}
|
|
|
|
|
2021-08-04 14:24:47 +02:00
|
|
|
@Override
|
2022-07-22 21:34:12 +02:00
|
|
|
public Page getReplies() {
|
2021-08-04 14:24:47 +02:00
|
|
|
try {
|
2022-03-18 15:09:06 +01:00
|
|
|
final String id = JsonUtils.getString(
|
|
|
|
JsonUtils.getArray(json, "replies.commentRepliesRenderer.contents")
|
|
|
|
.getObject(0),
|
|
|
|
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
|
2021-08-04 14:24:47 +02:00
|
|
|
return new Page(url, id);
|
|
|
|
} catch (final Exception e) {
|
2022-03-18 15:09:06 +01:00
|
|
|
return null;
|
2021-08-04 14:24:47 +02:00
|
|
|
}
|
|
|
|
}
|
2023-09-25 10:40:45 +02:00
|
|
|
|
|
|
|
@Override
|
2023-10-08 11:36:13 +02:00
|
|
|
public boolean isChannelOwner() throws ParsingException {
|
|
|
|
return getCommentRenderer().getBoolean("authorIsChannelOwner");
|
2023-09-25 10:40:45 +02:00
|
|
|
}
|
|
|
|
|
2023-09-25 10:40:45 +02:00
|
|
|
|
|
|
|
@Override
|
|
|
|
public boolean hasCreatorReply() throws ParsingException {
|
|
|
|
try {
|
|
|
|
final JsonObject commentRepliesRenderer = JsonUtils.getObject(json,
|
|
|
|
"replies.commentRepliesRenderer");
|
|
|
|
return commentRepliesRenderer.has("viewRepliesCreatorThumbnail");
|
|
|
|
} catch (final Exception e) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-25 23:50:29 +02:00
|
|
|
}
|