NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtr...

276 lines
9.8 KiB
Java
Raw Normal View History

2018-09-25 23:50:29 +02:00
package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
2021-08-04 14:24:47 +02:00
import org.schabi.newpipe.extractor.Page;
2018-09-25 23:50:29 +02:00
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.stream.Description;
2018-09-25 23:50:29 +02:00
import org.schabi.newpipe.extractor.utils.JsonUtils;
2019-02-28 19:40:29 +01:00
import org.schabi.newpipe.extractor.utils.Utils;
2018-09-25 23:50:29 +02:00
import javax.annotation.Nullable;
2018-09-25 23:50:29 +02:00
import static org.schabi.newpipe.extractor.comments.CommentsInfoItem.UNKNOWN_REPLY_COUNT;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
2018-09-26 00:51:58 +02:00
public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
2018-09-25 23:50:29 +02:00
// JSON item handed to the constructor. getCommentRenderer() resolves the renderer from it
// and getReplies() reads the "replies" subtree from it.
private final JsonObject json;
2021-08-04 15:05:14 +02:00
// Cache filled by getCommentRenderer() on its first successful call; null until then.
private JsonObject commentRenderer;
2018-09-25 23:50:29 +02:00
// URL handed to the constructor; returned by getUrl() and passed to the replies Page.
private final String url;
// Parses the textual upload date in getUploadDate(); when it is null, getUploadDate()
// returns null instead of a parsed date.
private final TimeAgoParser timeAgoParser;
2018-09-26 00:51:58 +02:00
/**
 * Creates an extractor for a single comment item.
 *
 * @param json          the comment JSON; either an object holding the renderer under
 *                      {@code comment.commentRenderer} or the renderer object itself
 * @param url           the URL returned by {@link #getUrl()} and reused for the replies page
 * @param timeAgoParser parser for the textual upload date; if {@code null},
 *                      {@link #getUploadDate()} returns {@code null}
 */
public YoutubeCommentsInfoItemExtractor(final JsonObject json,
                                        final String url,
                                        final TimeAgoParser timeAgoParser) {
    this.timeAgoParser = timeAgoParser;
    this.url = url;
    this.json = json;
}
2021-08-04 15:05:14 +02:00
private JsonObject getCommentRenderer() throws ParsingException {
2022-03-18 15:09:06 +01:00
if (commentRenderer == null) {
if (json.has("comment")) {
2021-08-04 17:33:00 +02:00
commentRenderer = JsonUtils.getObject(json, "comment.commentRenderer");
2022-03-18 15:09:06 +01:00
} else {
commentRenderer = json;
}
2021-08-04 17:33:00 +02:00
}
return commentRenderer;
2021-08-04 15:05:14 +02:00
}
2018-09-25 23:50:29 +02:00
/**
 * Returns the URL this extractor was constructed with.
 *
 * @return the URL passed to the constructor, unchanged
 */
@Override
public String getUrl() throws ParsingException {
    return this.url;
}
@Override
public String getThumbnailUrl() throws ParsingException {
2018-09-26 00:51:58 +02:00
try {
2022-03-18 15:09:06 +01:00
final JsonArray arr = JsonUtils.getArray(getCommentRenderer(),
"authorThumbnail.thumbnails");
2018-10-19 16:03:36 +02:00
return JsonUtils.getString(arr.getObject(2), "url");
} catch (final Exception e) {
2018-09-26 00:51:58 +02:00
throw new ParsingException("Could not get thumbnail url", e);
}
2018-09-25 23:50:29 +02:00
}
@Override
public String getName() throws ParsingException {
2018-09-26 00:51:58 +02:00
try {
2021-08-04 15:05:14 +02:00
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText"));
} catch (final Exception e) {
2022-08-15 05:49:40 +02:00
return "";
2018-09-26 00:51:58 +02:00
}
2018-09-25 23:50:29 +02:00
}
@Override
public String getTextualUploadDate() throws ParsingException {
2018-09-26 00:51:58 +02:00
try {
2022-03-18 15:09:06 +01:00
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(),
"publishedTimeText"));
} catch (final Exception e) {
2018-09-26 00:51:58 +02:00
throw new ParsingException("Could not get publishedTimeText", e);
}
2018-09-25 23:50:29 +02:00
}
@Nullable
2018-09-25 23:50:29 +02:00
@Override
public DateWrapper getUploadDate() throws ParsingException {
2022-03-18 15:09:06 +01:00
final String textualPublishedTime = getTextualUploadDate();
if (timeAgoParser != null && textualPublishedTime != null
&& !textualPublishedTime.isEmpty()) {
return timeAgoParser.parse(textualPublishedTime);
} else {
return null;
}
}
2021-05-21 19:40:51 +02:00
/**
* @implNote The method tries first to get the exact like count by using the accessibility data
* returned. But if the parsing of this accessibility data fails, the method parses internally
* a localized string.
* <br>
2021-05-21 20:58:35 +02:00
* <ul>
* <li>More than 1k likes will result in an inaccurate number</li>
* <li>This will fail for other languages than English. However as long as the Extractor
* only uses "en-GB" (as seen in {@link
* org.schabi.newpipe.extractor.services.youtube.YoutubeService#getSupportedLocalizations})
* , everything will work fine.</li>
2021-05-21 20:58:35 +02:00
* </ul>
2021-06-08 09:26:01 +02:00
* <br>
2021-05-27 19:48:31 +02:00
* Consider using {@link #getTextualLikeCount()}
2021-05-21 19:40:51 +02:00
*/
@Override
public int getLikeCount() throws ParsingException {
// Try first to get the exact like count by using the accessibility data
final String likeCount;
try {
2021-08-04 15:05:14 +02:00
likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(getCommentRenderer(),
2022-03-18 15:09:06 +01:00
"actionButtons.commentActionButtonsRenderer.likeButton.toggleButtonRenderer"
+ ".accessibilityData.accessibilityData.label"));
} catch (final Exception e) {
// Use the approximate like count returned into the voteCount object
// This may return a language dependent version, e.g. in German: 3,3 Mio
final String textualLikeCount = getTextualLikeCount();
try {
if (Utils.isBlank(textualLikeCount)) {
return 0;
}
return (int) Utils.mixedNumberWordToLong(textualLikeCount);
} catch (final Exception i) {
throw new ParsingException(
"Unexpected error while converting textual like count to like count", i);
}
}
2021-05-21 19:40:51 +02:00
try {
if (Utils.isBlank(likeCount)) {
2021-05-21 19:40:51 +02:00
return 0;
}
return Integer.parseInt(likeCount);
} catch (final Exception e) {
throw new ParsingException("Unexpected error while parsing like count as Integer", e);
2021-05-21 19:40:51 +02:00
}
}
@Override
2021-05-27 19:48:31 +02:00
public String getTextualLikeCount() throws ParsingException {
2021-05-20 21:09:45 +02:00
/*
* Example results as of 2021-05-20:
* Language = English
* 3.3M
* 48K
* 1.4K
* 270K
* 19
* 6
*
* Language = German
* 3,3 Mio
* 48.189
* 1419
* 270.984
* 19
* 6
*/
2018-09-26 00:51:58 +02:00
try {
// If a comment has no likes voteCount is not set
2021-08-04 15:05:14 +02:00
if (!getCommentRenderer().has("voteCount")) {
2022-08-15 05:49:40 +02:00
return "";
}
2021-08-04 15:05:14 +02:00
final JsonObject voteCountObj = JsonUtils.getObject(getCommentRenderer(), "voteCount");
2021-05-21 19:51:56 +02:00
if (voteCountObj.isEmpty()) {
2022-08-15 05:49:40 +02:00
return "";
}
return getTextFromObject(voteCountObj);
} catch (final Exception e) {
throw new ParsingException("Could not get the vote count", e);
2018-09-26 00:51:58 +02:00
}
2018-09-25 23:50:29 +02:00
}
@Override
public Description getCommentText() throws ParsingException {
2018-09-25 23:50:29 +02:00
try {
2021-08-04 15:05:14 +02:00
final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText");
if (contentText.isEmpty()) {
// completely empty comments as described in
// https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
return Description.EMPTY_DESCRIPTION;
}
final String commentText = getTextFromObject(contentText, true);
// YouTube adds U+FEFF in some comments.
// eg. https://www.youtube.com/watch?v=Nj4F63E59io<feff>
final String commentTextBomRemoved = Utils.removeUTF8BOM(commentText);
return new Description(commentTextBomRemoved, Description.HTML);
} catch (final Exception e) {
throw new ParsingException("Could not get comment text", e);
2018-09-25 23:50:29 +02:00
}
}
@Override
public String getCommentId() throws ParsingException {
2018-09-26 00:51:58 +02:00
try {
2021-08-04 15:05:14 +02:00
return JsonUtils.getString(getCommentRenderer(), "commentId");
} catch (final Exception e) {
2018-09-26 00:51:58 +02:00
throw new ParsingException("Could not get comment id", e);
}
2018-09-25 23:50:29 +02:00
}
@Override
public String getUploaderAvatarUrl() throws ParsingException {
2018-09-26 00:51:58 +02:00
try {
2022-03-18 15:09:06 +01:00
final JsonArray arr = JsonUtils.getArray(getCommentRenderer(),
"authorThumbnail.thumbnails");
2018-10-19 16:03:36 +02:00
return JsonUtils.getString(arr.getObject(2), "url");
} catch (final Exception e) {
2018-09-26 00:51:58 +02:00
throw new ParsingException("Could not get author thumbnail", e);
}
2018-09-25 23:50:29 +02:00
}
@Override
public boolean isHeartedByUploader() throws ParsingException {
2022-03-18 15:09:06 +01:00
final JsonObject commentActionButtonsRenderer = getCommentRenderer()
.getObject("actionButtons")
.getObject("commentActionButtonsRenderer");
return commentActionButtonsRenderer.has("creatorHeart");
}
2021-01-22 23:59:22 +01:00
@Override
2021-08-04 15:05:14 +02:00
public boolean isPinned() throws ParsingException {
return getCommentRenderer().has("pinnedCommentBadge");
2021-01-22 23:59:22 +01:00
}
2021-08-04 15:05:14 +02:00
/**
 * Tells whether the comment renderer carries an {@code authorCommentBadge} entry.
 *
 * @return {@code true} if the renderer has an {@code authorCommentBadge} key
 * @throws ParsingException if the comment renderer cannot be resolved
 */
// NOTE(review): unlike the neighbouring methods this one carries no @Override; confirm
// whether CommentsInfoItemExtractor declares it and add the annotation if so.
public boolean isUploaderVerified() throws ParsingException {
    final JsonObject renderer = getCommentRenderer();
    return renderer.has("authorCommentBadge");
}
2018-09-25 23:50:29 +02:00
@Override
public String getUploaderName() throws ParsingException {
2018-09-26 00:51:58 +02:00
try {
2021-08-04 15:05:14 +02:00
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText"));
} catch (final Exception e) {
2022-08-15 05:49:40 +02:00
return "";
2018-09-26 00:51:58 +02:00
}
2018-09-25 23:50:29 +02:00
}
@Override
public String getUploaderUrl() throws ParsingException {
2018-09-26 00:51:58 +02:00
try {
2021-08-04 15:05:14 +02:00
return "https://www.youtube.com/channel/" + JsonUtils.getString(getCommentRenderer(),
"authorEndpoint.browseEndpoint.browseId");
} catch (final Exception e) {
2022-08-15 05:49:40 +02:00
return "";
2018-09-26 00:51:58 +02:00
}
2018-09-25 23:50:29 +02:00
}
2021-08-04 14:24:47 +02:00
/**
 * Returns the number of replies stored in the renderer's {@code replyCount} value.
 *
 * @return the {@code replyCount} value, or {@code UNKNOWN_REPLY_COUNT} when the renderer
 *         has no {@code replyCount} key
 * @throws ParsingException if the comment renderer cannot be resolved
 */
@Override
public int getReplyCount() throws ParsingException {
    final JsonObject renderer = getCommentRenderer();
    return renderer.has("replyCount")
            ? renderer.getInt("replyCount")
            : UNKNOWN_REPLY_COUNT;
}
2021-08-04 14:24:47 +02:00
@Override
public Page getReplies() throws ParsingException {
try {
2022-03-18 15:09:06 +01:00
final String id = JsonUtils.getString(
JsonUtils.getArray(json, "replies.commentRepliesRenderer.contents")
.getObject(0),
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
2021-08-04 14:24:47 +02:00
return new Page(url, id);
} catch (final Exception e) {
2022-03-18 15:09:06 +01:00
return null;
2021-08-04 14:24:47 +02:00
}
}
2018-09-25 23:50:29 +02:00
}