2018-05-08 21:19:03 +02:00
|
|
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2020-02-23 14:19:13 +01:00
|
|
|
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.time.Instant;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.Locale;
import java.util.regex.Pattern;

import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getThumbnailUrlFromInfoItem;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
2020-02-27 17:39:23 +01:00
|
|
|
|
2017-06-29 20:12:55 +02:00
|
|
|
/*
|
2017-03-01 18:47:52 +01:00
|
|
|
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
|
|
|
* YoutubeStreamInfoItemExtractor.java is part of NewPipe.
|
|
|
|
*
|
|
|
|
* NewPipe is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* NewPipe is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
2023-02-20 16:28:36 +01:00
|
|
|
|
|
|
|
private static final Pattern ACCESSIBILITY_DATA_VIEW_COUNT_REGEX =
|
|
|
|
Pattern.compile("([\\d,]+) views$");
|
|
|
|
private static final String NO_VIEWS_LOWERCASE = "no views";
|
|
|
|
|
2021-09-01 20:40:00 +02:00
|
|
|
private final JsonObject videoInfo;
|
2019-10-02 07:02:01 +02:00
|
|
|
private final TimeAgoParser timeAgoParser;
|
2020-03-07 20:46:00 +01:00
|
|
|
private StreamType cachedStreamType;
|
2022-12-08 13:14:27 +01:00
|
|
|
private Boolean isPremiere;
|
2019-10-02 07:02:01 +02:00
|
|
|
|
2020-02-17 20:24:48 +01:00
|
|
|
/**
 * Builds an extractor which reads the data of a stream info item out of a YouTube JSON
 * element.
 *
 * @param videoInfoItem the JSON object describing the video
 * @param timeAgoParser the parser used for textual upload dates and textual durations, or
 *                      {@code null} if none is available
 */
public YoutubeStreamInfoItemExtractor(final JsonObject videoInfoItem,
                                      @Nullable final TimeAgoParser timeAgoParser) {
    this.timeAgoParser = timeAgoParser;
    this.videoInfo = videoInfoItem;
}
|
|
|
|
|
2017-08-10 19:50:59 +02:00
|
|
|
@Override
|
2020-02-22 20:19:41 +01:00
|
|
|
public StreamType getStreamType() {
|
2020-03-07 20:46:00 +01:00
|
|
|
if (cachedStreamType != null) {
|
|
|
|
return cachedStreamType;
|
|
|
|
}
|
|
|
|
|
2020-04-16 16:08:14 +02:00
|
|
|
final JsonArray badges = videoInfo.getArray("badges");
|
2021-06-23 11:15:40 +02:00
|
|
|
for (final Object badge : badges) {
|
2022-12-08 13:14:27 +01:00
|
|
|
if (!(badge instanceof JsonObject)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
final JsonObject badgeRenderer
|
|
|
|
= ((JsonObject) badge).getObject("metadataBadgeRenderer");
|
2022-08-15 05:49:40 +02:00
|
|
|
if (badgeRenderer.getString("style", "").equals("BADGE_STYLE_TYPE_LIVE_NOW")
|
|
|
|
|| badgeRenderer.getString("label", "").equals("LIVE NOW")) {
|
2022-03-18 15:09:06 +01:00
|
|
|
cachedStreamType = StreamType.LIVE_STREAM;
|
|
|
|
return cachedStreamType;
|
2020-03-07 20:42:00 +01:00
|
|
|
}
|
2020-04-16 16:08:14 +02:00
|
|
|
}
|
|
|
|
|
2021-06-23 11:15:40 +02:00
|
|
|
for (final Object overlay : videoInfo.getArray("thumbnailOverlays")) {
|
2022-12-08 13:14:27 +01:00
|
|
|
if (!(overlay instanceof JsonObject)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2021-06-23 11:15:40 +02:00
|
|
|
final String style = ((JsonObject) overlay)
|
2022-03-18 15:09:06 +01:00
|
|
|
.getObject("thumbnailOverlayTimeStatusRenderer")
|
2022-08-15 05:49:40 +02:00
|
|
|
.getString("style", "");
|
2021-06-23 11:15:40 +02:00
|
|
|
if (style.equalsIgnoreCase("LIVE")) {
|
2022-03-18 15:09:06 +01:00
|
|
|
cachedStreamType = StreamType.LIVE_STREAM;
|
|
|
|
return cachedStreamType;
|
2021-06-23 11:15:40 +02:00
|
|
|
}
|
2020-04-16 16:08:14 +02:00
|
|
|
}
|
2020-03-07 20:42:00 +01:00
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
cachedStreamType = StreamType.VIDEO_STREAM;
|
|
|
|
return cachedStreamType;
|
2017-08-10 19:50:59 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2020-02-24 12:55:51 +01:00
|
|
|
public boolean isAd() throws ParsingException {
|
2022-03-18 15:09:06 +01:00
|
|
|
return isPremium() || getName().equals("[Private video]")
|
|
|
|
|| getName().equals("[Deleted video]");
|
2017-08-10 19:50:59 +02:00
|
|
|
}
|
|
|
|
|
2017-03-01 18:47:52 +01:00
|
|
|
@Override
|
2017-08-11 20:21:49 +02:00
|
|
|
public String getUrl() throws ParsingException {
|
2017-03-01 18:47:52 +01:00
|
|
|
try {
|
2022-03-18 15:09:06 +01:00
|
|
|
final String videoId = videoInfo.getString("videoId");
|
2020-02-22 20:19:41 +01:00
|
|
|
return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId);
|
2022-03-18 15:09:06 +01:00
|
|
|
} catch (final Exception e) {
|
2020-02-22 20:19:41 +01:00
|
|
|
throw new ParsingException("Could not get url", e);
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2017-08-11 20:21:49 +02:00
|
|
|
public String getName() throws ParsingException {
|
2022-10-22 15:28:48 +02:00
|
|
|
String name = getTextFromObject(videoInfo.getObject("title"));
|
2022-03-18 15:09:06 +01:00
|
|
|
if (!isNullOrEmpty(name)) {
|
|
|
|
return name;
|
|
|
|
}
|
2022-10-22 15:28:48 +02:00
|
|
|
|
|
|
|
name = getTextFromObject(videoInfo.getObject("headline"));
|
|
|
|
if (!isNullOrEmpty(name)) {
|
|
|
|
return name;
|
|
|
|
}
|
|
|
|
|
2020-02-22 20:19:41 +01:00
|
|
|
throw new ParsingException("Could not get name");
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2017-08-11 20:21:49 +02:00
|
|
|
public long getDuration() throws ParsingException {
|
2022-12-08 13:14:27 +01:00
|
|
|
if (getStreamType() == StreamType.LIVE_STREAM) {
|
2020-02-29 22:19:34 +01:00
|
|
|
return -1;
|
|
|
|
}
|
2020-02-29 17:18:50 +01:00
|
|
|
|
2020-04-16 16:08:14 +02:00
|
|
|
String duration = getTextFromObject(videoInfo.getObject("lengthText"));
|
2020-02-29 17:18:50 +01:00
|
|
|
|
2020-05-11 11:40:24 +02:00
|
|
|
if (isNullOrEmpty(duration)) {
|
2022-12-08 13:14:27 +01:00
|
|
|
// Available in playlists for videos
|
|
|
|
duration = videoInfo.getString("lengthSeconds");
|
|
|
|
|
|
|
|
if (isNullOrEmpty(duration)) {
|
|
|
|
final JsonObject timeOverlay = videoInfo.getArray("thumbnailOverlays")
|
|
|
|
.stream()
|
|
|
|
.filter(JsonObject.class::isInstance)
|
|
|
|
.map(JsonObject.class::cast)
|
|
|
|
.filter(thumbnailOverlay ->
|
|
|
|
thumbnailOverlay.has("thumbnailOverlayTimeStatusRenderer"))
|
|
|
|
.findFirst()
|
|
|
|
.orElse(null);
|
|
|
|
|
|
|
|
if (timeOverlay != null) {
|
|
|
|
duration = getTextFromObject(
|
|
|
|
timeOverlay.getObject("thumbnailOverlayTimeStatusRenderer")
|
|
|
|
.getObject("text"));
|
2020-02-26 09:31:26 +01:00
|
|
|
}
|
2020-04-16 16:08:14 +02:00
|
|
|
}
|
2020-02-29 17:18:50 +01:00
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
if (isNullOrEmpty(duration)) {
|
2022-12-08 13:14:27 +01:00
|
|
|
if (isPremiere()) {
|
|
|
|
// Premieres can be livestreams, so the duration is not available in this
|
|
|
|
// case
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2022-10-22 15:28:48 +02:00
|
|
|
// Duration of short videos in channel tab
|
|
|
|
// example: "simple is best - 49 seconds - play video"
|
2022-11-02 19:17:23 +01:00
|
|
|
final String accessibilityLabel = videoInfo.getObject("accessibility")
|
2022-10-22 15:28:48 +02:00
|
|
|
.getObject("accessibilityData").getString("label");
|
2022-11-02 19:40:28 +01:00
|
|
|
if (accessibilityLabel == null || timeAgoParser == null) {
|
2022-11-02 19:17:23 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-10-22 15:28:48 +02:00
|
|
|
final String[] labelParts = accessibilityLabel.split(" \u2013 ");
|
|
|
|
|
|
|
|
if (labelParts.length > 2) {
|
2022-11-02 19:40:28 +01:00
|
|
|
final String textualDuration = labelParts[labelParts.length - 2];
|
|
|
|
return timeAgoParser.parseDuration(textualDuration);
|
2022-10-22 15:28:48 +02:00
|
|
|
}
|
2020-02-29 17:18:50 +01:00
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
throw new ParsingException("Could not get duration");
|
|
|
|
}
|
2022-03-17 14:50:12 +01:00
|
|
|
}
|
|
|
|
|
2020-02-29 17:18:50 +01:00
|
|
|
return YoutubeParsingHelper.parseDurationString(duration);
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2017-08-10 04:50:29 +02:00
|
|
|
public String getUploaderName() throws ParsingException {
|
2020-04-16 16:08:14 +02:00
|
|
|
String name = getTextFromObject(videoInfo.getObject("longBylineText"));
|
2020-02-29 17:18:50 +01:00
|
|
|
|
2020-05-11 11:40:24 +02:00
|
|
|
if (isNullOrEmpty(name)) {
|
2020-04-16 16:08:14 +02:00
|
|
|
name = getTextFromObject(videoInfo.getObject("ownerText"));
|
2020-02-29 17:18:50 +01:00
|
|
|
|
2020-05-11 11:40:24 +02:00
|
|
|
if (isNullOrEmpty(name)) {
|
2020-04-16 16:08:14 +02:00
|
|
|
name = getTextFromObject(videoInfo.getObject("shortBylineText"));
|
2020-02-29 17:18:50 +01:00
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
if (isNullOrEmpty(name)) {
|
|
|
|
throw new ParsingException("Could not get uploader name");
|
|
|
|
}
|
2020-02-29 17:18:50 +01:00
|
|
|
}
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
2020-02-29 17:18:50 +01:00
|
|
|
|
|
|
|
return name;
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
2017-11-08 10:17:44 +01:00
|
|
|
@Override
|
|
|
|
public String getUploaderUrl() throws ParsingException {
|
2020-04-16 16:08:14 +02:00
|
|
|
String url = getUrlFromNavigationEndpoint(videoInfo.getObject("longBylineText")
|
|
|
|
.getArray("runs").getObject(0).getObject("navigationEndpoint"));
|
2020-02-29 17:18:50 +01:00
|
|
|
|
2020-05-11 11:40:24 +02:00
|
|
|
if (isNullOrEmpty(url)) {
|
2020-04-16 16:08:14 +02:00
|
|
|
url = getUrlFromNavigationEndpoint(videoInfo.getObject("ownerText")
|
2020-02-29 17:18:50 +01:00
|
|
|
.getArray("runs").getObject(0).getObject("navigationEndpoint"));
|
|
|
|
|
2020-05-11 11:40:24 +02:00
|
|
|
if (isNullOrEmpty(url)) {
|
2020-04-16 16:08:14 +02:00
|
|
|
url = getUrlFromNavigationEndpoint(videoInfo.getObject("shortBylineText")
|
2020-02-27 17:39:23 +01:00
|
|
|
.getArray("runs").getObject(0).getObject("navigationEndpoint"));
|
2020-02-29 17:18:50 +01:00
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
if (isNullOrEmpty(url)) {
|
|
|
|
throw new ParsingException("Could not get uploader url");
|
|
|
|
}
|
2020-02-23 13:48:54 +01:00
|
|
|
}
|
2017-11-08 10:17:44 +01:00
|
|
|
}
|
2020-02-29 17:18:50 +01:00
|
|
|
|
|
|
|
return url;
|
2017-11-08 10:17:44 +01:00
|
|
|
}
|
|
|
|
|
2021-09-01 20:40:00 +02:00
|
|
|
@Nullable
|
|
|
|
@Override
|
|
|
|
public String getUploaderAvatarUrl() throws ParsingException {
|
2021-09-03 19:51:01 +02:00
|
|
|
if (videoInfo.has("channelThumbnailSupportedRenderers")) {
|
2022-03-18 15:09:06 +01:00
|
|
|
return JsonUtils.getArray(videoInfo, "channelThumbnailSupportedRenderers"
|
|
|
|
+ ".channelThumbnailWithLinkRenderer.thumbnail.thumbnails")
|
2021-09-01 21:05:56 +02:00
|
|
|
.getObject(0).getString("url");
|
2021-09-01 20:40:00 +02:00
|
|
|
}
|
|
|
|
|
2021-09-03 19:51:01 +02:00
|
|
|
if (videoInfo.has("channelThumbnail")) {
|
|
|
|
return JsonUtils.getArray(videoInfo, "channelThumbnail.thumbnails")
|
|
|
|
.getObject(0).getString("url");
|
|
|
|
}
|
|
|
|
|
2021-09-01 20:40:00 +02:00
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
2021-01-22 01:44:58 +01:00
|
|
|
@Override
|
|
|
|
public boolean isUploaderVerified() throws ParsingException {
|
|
|
|
return YoutubeParsingHelper.isVerified(videoInfo.getArray("ownerBadges"));
|
|
|
|
}
|
|
|
|
|
2019-11-03 19:45:25 +01:00
|
|
|
@Nullable
|
2017-03-01 18:47:52 +01:00
|
|
|
@Override
|
2020-02-29 22:19:34 +01:00
|
|
|
public String getTextualUploadDate() throws ParsingException {
|
|
|
|
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (isPremiere()) {
|
2020-10-18 05:48:14 +02:00
|
|
|
return DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm").format(getDateFromPremiere());
|
2020-02-29 22:19:34 +01:00
|
|
|
}
|
|
|
|
|
2022-12-08 13:14:27 +01:00
|
|
|
String publishedTimeText = getTextFromObject(videoInfo.getObject("publishedTimeText"));
|
|
|
|
|
|
|
|
if (isNullOrEmpty(publishedTimeText) && videoInfo.has("videoInfo")) {
|
|
|
|
/*
|
|
|
|
Returned in playlists, in the form: view count separator upload date
|
|
|
|
*/
|
|
|
|
publishedTimeText = videoInfo.getObject("videoInfo")
|
|
|
|
.getArray("runs")
|
|
|
|
.getObject(2)
|
|
|
|
.getString("text");
|
2022-03-18 15:09:06 +01:00
|
|
|
}
|
2020-04-16 16:08:14 +02:00
|
|
|
|
2023-03-21 00:45:53 +01:00
|
|
|
if (isNullOrEmpty(publishedTimeText)) {
|
|
|
|
publishedTimeText = getTextFromObject(videoInfo
|
|
|
|
.getObject("navigationEndpoint")
|
|
|
|
.getObject("reelWatchEndpoint").getObject("overlay")
|
|
|
|
.getObject("reelPlayerOverlayRenderer")
|
|
|
|
.getObject("reelPlayerHeaderSupportedRenderers")
|
|
|
|
.getObject("reelPlayerHeaderRenderer")
|
|
|
|
.getObject("timestampText")
|
|
|
|
);
|
2022-11-03 09:19:20 +01:00
|
|
|
}
|
|
|
|
|
2022-12-08 13:14:27 +01:00
|
|
|
return isNullOrEmpty(publishedTimeText) ? null : publishedTimeText;
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
2019-11-03 19:45:25 +01:00
|
|
|
@Nullable
|
2019-10-02 07:02:01 +02:00
|
|
|
@Override
|
2020-02-25 10:38:54 +01:00
|
|
|
public DateWrapper getUploadDate() throws ParsingException {
|
2020-02-29 22:19:34 +01:00
|
|
|
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (isPremiere()) {
|
|
|
|
return new DateWrapper(getDateFromPremiere());
|
|
|
|
}
|
|
|
|
|
|
|
|
final String textualUploadDate = getTextualUploadDate();
|
2020-04-15 18:49:58 +02:00
|
|
|
if (timeAgoParser != null && !isNullOrEmpty(textualUploadDate)) {
|
2020-02-25 10:38:54 +01:00
|
|
|
try {
|
|
|
|
return timeAgoParser.parse(textualUploadDate);
|
2022-03-18 15:09:06 +01:00
|
|
|
} catch (final ParsingException e) {
|
2020-02-25 10:38:54 +01:00
|
|
|
throw new ParsingException("Could not get upload date", e);
|
|
|
|
}
|
|
|
|
}
|
2020-02-22 20:19:41 +01:00
|
|
|
return null;
|
2019-10-02 07:02:01 +02:00
|
|
|
}
|
|
|
|
|
2017-03-01 18:47:52 +01:00
|
|
|
@Override
|
|
|
|
public long getViewCount() throws ParsingException {
|
2023-01-19 22:11:21 +01:00
|
|
|
if (isPremium() || isPremiere()) {
|
2022-12-08 13:14:27 +01:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2023-02-20 17:15:20 +01:00
|
|
|
// Ignore all exceptions, as the view count can be hidden by creators, and so cannot be
|
|
|
|
// found in this case
|
2022-12-08 13:14:27 +01:00
|
|
|
|
2023-02-20 17:15:20 +01:00
|
|
|
final String viewCountText = getTextFromObject(videoInfo.getObject("viewCountText"));
|
2023-03-21 23:43:34 +01:00
|
|
|
final boolean isReelItem =
|
|
|
|
videoInfo.getString("videoType", "").equals("REEL_VIDEO_TYPE_VIDEO");
|
2023-02-20 17:15:20 +01:00
|
|
|
if (!isNullOrEmpty(viewCountText)) {
|
2022-12-08 13:14:27 +01:00
|
|
|
try {
|
2023-03-21 23:43:34 +01:00
|
|
|
return getViewCountFromViewCountText(viewCountText, isReelItem);
|
2022-12-08 13:14:27 +01:00
|
|
|
} catch (final Exception ignored) {
|
2020-02-23 14:19:13 +01:00
|
|
|
}
|
2022-12-08 13:14:27 +01:00
|
|
|
}
|
2020-02-29 22:01:43 +01:00
|
|
|
|
2022-12-08 13:14:27 +01:00
|
|
|
// Try parsing the real view count from accessibility data, if that's not a running
|
|
|
|
// livestream (the view count is returned and not the count of people watching currently
|
|
|
|
// the livestream)
|
|
|
|
if (getStreamType() != StreamType.LIVE_STREAM) {
|
|
|
|
try {
|
2023-02-20 17:15:20 +01:00
|
|
|
return getViewCountFromAccessibilityData();
|
2022-12-08 13:14:27 +01:00
|
|
|
} catch (final Exception ignored) {
|
2020-02-29 22:01:43 +01:00
|
|
|
}
|
2022-12-08 13:14:27 +01:00
|
|
|
}
|
2020-02-29 22:01:43 +01:00
|
|
|
|
2022-12-08 13:14:27 +01:00
|
|
|
// Fallback to a short view count, always used for livestreams (see why above)
|
2023-02-20 17:15:20 +01:00
|
|
|
if (videoInfo.has("videoInfo")) {
|
2022-12-08 13:14:27 +01:00
|
|
|
// Returned in playlists, in the form: view count separator upload date
|
2023-02-20 17:15:20 +01:00
|
|
|
try {
|
|
|
|
return getViewCountFromViewCountText(videoInfo.getObject("videoInfo")
|
2022-12-08 13:14:27 +01:00
|
|
|
.getArray("runs")
|
|
|
|
.getObject(0)
|
2023-02-20 17:15:20 +01:00
|
|
|
.getString("text", ""), true);
|
|
|
|
} catch (final Exception ignored) {
|
2020-02-29 22:02:00 +01:00
|
|
|
}
|
2023-02-20 17:15:20 +01:00
|
|
|
}
|
2020-02-29 21:28:38 +01:00
|
|
|
|
2023-02-20 17:15:20 +01:00
|
|
|
if (videoInfo.has("shortViewCountText")) {
|
2022-12-08 13:14:27 +01:00
|
|
|
// Returned everywhere but in playlists, used by the website to show view counts
|
2023-02-20 17:15:20 +01:00
|
|
|
try {
|
|
|
|
final String shortViewCountText =
|
2023-02-20 16:28:36 +01:00
|
|
|
getTextFromObject(videoInfo.getObject("shortViewCountText"));
|
2023-02-20 17:15:20 +01:00
|
|
|
if (!isNullOrEmpty(shortViewCountText)) {
|
|
|
|
return getViewCountFromViewCountText(shortViewCountText, true);
|
2023-02-20 16:28:36 +01:00
|
|
|
}
|
2023-02-20 17:15:20 +01:00
|
|
|
} catch (final Exception ignored) {
|
2020-02-29 22:02:00 +01:00
|
|
|
}
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
2020-02-29 22:02:00 +01:00
|
|
|
|
2022-12-08 13:14:27 +01:00
|
|
|
// No view count extracted: return -1, as the view count can be hidden by creators on videos
|
|
|
|
return -1;
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
2023-02-20 17:15:20 +01:00
|
|
|
private long getViewCountFromViewCountText(@Nonnull final String viewCountText,
|
|
|
|
final boolean isMixedNumber)
|
|
|
|
throws NumberFormatException, ParsingException {
|
|
|
|
// These approaches are language dependent
|
|
|
|
if (viewCountText.toLowerCase().contains(NO_VIEWS_LOWERCASE)) {
|
|
|
|
return 0;
|
|
|
|
} else if (viewCountText.toLowerCase().contains("recommended")) {
|
|
|
|
return -1;
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
2023-02-20 17:15:20 +01:00
|
|
|
|
|
|
|
return isMixedNumber ? Utils.mixedNumberWordToLong(viewCountText)
|
|
|
|
: Long.parseLong(Utils.removeNonDigitCharacters(viewCountText));
|
|
|
|
}
|
|
|
|
|
|
|
|
private long getViewCountFromAccessibilityData()
|
|
|
|
throws NumberFormatException, Parser.RegexException {
|
|
|
|
// These approaches are language dependent
|
|
|
|
final String videoInfoTitleAccessibilityData = videoInfo.getObject("title")
|
|
|
|
.getObject("accessibility")
|
|
|
|
.getObject("accessibilityData")
|
|
|
|
.getString("label", "");
|
|
|
|
|
|
|
|
if (videoInfoTitleAccessibilityData.toLowerCase().endsWith(NO_VIEWS_LOWERCASE)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return Long.parseLong(Utils.removeNonDigitCharacters(
|
|
|
|
Parser.matchGroup1(ACCESSIBILITY_DATA_VIEW_COUNT_REGEX,
|
|
|
|
videoInfoTitleAccessibilityData)));
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getThumbnailUrl() throws ParsingException {
|
2022-02-02 20:23:11 +01:00
|
|
|
return getThumbnailUrlFromInfoItem(videoInfo);
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
2020-02-23 14:19:13 +01:00
|
|
|
|
|
|
|
private boolean isPremium() {
|
2022-03-18 15:09:06 +01:00
|
|
|
final JsonArray badges = videoInfo.getArray("badges");
|
|
|
|
for (final Object badge : badges) {
|
|
|
|
if (((JsonObject) badge).getObject("metadataBadgeRenderer")
|
2022-08-15 05:49:40 +02:00
|
|
|
.getString("label", "").equals("Premium")) {
|
2020-04-16 16:08:14 +02:00
|
|
|
return true;
|
2020-02-23 14:19:13 +01:00
|
|
|
}
|
2020-02-29 22:19:34 +01:00
|
|
|
}
|
2020-02-23 14:19:13 +01:00
|
|
|
return false;
|
|
|
|
}
|
2020-02-29 22:19:34 +01:00
|
|
|
|
|
|
|
private boolean isPremiere() {
|
2022-12-08 13:14:27 +01:00
|
|
|
if (isPremiere == null) {
|
|
|
|
isPremiere = videoInfo.has("upcomingEventData");
|
|
|
|
}
|
|
|
|
return isPremiere;
|
2020-02-29 22:19:34 +01:00
|
|
|
}
|
|
|
|
|
2020-10-18 05:48:14 +02:00
|
|
|
private OffsetDateTime getDateFromPremiere() throws ParsingException {
|
2020-02-29 22:19:34 +01:00
|
|
|
final JsonObject upcomingEventData = videoInfo.getObject("upcomingEventData");
|
|
|
|
final String startTime = upcomingEventData.getString("startTime");
|
|
|
|
|
|
|
|
try {
|
2020-10-18 05:48:14 +02:00
|
|
|
return OffsetDateTime.ofInstant(Instant.ofEpochSecond(Long.parseLong(startTime)),
|
|
|
|
ZoneOffset.UTC);
|
2022-03-18 15:09:06 +01:00
|
|
|
} catch (final Exception e) {
|
|
|
|
throw new ParsingException("Could not parse date from premiere: \"" + startTime + "\"");
|
2020-02-29 22:19:34 +01:00
|
|
|
}
|
|
|
|
}
|
2021-09-22 17:19:44 +02:00
|
|
|
|
|
|
|
@Nullable
|
|
|
|
@Override
|
|
|
|
public String getShortDescription() throws ParsingException {
|
|
|
|
|
2021-10-04 02:28:07 +02:00
|
|
|
if (videoInfo.has("detailedMetadataSnippets")) {
|
2022-03-18 15:09:06 +01:00
|
|
|
return getTextFromObject(videoInfo.getArray("detailedMetadataSnippets")
|
|
|
|
.getObject(0).getObject("snippetText"));
|
2021-10-04 02:28:07 +02:00
|
|
|
}
|
2021-09-22 17:19:44 +02:00
|
|
|
|
2021-10-04 02:28:07 +02:00
|
|
|
if (videoInfo.has("descriptionSnippet")) {
|
|
|
|
return getTextFromObject(videoInfo.getObject("descriptionSnippet"));
|
|
|
|
}
|
2021-09-22 17:19:44 +02:00
|
|
|
|
|
|
|
return null;
|
|
|
|
}
|
2022-10-17 22:51:16 +02:00
|
|
|
|
|
|
|
@Override
|
|
|
|
public boolean isShortFormContent() throws ParsingException {
|
|
|
|
try {
|
|
|
|
final String webPageType = videoInfo.getObject("navigationEndpoint")
|
|
|
|
.getObject("commandMetadata").getObject("webCommandMetadata")
|
|
|
|
.getString("webPageType");
|
2022-10-30 00:53:36 +02:00
|
|
|
|
|
|
|
boolean isShort = !isNullOrEmpty(webPageType)
|
|
|
|
&& webPageType.equals("WEB_PAGE_TYPE_SHORTS");
|
|
|
|
|
|
|
|
if (!isShort) {
|
|
|
|
isShort = videoInfo.getObject("navigationEndpoint").has("reelWatchEndpoint");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!isShort) {
|
|
|
|
final JsonObject thumbnailTimeOverlay = videoInfo.getArray("thumbnailOverlays")
|
|
|
|
.stream()
|
|
|
|
.filter(JsonObject.class::isInstance)
|
|
|
|
.map(JsonObject.class::cast)
|
|
|
|
.filter(thumbnailOverlay -> thumbnailOverlay.has(
|
|
|
|
"thumbnailOverlayTimeStatusRenderer"))
|
|
|
|
.map(thumbnailOverlay -> thumbnailOverlay.getObject(
|
|
|
|
"thumbnailOverlayTimeStatusRenderer"))
|
|
|
|
.findFirst()
|
|
|
|
.orElse(null);
|
|
|
|
|
|
|
|
if (!isNullOrEmpty(thumbnailTimeOverlay)) {
|
2022-10-31 12:05:11 +01:00
|
|
|
isShort = thumbnailTimeOverlay.getString("style", "")
|
2022-10-30 00:53:36 +02:00
|
|
|
.equalsIgnoreCase("SHORTS")
|
|
|
|
|| thumbnailTimeOverlay.getObject("icon")
|
2022-10-31 12:05:11 +01:00
|
|
|
.getString("iconType", "")
|
2022-10-30 00:53:36 +02:00
|
|
|
.toLowerCase()
|
|
|
|
.contains("shorts");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return isShort;
|
2022-10-17 22:51:16 +02:00
|
|
|
} catch (final Exception e) {
|
2022-10-29 02:38:20 +02:00
|
|
|
throw new ParsingException("Could not determine if this is short-form content", e);
|
2022-10-17 22:51:16 +02:00
|
|
|
}
|
|
|
|
}
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|