NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtrac...

501 lines
19 KiB
Java
Raw Normal View History

2018-05-08 21:19:03 +02:00
package org.schabi.newpipe.extractor.services.youtube.extractors;
2017-03-01 18:47:52 +01:00
2020-02-23 14:19:13 +01:00
import com.grack.nanojson.JsonArray;
2020-02-17 20:24:48 +01:00
import com.grack.nanojson.JsonObject;
2017-03-01 18:47:52 +01:00
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.StreamType;
2021-09-01 20:40:00 +02:00
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils;
2017-03-01 18:47:52 +01:00
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.time.Instant;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.Locale;
import java.util.regex.Pattern;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getThumbnailUrlFromInfoItem;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
2020-02-27 17:39:23 +01:00
/*
2017-03-01 18:47:52 +01:00
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* YoutubeStreamInfoItemExtractor.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
private static final Pattern ACCESSIBILITY_DATA_VIEW_COUNT_REGEX =
Pattern.compile("([\\d,]+) views$");
private static final String NO_VIEWS_LOWERCASE = "no views";
2021-09-01 20:40:00 +02:00
private final JsonObject videoInfo;
private final TimeAgoParser timeAgoParser;
private StreamType cachedStreamType;
private Boolean isPremiere;
2020-02-17 20:24:48 +01:00
/**
 * Builds an extractor that reads a stream info item out of one YouTube JSON element.
 *
 * @param videoInfoItem The JSON page element
 * @param timeAgoParser A parser of the textual dates or {@code null}.
 */
public YoutubeStreamInfoItemExtractor(final JsonObject videoInfoItem,
                                      @Nullable final TimeAgoParser timeAgoParser) {
    this.timeAgoParser = timeAgoParser;
    this.videoInfo = videoInfoItem;
}
@Override
public StreamType getStreamType() {
if (cachedStreamType != null) {
return cachedStreamType;
}
2020-04-16 16:08:14 +02:00
final JsonArray badges = videoInfo.getArray("badges");
2021-06-23 11:15:40 +02:00
for (final Object badge : badges) {
if (!(badge instanceof JsonObject)) {
continue;
}
2022-03-18 15:09:06 +01:00
final JsonObject badgeRenderer
= ((JsonObject) badge).getObject("metadataBadgeRenderer");
2022-08-15 05:49:40 +02:00
if (badgeRenderer.getString("style", "").equals("BADGE_STYLE_TYPE_LIVE_NOW")
|| badgeRenderer.getString("label", "").equals("LIVE NOW")) {
2022-03-18 15:09:06 +01:00
cachedStreamType = StreamType.LIVE_STREAM;
return cachedStreamType;
}
2020-04-16 16:08:14 +02:00
}
2021-06-23 11:15:40 +02:00
for (final Object overlay : videoInfo.getArray("thumbnailOverlays")) {
if (!(overlay instanceof JsonObject)) {
continue;
}
2021-06-23 11:15:40 +02:00
final String style = ((JsonObject) overlay)
2022-03-18 15:09:06 +01:00
.getObject("thumbnailOverlayTimeStatusRenderer")
2022-08-15 05:49:40 +02:00
.getString("style", "");
2021-06-23 11:15:40 +02:00
if (style.equalsIgnoreCase("LIVE")) {
2022-03-18 15:09:06 +01:00
cachedStreamType = StreamType.LIVE_STREAM;
return cachedStreamType;
2021-06-23 11:15:40 +02:00
}
2020-04-16 16:08:14 +02:00
}
2022-03-18 15:09:06 +01:00
cachedStreamType = StreamType.VIDEO_STREAM;
return cachedStreamType;
}
@Override
2020-02-24 12:55:51 +01:00
public boolean isAd() throws ParsingException {
2022-03-18 15:09:06 +01:00
return isPremium() || getName().equals("[Private video]")
|| getName().equals("[Deleted video]");
}
2017-03-01 18:47:52 +01:00
@Override
2017-08-11 20:21:49 +02:00
public String getUrl() throws ParsingException {
2017-03-01 18:47:52 +01:00
try {
2022-03-18 15:09:06 +01:00
final String videoId = videoInfo.getString("videoId");
return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId);
2022-03-18 15:09:06 +01:00
} catch (final Exception e) {
throw new ParsingException("Could not get url", e);
2017-03-01 18:47:52 +01:00
}
}
@Override
2017-08-11 20:21:49 +02:00
public String getName() throws ParsingException {
String name = getTextFromObject(videoInfo.getObject("title"));
2022-03-18 15:09:06 +01:00
if (!isNullOrEmpty(name)) {
return name;
}
name = getTextFromObject(videoInfo.getObject("headline"));
if (!isNullOrEmpty(name)) {
return name;
}
throw new ParsingException("Could not get name");
2017-03-01 18:47:52 +01:00
}
@Override
2017-08-11 20:21:49 +02:00
public long getDuration() throws ParsingException {
if (getStreamType() == StreamType.LIVE_STREAM) {
return -1;
}
2020-02-29 17:18:50 +01:00
2020-04-16 16:08:14 +02:00
String duration = getTextFromObject(videoInfo.getObject("lengthText"));
2020-02-29 17:18:50 +01:00
if (isNullOrEmpty(duration)) {
// Available in playlists for videos
duration = videoInfo.getString("lengthSeconds");
if (isNullOrEmpty(duration)) {
final JsonObject timeOverlay = videoInfo.getArray("thumbnailOverlays")
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.filter(thumbnailOverlay ->
thumbnailOverlay.has("thumbnailOverlayTimeStatusRenderer"))
.findFirst()
.orElse(null);
if (timeOverlay != null) {
duration = getTextFromObject(
timeOverlay.getObject("thumbnailOverlayTimeStatusRenderer")
.getObject("text"));
2020-02-26 09:31:26 +01:00
}
2020-04-16 16:08:14 +02:00
}
2020-02-29 17:18:50 +01:00
2022-03-18 15:09:06 +01:00
if (isNullOrEmpty(duration)) {
if (isPremiere()) {
// Premieres can be livestreams, so the duration is not available in this
// case
return -1;
}
// Duration of short videos in channel tab
// example: "simple is best - 49 seconds - play video"
final String accessibilityLabel = videoInfo.getObject("accessibility")
.getObject("accessibilityData").getString("label");
if (accessibilityLabel == null || timeAgoParser == null) {
return 0;
}
final String[] labelParts = accessibilityLabel.split(" \u2013 ");
if (labelParts.length > 2) {
final String textualDuration = labelParts[labelParts.length - 2];
return timeAgoParser.parseDuration(textualDuration);
}
2020-02-29 17:18:50 +01:00
2022-03-18 15:09:06 +01:00
throw new ParsingException("Could not get duration");
}
}
2020-02-29 17:18:50 +01:00
return YoutubeParsingHelper.parseDurationString(duration);
2017-03-01 18:47:52 +01:00
}
@Override
public String getUploaderName() throws ParsingException {
2020-04-16 16:08:14 +02:00
String name = getTextFromObject(videoInfo.getObject("longBylineText"));
2020-02-29 17:18:50 +01:00
if (isNullOrEmpty(name)) {
2020-04-16 16:08:14 +02:00
name = getTextFromObject(videoInfo.getObject("ownerText"));
2020-02-29 17:18:50 +01:00
if (isNullOrEmpty(name)) {
2020-04-16 16:08:14 +02:00
name = getTextFromObject(videoInfo.getObject("shortBylineText"));
2020-02-29 17:18:50 +01:00
2022-03-18 15:09:06 +01:00
if (isNullOrEmpty(name)) {
throw new ParsingException("Could not get uploader name");
}
2020-02-29 17:18:50 +01:00
}
2017-03-01 18:47:52 +01:00
}
2020-02-29 17:18:50 +01:00
return name;
2017-03-01 18:47:52 +01:00
}
2017-11-08 10:17:44 +01:00
@Override
public String getUploaderUrl() throws ParsingException {
2020-04-16 16:08:14 +02:00
String url = getUrlFromNavigationEndpoint(videoInfo.getObject("longBylineText")
.getArray("runs").getObject(0).getObject("navigationEndpoint"));
2020-02-29 17:18:50 +01:00
if (isNullOrEmpty(url)) {
2020-04-16 16:08:14 +02:00
url = getUrlFromNavigationEndpoint(videoInfo.getObject("ownerText")
2020-02-29 17:18:50 +01:00
.getArray("runs").getObject(0).getObject("navigationEndpoint"));
if (isNullOrEmpty(url)) {
2020-04-16 16:08:14 +02:00
url = getUrlFromNavigationEndpoint(videoInfo.getObject("shortBylineText")
2020-02-27 17:39:23 +01:00
.getArray("runs").getObject(0).getObject("navigationEndpoint"));
2020-02-29 17:18:50 +01:00
2022-03-18 15:09:06 +01:00
if (isNullOrEmpty(url)) {
throw new ParsingException("Could not get uploader url");
}
2020-02-23 13:48:54 +01:00
}
2017-11-08 10:17:44 +01:00
}
2020-02-29 17:18:50 +01:00
return url;
2017-11-08 10:17:44 +01:00
}
2021-09-01 20:40:00 +02:00
@Nullable
@Override
public String getUploaderAvatarUrl() throws ParsingException {
if (videoInfo.has("channelThumbnailSupportedRenderers")) {
2022-03-18 15:09:06 +01:00
return JsonUtils.getArray(videoInfo, "channelThumbnailSupportedRenderers"
+ ".channelThumbnailWithLinkRenderer.thumbnail.thumbnails")
2021-09-01 21:05:56 +02:00
.getObject(0).getString("url");
2021-09-01 20:40:00 +02:00
}
if (videoInfo.has("channelThumbnail")) {
return JsonUtils.getArray(videoInfo, "channelThumbnail.thumbnails")
.getObject(0).getString("url");
}
2021-09-01 20:40:00 +02:00
return null;
}
/**
 * Tells whether the uploader is verified, judging by the item's {@code ownerBadges}.
 *
 * @return the result of {@link YoutubeParsingHelper#isVerified} on the owner badges
 * @throws ParsingException declared by the overridden method
 */
@Override
public boolean isUploaderVerified() throws ParsingException {
    final JsonArray ownerBadges = videoInfo.getArray("ownerBadges");
    return YoutubeParsingHelper.isVerified(ownerBadges);
}
@Nullable
2017-03-01 18:47:52 +01:00
@Override
public String getTextualUploadDate() throws ParsingException {
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
return null;
}
if (isPremiere()) {
return DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm").format(getDateFromPremiere());
}
String publishedTimeText = getTextFromObject(videoInfo.getObject("publishedTimeText"));
if (isNullOrEmpty(publishedTimeText) && videoInfo.has("videoInfo")) {
/*
Returned in playlists, in the form: view count separator upload date
*/
publishedTimeText = videoInfo.getObject("videoInfo")
.getArray("runs")
.getObject(2)
.getString("text");
2022-03-18 15:09:06 +01:00
}
2020-04-16 16:08:14 +02:00
if (isNullOrEmpty(publishedTimeText)) {
publishedTimeText = getTextFromObject(videoInfo
.getObject("navigationEndpoint")
.getObject("reelWatchEndpoint").getObject("overlay")
.getObject("reelPlayerOverlayRenderer")
.getObject("reelPlayerHeaderSupportedRenderers")
.getObject("reelPlayerHeaderRenderer")
.getObject("timestampText")
);
2022-11-03 09:19:20 +01:00
}
return isNullOrEmpty(publishedTimeText) ? null : publishedTimeText;
2017-03-01 18:47:52 +01:00
}
@Nullable
@Override
public DateWrapper getUploadDate() throws ParsingException {
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
return null;
}
if (isPremiere()) {
return new DateWrapper(getDateFromPremiere());
}
final String textualUploadDate = getTextualUploadDate();
2020-04-15 18:49:58 +02:00
if (timeAgoParser != null && !isNullOrEmpty(textualUploadDate)) {
try {
return timeAgoParser.parse(textualUploadDate);
2022-03-18 15:09:06 +01:00
} catch (final ParsingException e) {
throw new ParsingException("Could not get upload date", e);
}
}
return null;
}
2017-03-01 18:47:52 +01:00
@Override
public long getViewCount() throws ParsingException {
2023-01-19 22:11:21 +01:00
if (isPremium() || isPremiere()) {
return -1;
}
// Ignore all exceptions, as the view count can be hidden by creators, and so cannot be
// found in this case
final String viewCountText = getTextFromObject(videoInfo.getObject("viewCountText"));
2023-03-21 23:43:34 +01:00
final boolean isReelItem =
videoInfo.getString("videoType", "").equals("REEL_VIDEO_TYPE_VIDEO");
if (!isNullOrEmpty(viewCountText)) {
try {
2023-03-21 23:43:34 +01:00
return getViewCountFromViewCountText(viewCountText, isReelItem);
} catch (final Exception ignored) {
2020-02-23 14:19:13 +01:00
}
}
// Try parsing the real view count from accessibility data, if that's not a running
// livestream (the view count is returned and not the count of people watching currently
// the livestream)
if (getStreamType() != StreamType.LIVE_STREAM) {
try {
return getViewCountFromAccessibilityData();
} catch (final Exception ignored) {
}
}
// Fallback to a short view count, always used for livestreams (see why above)
if (videoInfo.has("videoInfo")) {
// Returned in playlists, in the form: view count separator upload date
try {
return getViewCountFromViewCountText(videoInfo.getObject("videoInfo")
.getArray("runs")
.getObject(0)
.getString("text", ""), true);
} catch (final Exception ignored) {
}
}
if (videoInfo.has("shortViewCountText")) {
// Returned everywhere but in playlists, used by the website to show view counts
try {
final String shortViewCountText =
getTextFromObject(videoInfo.getObject("shortViewCountText"));
if (!isNullOrEmpty(shortViewCountText)) {
return getViewCountFromViewCountText(shortViewCountText, true);
}
} catch (final Exception ignored) {
}
2017-03-01 18:47:52 +01:00
}
// No view count extracted: return -1, as the view count can be hidden by creators on videos
return -1;
2017-03-01 18:47:52 +01:00
}
private long getViewCountFromViewCountText(@Nonnull final String viewCountText,
final boolean isMixedNumber)
throws NumberFormatException, ParsingException {
// These approaches are language dependent
if (viewCountText.toLowerCase().contains(NO_VIEWS_LOWERCASE)) {
return 0;
} else if (viewCountText.toLowerCase().contains("recommended")) {
return -1;
2017-03-01 18:47:52 +01:00
}
return isMixedNumber ? Utils.mixedNumberWordToLong(viewCountText)
: Long.parseLong(Utils.removeNonDigitCharacters(viewCountText));
}
private long getViewCountFromAccessibilityData()
throws NumberFormatException, Parser.RegexException {
// These approaches are language dependent
final String videoInfoTitleAccessibilityData = videoInfo.getObject("title")
.getObject("accessibility")
.getObject("accessibilityData")
.getString("label", "");
if (videoInfoTitleAccessibilityData.toLowerCase().endsWith(NO_VIEWS_LOWERCASE)) {
return 0;
}
return Long.parseLong(Utils.removeNonDigitCharacters(
Parser.matchGroup1(ACCESSIBILITY_DATA_VIEW_COUNT_REGEX,
videoInfoTitleAccessibilityData)));
2017-03-01 18:47:52 +01:00
}
@Override
public String getThumbnailUrl() throws ParsingException {
return getThumbnailUrlFromInfoItem(videoInfo);
2017-03-01 18:47:52 +01:00
}
2020-02-23 14:19:13 +01:00
private boolean isPremium() {
2022-03-18 15:09:06 +01:00
final JsonArray badges = videoInfo.getArray("badges");
for (final Object badge : badges) {
if (((JsonObject) badge).getObject("metadataBadgeRenderer")
2022-08-15 05:49:40 +02:00
.getString("label", "").equals("Premium")) {
2020-04-16 16:08:14 +02:00
return true;
2020-02-23 14:19:13 +01:00
}
}
2020-02-23 14:19:13 +01:00
return false;
}
/**
 * Tells whether the item is a premiere, i.e. whether it has {@code upcomingEventData}.
 *
 * <p>The answer is computed on the first call and stored for later calls.</p>
 *
 * @return {@code true} if the item has the {@code upcomingEventData} key
 */
private boolean isPremiere() {
    if (isPremiere != null) {
        return isPremiere;
    }
    isPremiere = videoInfo.has("upcomingEventData");
    return isPremiere;
}
private OffsetDateTime getDateFromPremiere() throws ParsingException {
final JsonObject upcomingEventData = videoInfo.getObject("upcomingEventData");
final String startTime = upcomingEventData.getString("startTime");
try {
return OffsetDateTime.ofInstant(Instant.ofEpochSecond(Long.parseLong(startTime)),
ZoneOffset.UTC);
2022-03-18 15:09:06 +01:00
} catch (final Exception e) {
throw new ParsingException("Could not parse date from premiere: \"" + startTime + "\"");
}
}
@Nullable
@Override
public String getShortDescription() throws ParsingException {
if (videoInfo.has("detailedMetadataSnippets")) {
2022-03-18 15:09:06 +01:00
return getTextFromObject(videoInfo.getArray("detailedMetadataSnippets")
.getObject(0).getObject("snippetText"));
}
if (videoInfo.has("descriptionSnippet")) {
return getTextFromObject(videoInfo.getObject("descriptionSnippet"));
}
return null;
}
@Override
public boolean isShortFormContent() throws ParsingException {
try {
final String webPageType = videoInfo.getObject("navigationEndpoint")
.getObject("commandMetadata").getObject("webCommandMetadata")
.getString("webPageType");
2022-10-30 00:53:36 +02:00
boolean isShort = !isNullOrEmpty(webPageType)
&& webPageType.equals("WEB_PAGE_TYPE_SHORTS");
if (!isShort) {
isShort = videoInfo.getObject("navigationEndpoint").has("reelWatchEndpoint");
}
if (!isShort) {
final JsonObject thumbnailTimeOverlay = videoInfo.getArray("thumbnailOverlays")
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.filter(thumbnailOverlay -> thumbnailOverlay.has(
"thumbnailOverlayTimeStatusRenderer"))
.map(thumbnailOverlay -> thumbnailOverlay.getObject(
"thumbnailOverlayTimeStatusRenderer"))
.findFirst()
.orElse(null);
if (!isNullOrEmpty(thumbnailTimeOverlay)) {
2022-10-31 12:05:11 +01:00
isShort = thumbnailTimeOverlay.getString("style", "")
2022-10-30 00:53:36 +02:00
.equalsIgnoreCase("SHORTS")
|| thumbnailTimeOverlay.getObject("icon")
2022-10-31 12:05:11 +01:00
.getString("iconType", "")
2022-10-30 00:53:36 +02:00
.toLowerCase()
.contains("shorts");
}
}
return isShort;
} catch (final Exception e) {
throw new ParsingException("Could not determine if this is short-form content", e);
}
}
2017-03-01 18:47:52 +01:00
}