NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtrac...

502 lines
19 KiB
Java

package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.time.Instant;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.Locale;
import java.util.regex.Pattern;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getThumbnailUrlFromInfoItem;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
/*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* YoutubeStreamInfoItemExtractor.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
/**
 * Matches a text ending with a group of digits and commas followed by {@code " views"};
 * the number is captured in group 1. The wording is English only.
 */
private static final Pattern ACCESSIBILITY_DATA_VIEW_COUNT_REGEX =
Pattern.compile("([\\d,]+) views$");
// Lowercase English marker looked up in lowercased view count texts
private static final String NO_VIEWS_LOWERCASE = "no views";
// JSON element describing the video; all getters of this class read from it
private final JsonObject videoInfo;
// May be null (see the constructor); null-checked before textual dates/durations are parsed
private final TimeAgoParser timeAgoParser;
// Set by getStreamType() on its first call; null until then
private StreamType cachedStreamType;
// Set by isPremiere() on its first call; null until then
private Boolean isPremiere;
/**
 * Builds an extractor which reads the data of one stream item from a YouTube JSON element.
 *
 * @param videoInfoItem the JSON element describing the video
 * @param timeAgoParser the parser used for textual dates and durations, or {@code null}
 *                      if none is available
 */
public YoutubeStreamInfoItemExtractor(final JsonObject videoInfoItem,
                                      @Nullable final TimeAgoParser timeAgoParser) {
    this.timeAgoParser = timeAgoParser;
    this.videoInfo = videoInfoItem;
}
/**
 * Tells whether this item is a running livestream or a regular video.
 *
 * <p>The item is considered live when one of its {@code badges} has the style
 * {@code BADGE_STYLE_TYPE_LIVE_NOW} or the label {@code LIVE NOW}, or when one of its
 * {@code thumbnailOverlays} has a time status renderer whose style is {@code LIVE}
 * (compared case-insensitively). The result is cached after the first call.</p>
 *
 * @return {@link StreamType#LIVE_STREAM} or {@link StreamType#VIDEO_STREAM}
 */
@Override
public StreamType getStreamType() {
    if (cachedStreamType == null) {
        boolean live = videoInfo.getArray("badges")
                .stream()
                .filter(JsonObject.class::isInstance)
                .map(JsonObject.class::cast)
                .map(badge -> badge.getObject("metadataBadgeRenderer"))
                .anyMatch(renderer ->
                        "BADGE_STYLE_TYPE_LIVE_NOW".equals(renderer.getString("style", ""))
                                || "LIVE NOW".equals(renderer.getString("label", "")));

        // The overlays are only inspected when no badge marked the item as live
        if (!live) {
            live = videoInfo.getArray("thumbnailOverlays")
                    .stream()
                    .filter(JsonObject.class::isInstance)
                    .map(JsonObject.class::cast)
                    .map(overlay -> overlay.getObject("thumbnailOverlayTimeStatusRenderer")
                            .getString("style", ""))
                    .anyMatch("LIVE"::equalsIgnoreCase);
        }

        cachedStreamType = live ? StreamType.LIVE_STREAM : StreamType.VIDEO_STREAM;
    }
    return cachedStreamType;
}
/**
 * Tells whether this item should be treated as an ad: either it carries a
 * {@code Premium} badge, or its name is the placeholder of a private or deleted video.
 *
 * @return {@code true} if the item is premium, private or deleted
 * @throws ParsingException if the name of the item could not be extracted
 */
@Override
public boolean isAd() throws ParsingException {
    if (isPremium()) {
        return true;
    }
    final String title = getName();
    return title.equals("[Private video]") || title.equals("[Deleted video]");
}
/**
 * Builds the URL of the video from the {@code videoId} of the JSON element.
 *
 * @return the URL produced by {@link YoutubeStreamLinkHandlerFactory} for the video ID
 * @throws ParsingException if anything goes wrong while building the URL (the original
 *                          exception is attached as the cause)
 */
@Override
public String getUrl() throws ParsingException {
    try {
        final String id = videoInfo.getString("videoId");
        final YoutubeStreamLinkHandlerFactory linkHandlerFactory =
                YoutubeStreamLinkHandlerFactory.getInstance();
        return linkHandlerFactory.getUrl(id);
    } catch (final Exception e) {
        throw new ParsingException("Could not get url", e);
    }
}
/**
 * Extracts the name of the video, reading the {@code title} object first and the
 * {@code headline} object as a fallback.
 *
 * @return the first non-empty text found
 * @throws ParsingException if neither object provides a non-empty text
 */
@Override
public String getName() throws ParsingException {
    // Keys are tried in order, the first non-empty text wins
    for (final String key : new String[] {"title", "headline"}) {
        final String candidate = getTextFromObject(videoInfo.getObject(key));
        if (!isNullOrEmpty(candidate)) {
            return candidate;
        }
    }
    throw new ParsingException("Could not get name");
}
/**
 * Extracts the duration of the video.
 *
 * <p>The textual duration is looked up, in order, in {@code lengthText}, in
 * {@code lengthSeconds} and in the text of the first thumbnail overlay which has a
 * {@code thumbnailOverlayTimeStatusRenderer}. If none of them provides a value, the
 * accessibility label of the item is split on {@code " \u2013 "} and its second-to-last
 * part is handed to the time ago parser.</p>
 *
 * @return the parsed duration; {@code -1} for livestreams and for premieres without any
 *         duration text; {@code 0} when only the accessibility label could be used but the
 *         label or the time ago parser is missing
 * @throws ParsingException if the accessibility label does not have more than two parts, or
 *                          if a helper fails to parse the duration
 */
@Override
public long getDuration() throws ParsingException {
    if (getStreamType() == StreamType.LIVE_STREAM) {
        return -1;
    }

    String durationText = getTextFromObject(videoInfo.getObject("lengthText"));

    if (isNullOrEmpty(durationText)) {
        // Available in playlists for videos
        durationText = videoInfo.getString("lengthSeconds");
    }

    if (isNullOrEmpty(durationText)) {
        // Only the first overlay holding a time status renderer is considered
        for (final Object overlay : videoInfo.getArray("thumbnailOverlays")) {
            if (overlay instanceof JsonObject
                    && ((JsonObject) overlay).has("thumbnailOverlayTimeStatusRenderer")) {
                durationText = getTextFromObject(((JsonObject) overlay)
                        .getObject("thumbnailOverlayTimeStatusRenderer")
                        .getObject("text"));
                break;
            }
        }
    }

    if (!isNullOrEmpty(durationText)) {
        return YoutubeParsingHelper.parseDurationString(durationText);
    }

    if (isPremiere()) {
        // Premieres can be livestreams, so the duration is not available in this case
        return -1;
    }

    // Duration of short videos in channel tab; the parts of the label are separated by an
    // en dash surrounded by spaces, the duration being the second-to-last part
    // (e.g. title, then "49 seconds", then "play video")
    final String label = videoInfo.getObject("accessibility")
            .getObject("accessibilityData")
            .getString("label");
    if (label == null || timeAgoParser == null) {
        return 0;
    }

    final String[] parts = label.split(" \u2013 ");
    if (parts.length <= 2) {
        throw new ParsingException("Could not get duration");
    }
    return timeAgoParser.parseDuration(parts[parts.length - 2]);
}
/**
 * Extracts the name of the uploader, trying {@code longBylineText}, {@code ownerText} and
 * {@code shortBylineText} in that order.
 *
 * @return the first non-empty text found
 * @throws ParsingException if none of the three objects provides a non-empty text
 */
@Override
public String getUploaderName() throws ParsingException {
    final String[] bylineKeys = {"longBylineText", "ownerText", "shortBylineText"};
    for (final String key : bylineKeys) {
        final String candidate = getTextFromObject(videoInfo.getObject(key));
        if (!isNullOrEmpty(candidate)) {
            return candidate;
        }
    }
    throw new ParsingException("Could not get uploader name");
}
/**
 * Extracts the URL of the uploader from the navigation endpoint of the first run of
 * {@code longBylineText}, {@code ownerText} or {@code shortBylineText}, tried in that order.
 *
 * @return the first non-empty URL found
 * @throws ParsingException if none of the three objects provides a non-empty URL
 */
@Override
public String getUploaderUrl() throws ParsingException {
    final String[] bylineKeys = {"longBylineText", "ownerText", "shortBylineText"};
    for (final String key : bylineKeys) {
        final JsonObject endpoint = videoInfo.getObject(key)
                .getArray("runs")
                .getObject(0)
                .getObject("navigationEndpoint");
        final String candidate = getUrlFromNavigationEndpoint(endpoint);
        if (!isNullOrEmpty(candidate)) {
            return candidate;
        }
    }
    throw new ParsingException("Could not get uploader url");
}
/**
 * Extracts the URL of the first uploader avatar thumbnail, read from
 * {@code channelThumbnailSupportedRenderers} when that key is present, otherwise from
 * {@code channelThumbnail}.
 *
 * @return the {@code url} of the first thumbnail, or {@code null} if the JSON element has
 *         neither key
 * @throws ParsingException if the thumbnails array cannot be resolved by {@link JsonUtils}
 */
@Nullable
@Override
public String getUploaderAvatarUrl() throws ParsingException {
    final String thumbnailsPath;
    if (videoInfo.has("channelThumbnailSupportedRenderers")) {
        thumbnailsPath = "channelThumbnailSupportedRenderers"
                + ".channelThumbnailWithLinkRenderer.thumbnail.thumbnails";
    } else if (videoInfo.has("channelThumbnail")) {
        thumbnailsPath = "channelThumbnail.thumbnails";
    } else {
        return null;
    }

    final JsonArray thumbnails = JsonUtils.getArray(videoInfo, thumbnailsPath);
    return thumbnails.getObject(0).getString("url");
}
/**
 * Tells whether the uploader is verified, by passing the {@code ownerBadges} array of the
 * JSON element to {@link YoutubeParsingHelper#isVerified}.
 *
 * @return the result of the helper for the {@code ownerBadges} array
 */
@Override
public boolean isUploaderVerified() throws ParsingException {
    final JsonArray ownerBadges = videoInfo.getArray("ownerBadges");
    return YoutubeParsingHelper.isVerified(ownerBadges);
}
/**
 * Extracts the upload date as shown by YouTube.
 *
 * <p>For premieres, the start time is formatted as {@code yyyy-MM-dd HH:mm}. Otherwise the
 * text is looked up, in order, in {@code publishedTimeText}, in the third run of
 * {@code videoInfo} and in the {@code timestampText} of the reel player header.</p>
 *
 * @return the textual upload date, or {@code null} for livestreams and when no non-empty
 *         text could be found
 * @throws ParsingException if the premiere date cannot be parsed or a text helper fails
 */
@Nullable
@Override
public String getTextualUploadDate() throws ParsingException {
    if (StreamType.LIVE_STREAM.equals(getStreamType())) {
        return null;
    }

    if (isPremiere()) {
        final DateTimeFormatter premiereFormat = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm");
        return premiereFormat.format(getDateFromPremiere());
    }

    String uploadText = getTextFromObject(videoInfo.getObject("publishedTimeText"));

    if (isNullOrEmpty(uploadText) && videoInfo.has("videoInfo")) {
        // Returned in playlists, in the form: view count separator upload date
        final JsonArray infoRuns = videoInfo.getObject("videoInfo").getArray("runs");
        uploadText = infoRuns.getObject(2).getString("text");
    }

    if (isNullOrEmpty(uploadText)) {
        final JsonObject reelHeader = videoInfo
                .getObject("navigationEndpoint")
                .getObject("reelWatchEndpoint")
                .getObject("overlay")
                .getObject("reelPlayerOverlayRenderer")
                .getObject("reelPlayerHeaderSupportedRenderers")
                .getObject("reelPlayerHeaderRenderer");
        uploadText = getTextFromObject(reelHeader.getObject("timestampText"));
    }

    if (isNullOrEmpty(uploadText)) {
        return null;
    }
    return uploadText;
}
/**
 * Extracts the upload date of the video.
 *
 * <p>Premieres use their start time; other videos have their textual upload date parsed by
 * the time ago parser.</p>
 *
 * @return the upload date, or {@code null} for livestreams, when no time ago parser was
 *         provided or when there is no textual upload date
 * @throws ParsingException if the premiere date or the textual upload date cannot be parsed
 */
@Nullable
@Override
public DateWrapper getUploadDate() throws ParsingException {
    if (StreamType.LIVE_STREAM.equals(getStreamType())) {
        return null;
    }

    if (isPremiere()) {
        return new DateWrapper(getDateFromPremiere());
    }

    final String uploadText = getTextualUploadDate();
    if (timeAgoParser == null || isNullOrEmpty(uploadText)) {
        return null;
    }

    try {
        return timeAgoParser.parse(uploadText);
    } catch (final ParsingException e) {
        throw new ParsingException("Could not get upload date", e);
    }
}
/**
 * Extracts the view count of the video on a best-effort basis.
 *
 * <p>Sources are tried in this order: {@code viewCountText}, the accessibility label of the
 * title (skipped for livestreams), the first run of {@code videoInfo} and finally
 * {@code shortViewCountText}. A source which fails to parse is skipped silently.</p>
 *
 * @return the view count, or {@code -1} for premium items, premieres and when no source
 *         could be parsed
 * @throws ParsingException if the text of {@code viewCountText} cannot be read
 */
@Override
public long getViewCount() throws ParsingException {
    if (isPremium() || isPremiere()) {
        return -1;
    }

    // Exceptions thrown while parsing any source below are ignored on purpose: the view
    // count can be hidden by creators, and so cannot be found in this case
    final String fullCountText = getTextFromObject(videoInfo.getObject("viewCountText"));
    final boolean reelItem = videoInfo.getString("videoType", "").startsWith("REEL_");

    if (!isNullOrEmpty(fullCountText)) {
        try {
            return getViewCountFromViewCountText(fullCountText, reelItem);
        } catch (final Exception ignored) {
            // Fall through to the next source
        }
    }

    // The accessibility data holds the real view count, except for running livestreams
    // (the view count is returned and not the count of people watching currently the
    // livestream), so it is not used for them
    if (getStreamType() != StreamType.LIVE_STREAM) {
        try {
            return getViewCountFromAccessibilityData();
        } catch (final Exception ignored) {
            // Fall through to the next source
        }
    }

    // From here on only short view counts are available; they are always used for
    // livestreams (see why above)
    if (videoInfo.has("videoInfo")) {
        // Returned in playlists, in the form: view count separator upload date
        try {
            final String playlistCountText = videoInfo.getObject("videoInfo")
                    .getArray("runs")
                    .getObject(0)
                    .getString("text", "");
            return getViewCountFromViewCountText(playlistCountText, true);
        } catch (final Exception ignored) {
            // Fall through to the next source
        }
    }

    if (videoInfo.has("shortViewCountText")) {
        // Returned everywhere but in playlists, used by the website to show view counts
        try {
            final String shortCountText =
                    getTextFromObject(videoInfo.getObject("shortViewCountText"));
            if (!isNullOrEmpty(shortCountText)) {
                return getViewCountFromViewCountText(shortCountText, true);
            }
        } catch (final Exception ignored) {
            // Nothing left to try
        }
    }

    // No view count extracted: return -1, as the view count can be hidden by creators on
    // videos
    return -1;
}
/**
 * Converts a view count text into a number.
 *
 * <p>The checks rely on English wording, so they depend on the language of the response.</p>
 *
 * @param viewCountText the text to parse
 * @param isMixedNumber whether the text should be parsed with
 *                      {@link Utils#mixedNumberWordToLong(String)}; if {@code false}, all
 *                      non-digit characters are removed and the rest is parsed as a
 *                      {@code long}
 * @return {@code 0} if the text contains {@code "no views"}, {@code -1} if it contains
 *         {@code "recommended"} (both compared case-insensitively), the parsed number
 *         otherwise
 * @throws NumberFormatException if the remaining digits cannot be parsed as a {@code long}
 * @throws ParsingException      if the mixed number helper fails
 */
private long getViewCountFromViewCountText(@Nonnull final String viewCountText,
                                           final boolean isMixedNumber)
        throws NumberFormatException, ParsingException {
    // Lowercase once, with Locale.ROOT: the no-arg toLowerCase() uses the default locale of
    // the JVM, and e.g. a Turkish locale maps 'I' to a dotless 'ı', so "NO VIEWS" would not
    // match the marker anymore
    final String lowerCaseText = viewCountText.toLowerCase(Locale.ROOT);

    // These approaches are language dependent
    if (lowerCaseText.contains(NO_VIEWS_LOWERCASE)) {
        return 0;
    } else if (lowerCaseText.contains("recommended")) {
        return -1;
    }

    return isMixedNumber ? Utils.mixedNumberWordToLong(viewCountText)
            : Long.parseLong(Utils.removeNonDigitCharacters(viewCountText));
}
/**
 * Reads the view count from the accessibility label of the title.
 *
 * <p>The checks rely on English wording, so they depend on the language of the response.</p>
 *
 * @return {@code 0} if the label ends with {@code "no views"} (compared
 *         case-insensitively), otherwise the number captured by
 *         {@link #ACCESSIBILITY_DATA_VIEW_COUNT_REGEX} with its non-digit characters removed
 * @throws NumberFormatException if the captured digits cannot be parsed as a {@code long}
 * @throws Parser.RegexException if the label does not match the view count pattern
 */
private long getViewCountFromAccessibilityData()
        throws NumberFormatException, Parser.RegexException {
    // These approaches are language dependent
    final String titleLabel = videoInfo.getObject("title")
            .getObject("accessibility")
            .getObject("accessibilityData")
            .getString("label", "");

    // Locale.ROOT keeps the comparison independent from the default locale of the JVM
    // (e.g. a Turkish locale lowercases 'I' to a dotless 'ı')
    if (titleLabel.toLowerCase(Locale.ROOT).endsWith(NO_VIEWS_LOWERCASE)) {
        return 0;
    }

    final String matchedCount =
            Parser.matchGroup1(ACCESSIBILITY_DATA_VIEW_COUNT_REGEX, titleLabel);
    return Long.parseLong(Utils.removeNonDigitCharacters(matchedCount));
}
/**
 * Extracts the thumbnail URL of the video by delegating to
 * {@link YoutubeParsingHelper#getThumbnailUrlFromInfoItem}.
 *
 * @return the URL returned by the helper for the JSON element of this item
 * @throws ParsingException if the helper fails
 */
@Override
public String getThumbnailUrl() throws ParsingException {
    final String thumbnailUrl = getThumbnailUrlFromInfoItem(videoInfo);
    return thumbnailUrl;
}
/**
 * Tells whether one of the {@code badges} of the item has the label {@code Premium}.
 *
 * <p>Entries of the array which are not JSON objects are skipped instead of being cast
 * blindly, so unexpected entries cannot raise a {@link ClassCastException}; this matches
 * how {@link #getStreamType()} walks the same array.</p>
 *
 * @return {@code true} if a badge labelled {@code Premium} is present
 */
private boolean isPremium() {
    return videoInfo.getArray("badges")
            .stream()
            .filter(JsonObject.class::isInstance)
            .map(JsonObject.class::cast)
            .map(badge -> badge.getObject("metadataBadgeRenderer").getString("label", ""))
            .anyMatch("Premium"::equals);
}
/**
 * Tells whether the item is a premiere, i.e. whether its JSON element has an
 * {@code upcomingEventData} key. The result is cached after the first call.
 *
 * @return {@code true} if {@code upcomingEventData} is present
 */
private boolean isPremiere() {
    if (isPremiere != null) {
        return isPremiere;
    }
    final boolean hasUpcomingEvent = videoInfo.has("upcomingEventData");
    isPremiere = hasUpcomingEvent;
    return hasUpcomingEvent;
}
/**
 * Reads the start time of a premiere from {@code upcomingEventData.startTime}, which is
 * parsed as a number of seconds since the epoch.
 *
 * @return the start time as an {@link OffsetDateTime} at the UTC offset
 * @throws ParsingException if the start time is missing or cannot be converted to a date;
 *                          the underlying exception is attached as the cause
 */
private OffsetDateTime getDateFromPremiere() throws ParsingException {
    final JsonObject upcomingEventData = videoInfo.getObject("upcomingEventData");
    final String startTime = upcomingEventData.getString("startTime");

    try {
        return OffsetDateTime.ofInstant(Instant.ofEpochSecond(Long.parseLong(startTime)),
                ZoneOffset.UTC);
    } catch (final Exception e) {
        // Keep the cause, so that the reason of the failure is not lost in stack traces
        throw new ParsingException(
                "Could not parse date from premiere: \"" + startTime + "\"", e);
    }
}
/**
 * Extracts the description snippet of the video, read from the first entry of
 * {@code detailedMetadataSnippets} when that key is present, otherwise from
 * {@code descriptionSnippet}.
 *
 * @return the text of the snippet, or {@code null} if the JSON element has neither key
 * @throws ParsingException if the text helper fails
 */
@Nullable
@Override
public String getShortDescription() throws ParsingException {
    final JsonObject snippet;
    if (videoInfo.has("detailedMetadataSnippets")) {
        snippet = videoInfo.getArray("detailedMetadataSnippets")
                .getObject(0)
                .getObject("snippetText");
    } else if (videoInfo.has("descriptionSnippet")) {
        snippet = videoInfo.getObject("descriptionSnippet");
    } else {
        return null;
    }
    return getTextFromObject(snippet);
}
/**
 * Tells whether the item is short-form content.
 *
 * <p>Three signals are checked in order, the first positive one wins: the web page type of
 * the navigation endpoint being {@code WEB_PAGE_TYPE_SHORTS}, the navigation endpoint
 * having a {@code reelWatchEndpoint}, and the first time status thumbnail overlay having
 * the style {@code SHORTS} or an icon type containing {@code shorts}.</p>
 *
 * @return {@code true} if one of the signals identifies the item as short-form content
 * @throws ParsingException if any exception occurs during the checks (attached as cause)
 */
@Override
public boolean isShortFormContent() throws ParsingException {
    try {
        final JsonObject navigationEndpoint = videoInfo.getObject("navigationEndpoint");

        final String pageType = navigationEndpoint.getObject("commandMetadata")
                .getObject("webCommandMetadata")
                .getString("webPageType");
        // A null or empty page type can never equal the constant
        if ("WEB_PAGE_TYPE_SHORTS".equals(pageType)) {
            return true;
        }

        if (navigationEndpoint.has("reelWatchEndpoint")) {
            return true;
        }

        // Only the first overlay holding a time status renderer is inspected
        for (final Object overlay : videoInfo.getArray("thumbnailOverlays")) {
            if (!(overlay instanceof JsonObject)
                    || !((JsonObject) overlay).has("thumbnailOverlayTimeStatusRenderer")) {
                continue;
            }

            final JsonObject timeStatusRenderer = ((JsonObject) overlay)
                    .getObject("thumbnailOverlayTimeStatusRenderer");
            if (isNullOrEmpty(timeStatusRenderer)) {
                return false;
            }
            return timeStatusRenderer.getString("style", "").equalsIgnoreCase("SHORTS")
                    || timeStatusRenderer.getObject("icon")
                    .getString("iconType", "")
                    .toLowerCase()
                    .contains("shorts");
        }

        return false;
    } catch (final Exception e) {
        throw new ParsingException("Could not determine if this is short-form content", e);
    }
}
}