Move stuff from extractVideoPreviewInfo() into YoutubeStreamInfoItemExtractor and partially fix search

This commit is contained in:
wb9688 2020-02-22 20:19:41 +01:00 committed by TobiGr
parent af49b3c487
commit b88188d419
4 changed files with 118 additions and 354 deletions

View File

@ -4,6 +4,7 @@ import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException; import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
@ -23,9 +24,10 @@ import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.io.IOException;
@SuppressWarnings("WeakerAccess") @SuppressWarnings("WeakerAccess")
public class YoutubePlaylistExtractor extends PlaylistExtractor { public class YoutubePlaylistExtractor extends PlaylistExtractor {
@ -318,7 +320,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
} }
@Override @Override
public String getTextualUploadDate() throws ParsingException { public String getTextualUploadDate() {
return ""; return "";
} }

View File

@ -1,5 +1,10 @@
package org.schabi.newpipe.extractor.services.youtube.extractors; package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
@ -16,12 +21,13 @@ import org.schabi.newpipe.extractor.search.SearchExtractor;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Parser;
import javax.annotation.Nonnull;
import java.io.IOException; import java.io.IOException;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import javax.annotation.Nonnull;
/* /*
* Created by Christian Schabesberger on 22.07.2018 * Created by Christian Schabesberger on 22.07.2018
* *
@ -45,6 +51,7 @@ import java.net.URL;
public class YoutubeSearchExtractor extends SearchExtractor { public class YoutubeSearchExtractor extends SearchExtractor {
private Document doc; private Document doc;
private JsonObject ytInitialData;
public YoutubeSearchExtractor(StreamingService service, SearchQueryHandler linkHandler) { public YoutubeSearchExtractor(StreamingService service, SearchQueryHandler linkHandler) {
super(service, linkHandler); super(service, linkHandler);
@ -55,6 +62,7 @@ public class YoutubeSearchExtractor extends SearchExtractor {
final String url = getUrl(); final String url = getUrl();
final Response response = downloader.get(url, getExtractorLocalization()); final Response response = downloader.get(url, getExtractorLocalization());
doc = YoutubeParsingHelper.parseAndCheckPage(url, response); doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
ytInitialData = getInitialData();
} }
@Nonnull @Nonnull
@ -86,6 +94,7 @@ public class YoutubeSearchExtractor extends SearchExtractor {
@Override @Override
public InfoItemsPage<InfoItem> getPage(String pageUrl) throws IOException, ExtractionException { public InfoItemsPage<InfoItem> getPage(String pageUrl) throws IOException, ExtractionException {
// TODO: Get extracting next pages working
final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody(); final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody();
doc = Jsoup.parse(response, pageUrl); doc = Jsoup.parse(response, pageUrl);
@ -108,37 +117,33 @@ public class YoutubeSearchExtractor extends SearchExtractor {
InfoItemsSearchCollector collector = getInfoItemSearchCollector(); InfoItemsSearchCollector collector = getInfoItemSearchCollector();
collector.reset(); collector.reset();
Element list = doc.select("ol[class=\"item-section\"]").first();
final TimeAgoParser timeAgoParser = getTimeAgoParser(); final TimeAgoParser timeAgoParser = getTimeAgoParser();
for (Element item : list.children()) { JsonArray list = ytInitialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
/* First we need to determine which kind of item we are working with. .getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
Youtube depicts five different kinds of items on its search result page. These are .getObject(0).getObject("itemSectionRenderer").getArray("contents");
regular videos, playlists, channels, two types of video suggestions, and a "no video
found" item. Since we only want videos, we need to filter out all the others.
An example for this can be seen here:
https://www.youtube.com/results?search_query=asdf&page=1
We already applied a filter to the url, so we don't need to care about channels and for (Object item : list) {
playlists now. if (((JsonObject) item).getObject("backgroundPromoRenderer") != null) {
*/ throw new NothingFoundException(((JsonObject) item).getObject("backgroundPromoRenderer")
.getObject("bodyText").getArray("runs").getObject(0).getString("text"));
Element el; } else if (((JsonObject) item).getObject("videoRenderer") != null) {
collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) item).getObject("videoRenderer"), timeAgoParser));
if ((el = item.select("div[class*=\"search-message\"]").first()) != null) { } else if (((JsonObject) item).getObject("channelRenderer") != null) {
throw new NothingFoundException(el.text()); // collector.commit(new YoutubeChannelInfoItemExtractor(((JsonObject) item).getObject("channelRenderer")));
} else if (((JsonObject) item).getObject("playlistRenderer") != null) {
// video item type // collector.commit(new YoutubePlaylistInfoItemExtractor(((JsonObject) item).getObject("playlistRenderer")));
} else if ((el = item.select("div[class*=\"yt-lockup-video\"]").first()) != null) {
collector.commit(new YoutubeStreamInfoItemExtractor(el, timeAgoParser));
} else if ((el = item.select("div[class*=\"yt-lockup-channel\"]").first()) != null) {
collector.commit(new YoutubeChannelInfoItemExtractor(el));
} else if ((el = item.select("div[class*=\"yt-lockup-playlist\"]").first()) != null &&
item.select(".yt-pl-icon-mix").isEmpty()) {
collector.commit(new YoutubePlaylistInfoItemExtractor(el));
} }
} }
return collector; return collector;
} }
/**
 * Pulls the {@code window["ytInitialData"]} object literal out of the serialized
 * page held in {@code doc} and parses it as JSON.
 *
 * @return the parsed {@code ytInitialData} object
 * @throws ParsingException if the assignment cannot be matched in the page or the
 *                          matched text is not parseable JSON
 */
private JsonObject getInitialData() throws ParsingException {
    final String pageSource = doc.toString();

    final String rawJson;
    try {
        rawJson = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", pageSource);
    } catch (Parser.RegexException e) {
        throw new ParsingException("Could not get ytInitialData", e);
    }

    try {
        return JsonParser.object().from(rawJson);
    } catch (JsonParserException e) {
        throw new ParsingException("Could not get ytInitialData", e);
    }
}
} }

View File

@ -4,10 +4,10 @@ import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException; import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.mozilla.javascript.Context; import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function; import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject; import org.mozilla.javascript.ScriptableObject;
@ -25,24 +25,38 @@ import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.services.youtube.ItagItem; import org.schabi.newpipe.extractor.services.youtube.ItagItem;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory; import org.schabi.newpipe.extractor.stream.AudioStream;
import org.schabi.newpipe.extractor.stream.*; import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.stream.Frameset;
import org.schabi.newpipe.extractor.stream.Stream;
import org.schabi.newpipe.extractor.stream.StreamExtractor;
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.stream.SubtitlesStream;
import org.schabi.newpipe.extractor.stream.VideoStream;
import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException; import java.io.IOException;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.*; import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
/* /*
* Created by Christian Schabesberger on 06.08.15. * Created by Christian Schabesberger on 06.08.15.
* *
@ -663,7 +677,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
final TimeAgoParser timeAgoParser = getTimeAgoParser(); final TimeAgoParser timeAgoParser = getTimeAgoParser();
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
collector.commit(extractVideoPreviewInfo(videoInfo, timeAgoParser)); collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
return collector.getItems().get(0); return collector.getItems().get(0);
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get next video", e); throw new ParsingException("Could not get next video", e);
@ -684,7 +698,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
for (Object ul : results) { for (Object ul : results) {
final JsonObject videoInfo = ((JsonObject) ul).getObject("compactVideoRenderer"); final JsonObject videoInfo = ((JsonObject) ul).getObject("compactVideoRenderer");
if (videoInfo != null) collector.commit(extractVideoPreviewInfo(videoInfo, timeAgoParser)); if (videoInfo != null) collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
} }
return collector; return collector;
} catch (Exception e) { } catch (Exception e) {
@ -1064,123 +1078,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return urlAndItags; return urlAndItags;
} }
/**
 * Provides information about links to other videos on the video page, such as related videos.
 * This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo.
 *
 * @param videoInfo     the JSON object describing one video preview; every accessor below reads from it
 * @param timeAgoParser forwarded unchanged to the {@link YoutubeStreamInfoItemExtractor} constructor
 * @return an extractor whose accessors read lazily from {@code videoInfo}
 */
private StreamInfoItemExtractor extractVideoPreviewInfo(final JsonObject videoInfo, final TimeAgoParser timeAgoParser) {
    return new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser) {
        /**
         * Reports a live stream only when the first badge is labelled "LIVE NOW";
         * any lookup failure falls back to a regular video stream.
         */
        @Override
        public StreamType getStreamType() {
            try {
                // Constant-first equals: a missing label yields false instead of relying on a caught NPE.
                if ("LIVE NOW".equals(videoInfo.getArray("badges").getObject(0)
                        .getObject("metadataBadgeRenderer").getString("label"))) {
                    return StreamType.LIVE_STREAM;
                }
            } catch (Exception ignored) {
                // Best effort: items without a readable badge structure are treated as ordinary videos.
            }
            return StreamType.VIDEO_STREAM;
        }

        /** Always {@code false}: this extractor never classifies an item as an ad. */
        @Override
        public boolean isAd() {
            return false;
        }

        /**
         * Builds the stream URL from the item's {@code videoId}.
         *
         * @throws ParsingException if the id cannot be read or turned into a URL
         */
        @Override
        public String getUrl() throws ParsingException {
            try {
                String videoId = videoInfo.getString("videoId");
                return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId);
            } catch (Exception e) {
                throw new ParsingException("Could not get url", e);
            }
        }

        /**
         * Reads the title from {@code title.simpleText}.
         *
         * @throws ParsingException if the lookup fails or the title is null or empty
         */
        @Override
        public String getName() throws ParsingException {
            final String name;
            try {
                name = videoInfo.getObject("title").getString("simpleText");
            } catch (Exception e) {
                // Keep the underlying failure as the cause instead of discarding it.
                throw new ParsingException("Could not get title", e);
            }
            if (name != null && !name.isEmpty()) return name;
            throw new ParsingException("Could not get title");
        }

        /**
         * Returns {@code -1} for live streams, otherwise the duration parsed from
         * {@code lengthText.simpleText}.
         *
         * @throws ParsingException if the duration text cannot be read or parsed
         */
        @Override
        public long getDuration() throws ParsingException {
            try {
                if (getStreamType() == StreamType.LIVE_STREAM) return -1;
                return YoutubeParsingHelper.parseDurationString(videoInfo.getObject("lengthText").getString("simpleText"));
            } catch (Exception e) {
                throw new ParsingException("Could not get duration", e);
            }
        }

        /**
         * Builds the uploader's channel URL from the browse id of the first
         * {@code longBylineText} run.
         *
         * @throws ParsingException if the id is missing, empty, or cannot be turned into a URL
         */
        @Override
        public String getUploaderUrl() throws ParsingException {
            try {
                String id = videoInfo.getObject("longBylineText").getArray("runs")
                        .getObject(0).getObject("navigationEndpoint")
                        .getObject("browseEndpoint").getString("browseId");
                if (id == null || id.isEmpty()) {
                    throw new IllegalArgumentException("is empty");
                }
                return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id);
            } catch (Exception e) {
                // Attach the cause, consistent with the other accessors in this class.
                throw new ParsingException("Could not get uploader url", e);
            }
        }

        /** Always {@code null}: no textual upload date is extracted here. */
        @Nullable
        @Override
        public String getTextualUploadDate() {
            return null;
        }

        /** Always {@code null}: no upload date is extracted here. */
        @Nullable
        @Override
        public DateWrapper getUploadDate() {
            return null;
        }

        /**
         * Parses the view count from {@code viewCountText}; live streams keep the text in
         * the first {@code runs} entry, other videos in {@code simpleText}.
         *
         * @return the parsed count, or {@code -1} when the text is "Recommended for you"
         * @throws ParsingException if the text is missing or contains no parseable number
         */
        @Override
        public long getViewCount() throws ParsingException {
            try {
                String viewCount;
                if (getStreamType() == StreamType.LIVE_STREAM) {
                    viewCount = videoInfo.getObject("viewCountText")
                            .getArray("runs").getObject(0).getString("text");
                } else {
                    viewCount = videoInfo.getObject("viewCountText").getString("simpleText");
                }
                if (viewCount.equals("Recommended for you")) return -1;
                return Long.parseLong(Utils.removeNonDigitCharacters(viewCount));
            } catch (Exception e) {
                throw new ParsingException("Could not get view count", e);
            }
        }

        /**
         * Reads the uploader name from the text of the first {@code longBylineText} run.
         *
         * @throws ParsingException if the lookup fails
         */
        @Override
        public String getUploaderName() throws ParsingException {
            try {
                return videoInfo.getObject("longBylineText").getArray("runs")
                        .getObject(0).getString("text");
            } catch (Exception e) {
                throw new ParsingException("Could not get uploader name", e);
            }
        }

        /**
         * Returns the URL of the first entry in {@code thumbnail.thumbnails}.
         *
         * @throws ParsingException if the lookup fails
         */
        @Override
        public String getThumbnailUrl() throws ParsingException {
            try {
                return videoInfo.getObject("thumbnail").getArray("thumbnails")
                        .getObject(0).getString("url");
            } catch (Exception e) {
                throw new ParsingException("Could not get thumbnail url", e);
            }
        }
    };
}
@Nonnull @Nonnull
@Override @Override
public List<Frameset> getFrames() throws ExtractionException { public List<Frameset> getFrames() throws ExtractionException {

View File

@ -2,19 +2,17 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonObject;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor; import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.StreamType; import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
/* /*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org> * Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -36,20 +34,10 @@ import java.util.Date;
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
private JsonObject videoInfoItem; private JsonObject videoInfo;
private Element item;
private final TimeAgoParser timeAgoParser; private final TimeAgoParser timeAgoParser;
private String cachedUploadDate; public YoutubeStreamInfoItemExtractor(Element a, @Nullable TimeAgoParser timeAgoParser) {
/**
* Creates an extractor of StreamInfoItems from a YouTube page.
*
* @param item The page element
* @param timeAgoParser A parser of the textual dates or {@code null}.
*/
public YoutubeStreamInfoItemExtractor(Element item, @Nullable TimeAgoParser timeAgoParser) {
this.item = item;
this.timeAgoParser = timeAgoParser; this.timeAgoParser = timeAgoParser;
} }
@ -60,251 +48,123 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
* @param timeAgoParser A parser of the textual dates or {@code null}. * @param timeAgoParser A parser of the textual dates or {@code null}.
*/ */
public YoutubeStreamInfoItemExtractor(JsonObject videoInfoItem, @Nullable TimeAgoParser timeAgoParser) { public YoutubeStreamInfoItemExtractor(JsonObject videoInfoItem, @Nullable TimeAgoParser timeAgoParser) {
this.videoInfoItem = videoInfoItem; this.videoInfo = videoInfoItem;
this.timeAgoParser = timeAgoParser; this.timeAgoParser = timeAgoParser;
} }
@Override @Override
public StreamType getStreamType() throws ParsingException { public StreamType getStreamType() {
if (isLiveStream(item)) { try {
return StreamType.LIVE_STREAM; if (videoInfo.getArray("badges").getObject(0).getObject("metadataBadgeRenderer").getString("label").equals("LIVE NOW")) {
} else { return StreamType.LIVE_STREAM;
return StreamType.VIDEO_STREAM; }
} } catch (Exception ignored) {}
return StreamType.VIDEO_STREAM;
} }
@Override @Override
public boolean isAd() throws ParsingException { public boolean isAd() {
return !item.select("span[class*=\"icon-not-available\"]").isEmpty() return false;
|| !item.select("span[class*=\"yt-badge-ad\"]").isEmpty()
|| isPremiumVideo();
}
private boolean isPremiumVideo() {
Element premiumSpan = item.select("span[class=\"standalone-collection-badge-renderer-red-text\"]").first();
if (premiumSpan == null) return false;
// if this span has text it most likely says ("Free Video") so we can play this
if (premiumSpan.hasText()) return false;
return true;
} }
@Override @Override
public String getUrl() throws ParsingException { public String getUrl() throws ParsingException {
try { try {
Element el = item.select("div[class*=\"yt-lockup-video\"]").first(); String videoId = videoInfo.getString("videoId");
Element dl = el.select("h3").first().select("a").first(); return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId);
return dl.attr("abs:href");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get web page url for the video", e); throw new ParsingException("Could not get url", e);
} }
} }
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
String name = null;
try { try {
Element el = item.select("div[class*=\"yt-lockup-video\"]").first(); name = videoInfo.getObject("title").getString("simpleText");
Element dl = el.select("h3").first().select("a").first(); } catch (Exception ignored) {}
return dl.text(); if (name == null) {
} catch (Exception e) { try {
throw new ParsingException("Could not get title", e); name = videoInfo.getObject("title").getArray("runs").getObject(0).getString("text");
} catch (Exception ignored) {}
} }
if (name != null && !name.isEmpty()) return name;
throw new ParsingException("Could not get name");
} }
@Override @Override
public long getDuration() throws ParsingException { public long getDuration() throws ParsingException {
try { try {
if (getStreamType() == StreamType.LIVE_STREAM) return -1; if (getStreamType() == StreamType.LIVE_STREAM) return -1;
return YoutubeParsingHelper.parseDurationString(videoInfo.getObject("lengthText").getString("simpleText"));
final Element duration = item.select("span[class*=\"video-time\"]").first();
// apparently on youtube, video-time element will not show up if the video has a duration of 00:00
// see: https://www.youtube.com/results?sp=EgIQAVAU&q=asdfgf
return duration == null ? 0 : YoutubeParsingHelper.parseDurationString(duration.text());
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get Duration: " + getUrl(), e); throw new ParsingException("Could not get duration", e);
} }
} }
@Override @Override
public String getUploaderName() throws ParsingException { public String getUploaderName() throws ParsingException {
try { try {
return item.select("div[class=\"yt-lockup-byline\"]").first() return videoInfo.getObject("longBylineText").getArray("runs")
.select("a").first() .getObject(0).getString("text");
.text();
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get uploader", e); throw new ParsingException("Could not get uploader name", e);
} }
} }
@Override @Override
public String getUploaderUrl() throws ParsingException { public String getUploaderUrl() throws ParsingException {
// this url is not always in the form "/channel/..."
// sometimes Youtube provides urls in the from "/user/..."
try { try {
try { String id = videoInfo.getObject("longBylineText").getArray("runs")
return item.select("div[class=\"yt-lockup-byline\"]").first() .getObject(0).getObject("navigationEndpoint")
.select("a").first() .getObject("browseEndpoint").getString("browseId");
.attr("abs:href"); if (id == null || id.isEmpty()) {
} catch (Exception e){} throw new IllegalArgumentException("is empty");
// try this if the first didn't work
return item.select("span[class=\"title\"")
.text().split(" - ")[0];
} catch (Exception e) {
System.out.println(item.html());
throw new ParsingException("Could not get uploader url", e);
}
}
@Nullable
@Override
public String getTextualUploadDate() throws ParsingException {
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
return null;
}
if (cachedUploadDate != null) {
return cachedUploadDate;
}
try {
if (isVideoReminder()) {
final Calendar calendar = getDateFromReminder();
if (calendar != null) {
return cachedUploadDate = new SimpleDateFormat("yyyy-MM-dd HH:mm")
.format(calendar.getTime());
}
} }
return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id);
Element meta = item.select("div[class=\"yt-lockup-meta\"]").first();
if (meta == null) return "";
final Elements li = meta.select("li");
if (li.isEmpty()) return "";
return cachedUploadDate = li.first().text();
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get upload date", e); throw new ParsingException("Could not get uploader url");
} }
} }
@Nullable @Nullable
@Override @Override
public DateWrapper getUploadDate() throws ParsingException { public String getTextualUploadDate() {
if (getStreamType().equals(StreamType.LIVE_STREAM)) { // TODO: Get upload date in case of a videoRenderer (not available in case of a compactVideoRenderer)
return null; return null;
} }
if (isVideoReminder()) { @Nullable
return new DateWrapper(getDateFromReminder()); @Override
} public DateWrapper getUploadDate() {
return null;
String textualUploadDate = getTextualUploadDate();
if (timeAgoParser != null && textualUploadDate != null && !textualUploadDate.isEmpty()) {
return timeAgoParser.parse(textualUploadDate);
} else {
return null;
}
} }
@Override @Override
public long getViewCount() throws ParsingException { public long getViewCount() throws ParsingException {
String input;
final Element spanViewCount = item.select("span.view-count").first();
if (spanViewCount != null) {
input = spanViewCount.text();
} else if (getStreamType().equals(StreamType.LIVE_STREAM)) {
Element meta = item.select("ul.yt-lockup-meta-info").first();
if (meta == null) return 0;
final Elements li = meta.select("li");
if (li.isEmpty()) return 0;
input = li.first().text();
} else {
try {
Element meta = item.select("div.yt-lockup-meta").first();
if (meta == null) return -1;
// This case can happen if google releases a special video
if (meta.select("li").size() < 2) return -1;
input = meta.select("li").get(1).text();
} catch (IndexOutOfBoundsException e) {
throw new ParsingException("Could not parse yt-lockup-meta although available: " + getUrl(), e);
}
}
if (input == null) {
throw new ParsingException("Input is null");
}
try { try {
String viewCount;
return Long.parseLong(Utils.removeNonDigitCharacters(input)); if (getStreamType() == StreamType.LIVE_STREAM) {
} catch (NumberFormatException e) { viewCount = videoInfo.getObject("viewCountText")
// if this happens the video probably has no views .getArray("runs").getObject(0).getString("text");
if (!input.isEmpty()) { } else {
return 0; viewCount = videoInfo.getObject("viewCountText").getString("simpleText");
} }
if (viewCount.equals("Recommended for you")) return -1;
throw new ParsingException("Could not handle input: " + input, e); return Long.parseLong(Utils.removeNonDigitCharacters(viewCount));
} catch (Exception e) {
throw new ParsingException("Could not get view count", e);
} }
} }
@Override @Override
public String getThumbnailUrl() throws ParsingException { public String getThumbnailUrl() throws ParsingException {
try { try {
String url; // TODO: Don't simply get the first item, but look at all thumbnails and their resolution
Element te = item.select("div[class=\"yt-thumb video-thumb\"]").first() return videoInfo.getObject("thumbnail").getArray("thumbnails")
.select("img").first(); .getObject(0).getString("url");
url = te.attr("abs:src");
// Sometimes youtube sends links to gif files which somehow seem to not exist
// anymore. Items with such gif also offer a secondary image source. So we are going
// to use that if we've caught such an item.
if (url.contains(".gif")) {
url = te.attr("abs:data-thumb");
}
return url;
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e); throw new ParsingException("Could not get thumbnail url", e);
} }
} }
private boolean isVideoReminder() {
return !item.select("span.yt-uix-livereminder").isEmpty();
}
private Calendar getDateFromReminder() throws ParsingException {
final Element timeFuture = item.select("span.yt-badge.localized-date").first();
if (timeFuture == null) {
throw new ParsingException("Span timeFuture is null");
}
final String timestamp = timeFuture.attr("data-timestamp");
if (!timestamp.isEmpty()) {
try {
final Calendar calendar = Calendar.getInstance();
calendar.setTime(new Date(Long.parseLong(timestamp) * 1000L));
return calendar;
} catch (Exception e) {
throw new ParsingException("Could not parse = \"" + timestamp + "\"");
}
}
throw new ParsingException("Could not parse date from reminder element: \"" + timeFuture + "\"");
}
/**
* Generic method that checks if the element contains any clues that it's a livestream item
*/
protected static boolean isLiveStream(Element item) {
return !item.select("span[class*=\"yt-badge-live\"]").isEmpty()
|| !item.select("span[class*=\"video-time-overlay-live\"]").isEmpty();
}
} }