From 39bf1ff1eba51a059944d9f54872f9596823f8d1 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Mon, 17 Feb 2020 09:55:06 +0100 Subject: [PATCH 01/53] Add ytInitialData regex --- .../youtube/extractors/YoutubeChannelExtractor.java | 11 +++++++++++ .../youtube/extractors/YoutubeStreamExtractor.java | 11 +++++++++++ .../java/org/schabi/newpipe/DownloaderTestImpl.java | 2 +- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index cc37cbdab..24c7f4d10 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -49,6 +49,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000"; private Document doc; + private JsonObject ytInitialData; public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler) { super(service, linkHandler); @@ -59,6 +60,16 @@ public class YoutubeChannelExtractor extends ChannelExtractor { String channelUrl = super.getUrl() + CHANNEL_URL_PARAMETERS; final Response response = downloader.get(channelUrl, getExtractorLocalization()); doc = YoutubeParsingHelper.parseAndCheckPage(channelUrl, response); + ytInitialData = getInitialData(); + } + + private JsonObject getInitialData() throws ParsingException { + try { + String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", doc.toString()); + return JsonParser.object().from(initialData); + } catch (JsonParserException | Parser.RegexException e) { + throw new ParsingException("Could not get ytInitialData", e); + } } @Override diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 9568c7ff5..65946436c 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -88,6 +88,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Nonnull private final Map videoInfoPage = new HashMap<>(); private JsonObject playerResponse; + private JsonObject ytInitialData; @Nonnull private List subtitlesInfos = new ArrayList<>(); @@ -736,6 +737,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { isAgeRestricted = false; } playerResponse = getPlayerResponse(); + ytInitialData = getInitialData(); if (decryptionCode.isEmpty()) { decryptionCode = loadDecryptionCode(playerUrl); @@ -809,6 +811,15 @@ public class YoutubeStreamExtractor extends StreamExtractor { } } + private JsonObject getInitialData() throws ParsingException { + try { + String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", doc.toString()); + return JsonParser.object().from(initialData); + } catch (JsonParserException | Parser.RegexException e) { + throw new ParsingException("Could not get ytInitialData", e); + } + } + @Nonnull private EmbeddedInfo getEmbeddedInfo() throws ParsingException, ReCaptchaException { try { diff --git a/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java b/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java index 948975a05..4fdab77e3 100644 --- a/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java +++ b/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java @@ -20,7 +20,7 @@ import java.util.Map; public class DownloaderTestImpl extends Downloader { - private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0"; + private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/73.0"; private static final String DEFAULT_HTTP_ACCEPT_LANGUAGE = "en"; private static DownloaderTestImpl instance = null; From be3e20e263a88ed63c901164e6875ddf82279210 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Mon, 17 Feb 2020 11:02:43 +0100 Subject: [PATCH 02/53] Adapt most channel changes --- .../extractors/YoutubeChannelExtractor.java | 37 +++++++++++++------ .../schabi/newpipe/extractor/utils/Utils.java | 6 +-- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 24c7f4d10..103a59c44 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -23,6 +23,9 @@ import org.schabi.newpipe.extractor.utils.Utils; import javax.annotation.Nonnull; import java.io.IOException; +import static org.schabi.newpipe.extractor.utils.Utils.HTTP; +import static org.schabi.newpipe.extractor.utils.Utils.HTTPS; + /* * Created by Christian Schabesberger on 25.07.16. * @@ -91,7 +94,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getId() throws ParsingException { try { - return doc.select("meta[itemprop=\"channelId\"]").first().attr("content"); + return doc.select("meta[property=\"og:url\"]").first().attr("content").replace(CHANNEL_URL_BASE, ""); + } catch (Exception ignored) {} + try { + return ytInitialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId"); } catch (Exception ignored) {} // fallback method; does not work with channels that have no "Subscribe" button (e.g. EminemVEVO) @@ -118,7 +124,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getAvatarUrl() throws ParsingException { try { - return doc.select("img[class=\"channel-header-profile-image\"]").first().attr("abs:src"); + return ytInitialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("avatar").getArray("thumbnails").getObject(0).getString("url"); } catch (Exception e) { throw new ParsingException("Could not get avatar", e); } @@ -127,11 +133,21 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getBannerUrl() throws ParsingException { try { - Element el = doc.select("div[id=\"gh-banner\"]").first().select("style").first(); - String cssContent = el.html(); - String url = "https:" + Parser.matchGroup1("url\\(([^)]+)\\)", cssContent); + String url = ytInitialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("banner").getArray("thumbnails").getObject(0).getString("url"); + if (url.contains("s.ytimg.com") || url.contains("default_banner")) { + return null; + } + // the first characters of the banner URLs are different for each channel and some are not even valid URLs + if (url.startsWith("//")) { + url = url.substring(2); + } + if (url.startsWith(HTTP)) { + url = Utils.replaceHttpWithHttps(url); + } else if (!url.startsWith(HTTPS)) { + url = HTTPS + url; + } - return url.contains("s.ytimg.com") || url.contains("default_banner") ? null : url; + return url; } catch (Exception e) { throw new ParsingException("Could not get Banner", e); } @@ -149,11 +165,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public long getSubscriberCount() throws ParsingException { - final Element el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]").first(); - if (el != null) { - String elTitle = el.attr("title"); + final String simpleText = ytInitialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("subscriberCountText").getArray("runs").getObject(0).getString("text"); + if (simpleText != null) { try { - return Utils.mixedNumberWordToLong(elTitle); + return Utils.mixedNumberWordToLong(simpleText); } catch (NumberFormatException e) { throw new ParsingException("Could not get subscriber count", e); } @@ -166,7 +181,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getDescription() throws ParsingException { try { - return doc.select("meta[name=\"description\"]").first().attr("content"); + return ytInitialData.getObject("metadata").getObject("channelMetadataRenderer").getString("description"); } catch (Exception e) { throw new ParsingException("Could not get channel description", e); } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java index ebd0ba16a..76aa2944f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java @@ -10,6 +10,9 @@ import java.util.List; public class Utils { + public static final String HTTP = "http://"; + public static final String HTTPS = "https://"; + private Utils() { //no instance } @@ -83,9 +86,6 @@ public class Utils { } } - private static final String HTTP = "http://"; - private static final String HTTPS = "https://"; - public static String replaceHttpWithHttps(final String url) { if (url == null) return null; From 7dcc9f159bb7280fd97f48af42cf5c2b3c182e27 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Mon, 17 Feb 2020 11:41:11 +0100 Subject: [PATCH 03/53] Stream: fix uploader avatar url --- .../youtube/extractors/YoutubeStreamExtractor.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 65946436c..b60fa8751 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -476,9 +476,11 @@ public class YoutubeStreamExtractor extends StreamExtractor { String uploaderAvatarUrl = null; try { - uploaderAvatarUrl = doc.select("a[class*=\"yt-user-photo\"]").first() - .select("img").first() - .attr("abs:data-thumb"); + uploaderAvatarUrl = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("secondaryResults") + .getObject("secondaryResults").getArray("results").getObject(0).getObject("compactAutoplayRenderer") + .getArray("contents").getObject(0).getObject("compactVideoRenderer").getObject("channelThumbnail") + .getArray("thumbnails").getObject(0).getString("url"); + } catch (Exception e) {//todo: add fallback method throw new ParsingException("Could not get uploader avatar url", e); } From 655c9997956147d2f16ef577e2c271d5ecb5d522 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Mon, 17 Feb 2020 17:30:22 +0100 Subject: [PATCH 04/53] Detect disabled subriber count correctly Fix parsing of kiosk name --- .../extractors/YoutubeChannelExtractor.java | 7 ++--- .../extractors/YoutubeTrendingExtractor.java | 26 ++++++++++++++++--- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 103a59c44..544a9e461 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -165,10 +165,11 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public long getSubscriberCount() throws ParsingException { - final String simpleText = ytInitialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("subscriberCountText").getArray("runs").getObject(0).getString("text"); - if (simpleText != null) { + final JsonObject subscriberInfo = ytInitialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("subscriberCountText"); + if (subscriberInfo != null) { try { - return Utils.mixedNumberWordToLong(simpleText); + + return Utils.mixedNumberWordToLong(subscriberInfo.getArray("runs").getObject(0).getString("text")); } catch (NumberFormatException e) { throw new ParsingException("Could not get subscriber count", e); } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java index be5820de7..44f9c7e5a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java @@ -20,6 +20,9 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; * along with NewPipe. If not, see . */ +import com.grack.nanojson.JsonObject; +import com.grack.nanojson.JsonParser; +import com.grack.nanojson.JsonParserException; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ -34,6 +37,7 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; +import org.schabi.newpipe.extractor.utils.Parser; import javax.annotation.Nonnull; import java.io.IOException; @@ -41,6 +45,7 @@ import java.io.IOException; public class YoutubeTrendingExtractor extends KioskExtractor { private Document doc; + private JsonObject initialData; public YoutubeTrendingExtractor(StreamingService service, ListLinkHandler linkHandler, @@ -55,6 +60,16 @@ public class YoutubeTrendingExtractor extends KioskExtractor { final Response response = downloader.get(url, getExtractorLocalization()); doc = YoutubeParsingHelper.parseAndCheckPage(url, response); + initialData = getInitialData(); + } + + private JsonObject getInitialData() throws ParsingException { + try { + String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", doc.toString()); + return JsonParser.object().from(initialData); + } catch (JsonParserException | Parser.RegexException e) { + throw new ParsingException("Could not get ytInitialData", e); + } } @Override @@ -70,14 +85,17 @@ public class YoutubeTrendingExtractor extends KioskExtractor { @Nonnull @Override public String getName() throws ParsingException { + String name; try { - Element a = doc.select("a[href*=\"/feed/trending\"]").first(); - Element span = a.select("span[class*=\"display-name\"]").first(); - Element nameSpan = span.select("span").first(); - return nameSpan.text(); + name = initialData.getObject("header").getObject("feedTabbedHeaderRenderer").getObject("title") + .getArray("runs").getObject(0).getString("text"); } catch (Exception e) { throw new ParsingException("Could not get Trending name", e); } + if (name != null && !name.isEmpty()) { + return name; + } + throw new ParsingException("Could not get Trending name"); } @Nonnull From 21253abd9b6d45695198733c16e638d67e597eb8 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Mon, 17 Feb 2020 17:52:29 +0100 Subject: [PATCH 05/53] Use Firefox ESR User-Agent: 68 --- .../src/test/java/org/schabi/newpipe/DownloaderTestImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java b/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java index 4fdab77e3..e524ac8d4 100644 --- a/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java +++ b/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java @@ -20,7 +20,7 @@ import java.util.Map; public class DownloaderTestImpl extends Downloader { - private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/73.0"; + private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Firefox/68.0"; private static final String DEFAULT_HTTP_ACCEPT_LANGUAGE = "en"; private static DownloaderTestImpl instance = null; From b705515da06ff2ebca28bed2bb513e4f94f6a94a Mon Sep 17 00:00:00 2001 From: TobiGr Date: Mon, 17 Feb 2020 18:58:12 +0100 Subject: [PATCH 06/53] Adapt most playlist fields --- .../extractors/YoutubePlaylistExtractor.java | 101 ++++++++++++++---- 1 file changed, 79 insertions(+), 22 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java index 520bda80c..692b9cc3a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java @@ -1,5 +1,6 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; +import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; @@ -19,6 +20,7 @@ import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingH import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import org.schabi.newpipe.extractor.stream.StreamType; +import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Utils; import javax.annotation.Nonnull; @@ -29,6 +31,10 @@ import java.io.IOException; public class YoutubePlaylistExtractor extends PlaylistExtractor { private Document doc; + private JsonObject initialData; + private JsonObject uploaderInfo; + private JsonObject playlistInfo; + private JsonObject playlistVideos; public YoutubePlaylistExtractor(StreamingService service, ListLinkHandler linkHandler) { super(service, linkHandler); @@ -39,6 +45,62 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { final String url = getUrl(); final Response response = downloader.get(url, getExtractorLocalization()); doc = YoutubeParsingHelper.parseAndCheckPage(url, response); + initialData = getInitialData(); + uploaderInfo = getUploaderInfo(); + playlistInfo = getPlaylistInfo(); + playlistVideos = getPlaylistVideos(); + } + + private JsonObject getInitialData() throws ParsingException { + try { + String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", doc.toString()); + return JsonParser.object().from(initialData); + } catch (JsonParserException | Parser.RegexException e) { + throw new ParsingException("Could not get ytInitialData", e); + } + } + + private JsonObject getUploaderInfo() throws ParsingException { + JsonArray items = initialData.getObject("sidebar").getObject("playlistSidebarRenderer").getArray("items"); + try { + JsonObject uploaderInfo = items.getObject(1).getObject("playlistSidebarSecondaryInfoRenderer") + .getObject("videoOwner").getObject("videoOwnerRenderer"); + if (uploaderInfo != null) { + return uploaderInfo; + } + } catch (Exception ignored) {} + + // we might want to create a loop here instead of using duplicated code + try { + JsonObject uploaderInfo = items.getObject(items.size()).getObject("playlistSidebarSecondaryInfoRenderer") + .getObject("videoOwner").getObject("videoOwnerRenderer"); + if (uploaderInfo != null) { + return uploaderInfo; + } + } catch (Exception e) { + throw new ParsingException("Could not get uploader info", e); + } + throw new ParsingException("Could not get uploader info"); + } + + private JsonObject getPlaylistInfo() throws ParsingException { + try { + return initialData.getObject("sidebar").getObject("playlistSidebarRenderer").getArray("items") + .getObject(0).getObject("playlistSidebarPrimaryInfoRenderer"); + } catch (Exception e) { + throw new ParsingException("Could not get PlaylistInfo", e); + } + } + + private JsonObject getPlaylistVideos() throws ParsingException { + try { + return initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") + .getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content").getObject("sectionListRenderer") + .getArray("contents").getObject(0).getObject("itemSectionRenderer").getArray("contents") + .getObject(0).getObject("playlistVideoListRenderer"); + } catch (Exception e) { + throw new ParsingException("Could not get playlist info", e); + } } @Override @@ -50,7 +112,11 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Override public String getName() throws ParsingException { try { - return doc.select("div[id=pl-header] h1[class=pl-header-title]").first().text(); + String name = playlistInfo.getObject("title").getArray("runs").getObject(0).getString("text"); + if (name != null) return name; + } catch (Exception ignored) {} + try { + return initialData.getObject("microformat").getObject("microformatDataRenderer").getString("title"); } catch (Exception e) { throw new ParsingException("Could not get playlist name", e); } @@ -59,7 +125,12 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Override public String getThumbnailUrl() throws ParsingException { try { - return doc.select("div[id=pl-header] div[class=pl-header-thumb] img").first().attr("abs:src"); + return playlistInfo.getObject("thumbnailRenderer").getObject("playlistVideoThumbnailRenderer") + .getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url"); + } catch (Exception ignored) {} + try { + return initialData.getObject("microformat").getObject("microformatDataRenderer").getObject("thumbnail") + .getArray("thumbnails").getObject(0).getString("url"); } catch (Exception e) { throw new ParsingException("Could not get playlist thumbnail", e); } @@ -75,8 +146,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { public String getUploaderUrl() throws ParsingException { try { return YoutubeChannelExtractor.CHANNEL_URL_BASE + - doc.select("button[class*=\"yt-uix-subscription-button\"]") - .first().attr("data-channel-external-id"); + uploaderInfo.getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId"); } catch (Exception e) { throw new ParsingException("Could not get playlist uploader url", e); } @@ -85,7 +155,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Override public String getUploaderName() throws ParsingException { try { - return doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text(); + return uploaderInfo.getObject("title").getArray("runs").getObject(0).getString("text"); } catch (Exception e) { throw new ParsingException("Could not get playlist uploader name", e); } @@ -94,7 +164,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Override public String getUploaderAvatarUrl() throws ParsingException { try { - return doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("abs:src"); + return uploaderInfo.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url"); } catch (Exception e) { throw new ParsingException("Could not get playlist uploader avatar", e); } @@ -102,25 +172,12 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Override public long getStreamCount() throws ParsingException { - String input; - try { - input = doc.select("ul[class=\"pl-header-details\"] li").get(1).text(); - } catch (IndexOutOfBoundsException e) { + String viewsText = getPlaylistInfo().getArray("stats").getObject(0).getArray("runs").getObject(0).getString("text"); + return Long.parseLong(Utils.removeNonDigitCharacters(viewsText)); + } catch (Exception e) { throw new ParsingException("Could not get video count from playlist", e); } - - try { - return Long.parseLong(Utils.removeNonDigitCharacters(input)); - } catch (NumberFormatException e) { - // When there's no videos in a playlist, there's no number in the "innerHtml", - // all characters that is not a number is removed, so we try to parse a empty string - if (!input.isEmpty()) { - return 0; - } else { - throw new ParsingException("Could not handle input: " + input, e); - } - } } @Nonnull From a34e5c18e1fc838715414542d55f9e3a638bdfd9 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Mon, 17 Feb 2020 20:24:48 +0100 Subject: [PATCH 07/53] Trending StreamInfoItemCollecetor --- .../YoutubeStreamInfoItemExtractor.java | 15 ++- .../extractors/YoutubeTrendingExtractor.java | 127 +++++++++--------- 2 files changed, 76 insertions(+), 66 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java index eda36d21e..1c1271804 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java @@ -1,5 +1,6 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; +import com.grack.nanojson.JsonObject; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.schabi.newpipe.extractor.exceptions.ParsingException; @@ -35,7 +36,8 @@ import java.util.Date; public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { - private final Element item; + private JsonObject videoInfoItem; + private Element item; private final TimeAgoParser timeAgoParser; private String cachedUploadDate; @@ -51,6 +53,17 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { this.timeAgoParser = timeAgoParser; } + /** + * Creates an extractor of StreamInfoItems from a YouTube page. + * + * @param videoInfoItem The JSON page element + * @param timeAgoParser A parser of the textual dates or {@code null}. + */ + public YoutubeStreamInfoItemExtractor(JsonObject videoInfoItem, @Nullable TimeAgoParser timeAgoParser) { + this.videoInfoItem = videoInfoItem; + this.timeAgoParser = timeAgoParser; + } + @Override public StreamType getStreamType() throws ParsingException { if (isLiveStream(item)) { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java index 44f9c7e5a..05a33a356 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java @@ -20,6 +20,7 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; * along with NewPipe. If not, see . */ +import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; @@ -34,7 +35,9 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.kiosk.KioskExtractor; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import org.schabi.newpipe.extractor.localization.TimeAgoParser; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory; import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import org.schabi.newpipe.extractor.utils.Parser; @@ -102,85 +105,79 @@ public class YoutubeTrendingExtractor extends KioskExtractor { @Override public InfoItemsPage getInitialPage() throws ParsingException { StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); - Elements uls = doc.select("ul[class*=\"expanded-shelf-content-list\"]"); + JsonArray firstPageElements = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") + .getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content") + .getObject("sectionListRenderer").getArray("contents").getObject(0).getObject("itemSectionRenderer") + .getArray("contents").getObject(0).getObject("shelfRenderer").getObject("content") + .getObject("expandedShelfContentsRenderer").getArray("items"); final TimeAgoParser timeAgoParser = getTimeAgoParser(); - for (Element ul : uls) { - for (final Element li : ul.children()) { - final Element el = li.select("div[class*=\"yt-lockup-dismissable\"]").first(); - collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) { - @Override - public String getUrl() throws ParsingException { - try { - Element dl = el.select("h3").first().select("a").first(); - return dl.attr("abs:href"); - } catch (Exception e) { - throw new ParsingException("Could not get web page url for the video", e); - } + for (Object ul : firstPageElements) { + final JsonObject videoInfo = ((JsonObject) ul).getObject("videoRenderer"); + collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser) { + @Override + public String getUrl() throws ParsingException { + try { + String videoId = videoInfo.getString("videoId"); + return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId); + } catch (Exception e) { + throw new ParsingException("Could not get web page url for the video", e); } + } - @Override - public String getName() throws ParsingException { - try { - Element dl = el.select("h3").first().select("a").first(); - return dl.text(); - } catch (Exception e) { - throw new ParsingException("Could not get web page url for the video", e); - } + @Override + public String getName() throws ParsingException { + String name = null; + try { + name = videoInfo.getObject("title").getObject("accessibility") + .getObject("accessibilityData").getString("label"); + } catch (Exception ignored) { } + if (name != null && !name.isEmpty()) return name; + try { + name = videoInfo.getObject("title").getArray("runs").getObject(0).getString("text"); + } catch (Exception e) { + throw new ParsingException("Could not get web page url for the video", e); + } + if (name != null && !name.isEmpty()) return name; + throw new ParsingException("Could not get web page url for the video"); + } - @Override - public String getUploaderUrl() throws ParsingException { - try { - String link = getUploaderLink().attr("abs:href"); - if (link.isEmpty()) { - throw new IllegalArgumentException("is empty"); - } - return link; - } catch (Exception e) { - throw new ParsingException("Could not get Uploader name"); + @Override + public String getUploaderUrl() throws ParsingException { + try { + String id = videoInfo.getObject("ownerText").getArray("runs").getObject(0). + getObject("browseEndpoint").getString("browseId"); + if (id == null || id.isEmpty()) { + throw new IllegalArgumentException("is empty"); } + return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id); + } catch (Exception e) { + throw new ParsingException("Could not get Uploader url"); } + } - private Element getUploaderLink() { - // this url is not always in the form "/channel/..." - // sometimes Youtube provides urls in the from "/user/..." - Element uploaderEl = el.select("div[class*=\"yt-lockup-byline \"]").first(); - return uploaderEl.select("a").first(); + @Override + public String getUploaderName() throws ParsingException { + try { + return videoInfo.getObject("ownerText").getArray("runs").getObject(0).getString("text"); + } catch (Exception e) { + throw new ParsingException("Could not get uploader name"); } + } - @Override - public String getUploaderName() throws ParsingException { - try { - return getUploaderLink().text(); - } catch (Exception e) { - throw new ParsingException("Could not get Uploader name"); - } + @Override + public String getThumbnailUrl() throws ParsingException { + try { + return videoInfo.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url"); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); } - - @Override - public String getThumbnailUrl() throws ParsingException { - try { - String url; - Element te = li.select("span[class=\"yt-thumb-simple\"]").first() - .select("img").first(); - url = te.attr("abs:src"); - // Sometimes youtube sends links to gif files which somehow seem to not exist - // anymore. Items with such gif also offer a secondary image source. So we are going - // to use that if we've caught such an item. - if (url.contains(".gif")) { - url = te.attr("abs:data-thumb"); - } - return url; - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } - }); - } + } + }); } - return new InfoItemsPage<>(collector, getNextPageUrl()); + } } From 3ec422a54253e6267b6359924b7ee0c02a3b537e Mon Sep 17 00:00:00 2001 From: wb9688 Date: Tue, 18 Feb 2020 11:10:41 +0100 Subject: [PATCH 08/53] Fix like/dislike parsing --- .../services/youtube/extractors/YoutubeStreamExtractor.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index b60fa8751..a0ee0bf3b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -388,9 +388,8 @@ public class YoutubeStreamExtractor extends StreamExtractor { assertPageFetched(); String likesString = ""; try { - Element button = doc.select("button.like-button-renderer-like-button").first(); try { - likesString = button.select("span.yt-uix-button-content").first().text(); + likesString = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results").getObject("results").getArray("contents").getObject(0).getObject("videoPrimaryInfoRenderer").getObject("sentimentBar").getObject("sentimentBarRenderer").getString("tooltip").split("/")[0]; } catch (NullPointerException e) { //if this kicks in our button has no content and therefore ratings must be disabled if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) { @@ -413,7 +412,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { try { Element button = doc.select("button.like-button-renderer-dislike-button").first(); try { - dislikesString = button.select("span.yt-uix-button-content").first().text(); + dislikesString = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results").getObject("results").getArray("contents").getObject(0).getObject("videoPrimaryInfoRenderer").getObject("sentimentBar").getObject("sentimentBarRenderer").getString("tooltip").split("/")[1]; } catch (NullPointerException e) { //if this kicks in our button has no content and therefore ratings must be disabled if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) { From 12166ca60ce88544140e3aba1f88ff3c7356eefe Mon Sep 17 00:00:00 2001 From: TobiGr Date: Tue, 18 Feb 2020 13:05:11 +0100 Subject: [PATCH 09/53] [StreamExtractor] Add fallback method for uploader avatar url --- .../youtube/extractors/YoutubeStreamExtractor.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index a0ee0bf3b..4bb491d05 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -479,8 +479,17 @@ public class YoutubeStreamExtractor extends StreamExtractor { .getObject("secondaryResults").getArray("results").getObject(0).getObject("compactAutoplayRenderer") .getArray("contents").getObject(0).getObject("compactVideoRenderer").getObject("channelThumbnail") .getArray("thumbnails").getObject(0).getString("url"); + if (uploaderAvatarUrl != null && !uploaderAvatarUrl.isEmpty()) { + return uploaderAvatarUrl; + } + } catch (Exception ignored) {} - } catch (Exception e) {//todo: add fallback method + try { + uploaderAvatarUrl = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results") + .getObject("results").getArray("contents").getObject(1).getObject("videoSecondaryInfoRenderer") + .getObject("owner").getObject("videoOwnerRenderer").getObject("thumbnail").getArray("thumbnails") + .getObject(0).getString("url"); + } catch (Exception e) { throw new ParsingException("Could not get uploader avatar url", e); } From 7bfc0e62c665863ac3e3be0716400eca58162be2 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Tue, 18 Feb 2020 17:04:22 +0100 Subject: [PATCH 10/53] Fix getRelatedStreams() in YoutubeStreamExtractor --- .../extractors/YoutubeStreamExtractor.java | 115 ++++++++++++++++-- 1 file changed, 107 insertions(+), 8 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 4bb491d05..7c69d2e30 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -25,7 +25,9 @@ import org.schabi.newpipe.extractor.linkhandler.LinkHandler; import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.services.youtube.ItagItem; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory; import org.schabi.newpipe.extractor.stream.*; import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.utils.Parser; @@ -653,21 +655,118 @@ public class YoutubeStreamExtractor extends StreamExtractor { } } + @Override - public StreamInfoItemsCollector getRelatedStreams() throws IOException, ExtractionException { + public StreamInfoItemsCollector getRelatedStreams() throws ExtractionException { assertPageFetched(); try { StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); + JsonArray results = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults") + .getObject("secondaryResults").getObject("secondaryResults").getArray("results"); + final TimeAgoParser timeAgoParser = getTimeAgoParser(); - Element ul = doc.select("ul[id=\"watch-related\"]").first(); - if (ul != null) { - for (Element li : ul.children()) { - // first check if we have a playlist. If so leave them out - if (li.select("a[class*=\"content-link\"]").first() != null) { - collector.commit(extractVideoPreviewInfo(li, timeAgoParser)); + for (Object ul : results) { + final JsonObject videoInfo = ((JsonObject) ul).getObject("compactVideoRenderer"); + + if (videoInfo != null) collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser) { + @Override + public StreamType getStreamType() { + return StreamType.VIDEO_STREAM; } - } + + @Override + public boolean isAd() { + return false; + } + + @Override + public String getUrl() throws ParsingException { + try { + String videoId = videoInfo.getString("videoId"); + return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId); + } catch (Exception e) { + throw new ParsingException("Could not get url", e); + } + } + + @Override + public String getName() throws ParsingException { + String name = null; + try { + name = videoInfo.getObject("title").getString("simpleText"); + } catch (Exception ignored) {} + if (name != null && !name.isEmpty()) return name; + throw new ParsingException("Could not get title"); + } + + @Override + public long getDuration() throws ParsingException { + try { + return YoutubeParsingHelper.parseDurationString(videoInfo.getObject("lengthText").getString("simpleText")); + } catch (Exception e) { + throw new ParsingException("Could not get duration", e); + } + } + + @Override + public String getUploaderUrl() throws ParsingException { + try { + String id = videoInfo.getObject("longBylineText").getArray("runs") + .getObject(0).getObject("navigationEndpoint") + .getObject("browseEndpoint").getString("browseId"); + if (id == null || id.isEmpty()) { + throw new IllegalArgumentException("is empty"); + } + return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id); + } catch (Exception e) { + throw new ParsingException("Could not get uploader url"); + } + } + + @Nullable + @Override + public String getTextualUploadDate() { + return null; + } + + @Nullable + @Override + public DateWrapper getUploadDate() { + return null; + } + + @Override + public long getViewCount() throws ParsingException { + try { + String viewCount = videoInfo.getObject("viewCountText").getString("simpleText"); + if (viewCount.equals("Recommended for you")) return -1; + return Long.parseLong(Utils.removeNonDigitCharacters(viewCount)); + } catch (Exception e) { + throw new ParsingException("Could not get view count", e); + } + } + + @Override + public String getUploaderName() throws ParsingException { + try { + return videoInfo.getObject("longBylineText").getArray("runs") + .getObject(0).getString("text"); + } catch (Exception e) { + throw new ParsingException("Could not get uploader name", e); + } + } + + @Override + public String getThumbnailUrl() throws ParsingException { + try { + return videoInfo.getObject("thumbnail").getArray("thumbnails") + .getObject(0).getString("url"); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + }); } return collector; } catch (Exception e) { From 127c4d589323a48c5561e39a82c6a32e51afaa7f Mon Sep 17 00:00:00 2001 From: wb9688 Date: Wed, 19 Feb 2020 19:14:05 +0100 Subject: [PATCH 11/53] Fix getNextStream() in YoutubeStreamExtractor --- .../extractors/YoutubeStreamExtractor.java | 211 +++++++----------- 1 file changed, 82 insertions(+), 129 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 7c69d2e30..d8059ed47 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -640,15 +640,14 @@ public class YoutubeStreamExtractor extends StreamExtractor { public StreamInfoItem getNextStream() throws IOException, ExtractionException { assertPageFetched(); try { - StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); + final JsonObject videoInfo = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults") + .getObject("secondaryResults").getObject("secondaryResults").getArray("results") + .getObject(0).getObject("compactAutoplayRenderer").getArray("contents") + .getObject(0).getObject("compactVideoRenderer"); final TimeAgoParser timeAgoParser = getTimeAgoParser(); + StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); - Elements watch = doc.select("div[class=\"watch-sidebar-section\"]"); - if (watch.size() < 1) { - return null;// prevent the snackbar notification "report error" on age-restricted videos - } - - collector.commit(extractVideoPreviewInfo(watch.first().select("li").first(), timeAgoParser)); + collector.commit(extractVideoPreviewInfo(videoInfo, timeAgoParser)); return collector.getItems().get(0); } catch (Exception e) { throw new ParsingException("Could not get next video", e); @@ -669,104 +668,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { for (Object ul : results) { final JsonObject videoInfo = ((JsonObject) ul).getObject("compactVideoRenderer"); - if (videoInfo != null) collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser) { - @Override - public StreamType getStreamType() { - return StreamType.VIDEO_STREAM; - } - - @Override - public boolean isAd() { - return false; - } - - @Override - public String getUrl() throws ParsingException { - try { - String videoId = videoInfo.getString("videoId"); - return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId); - } catch (Exception e) { - throw new ParsingException("Could not get url", e); - } - } - - @Override - public String getName() throws ParsingException { - String name = null; - try { - name = videoInfo.getObject("title").getString("simpleText"); - } catch (Exception ignored) {} - if (name != null && !name.isEmpty()) return name; - throw new ParsingException("Could not get title"); - } - - @Override - public long getDuration() throws ParsingException { - try { - return YoutubeParsingHelper.parseDurationString(videoInfo.getObject("lengthText").getString("simpleText")); - } catch (Exception e) { - throw new ParsingException("Could not get duration", e); - } - } - - @Override - public String getUploaderUrl() throws ParsingException { - try { - String id = videoInfo.getObject("longBylineText").getArray("runs") - .getObject(0).getObject("navigationEndpoint") - .getObject("browseEndpoint").getString("browseId"); - if (id == null || id.isEmpty()) { - throw new IllegalArgumentException("is empty"); - } - return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id); - } catch (Exception e) { - throw new ParsingException("Could not get uploader url"); - } - } - - @Nullable - @Override - public String getTextualUploadDate() { - return null; - } - - @Nullable - @Override - public DateWrapper getUploadDate() { - return null; - } - - @Override - public long getViewCount() throws ParsingException { - try { - String viewCount = videoInfo.getObject("viewCountText").getString("simpleText"); - if (viewCount.equals("Recommended for you")) return -1; - return Long.parseLong(Utils.removeNonDigitCharacters(viewCount)); - } catch (Exception e) { - throw new ParsingException("Could not get view count", e); - } - } - - @Override - public String getUploaderName() throws ParsingException { - try { - return videoInfo.getObject("longBylineText").getArray("runs") - .getObject(0).getString("text"); - } catch (Exception e) { - throw new ParsingException("Could not get uploader name", e); - } - } - - @Override - public String getThumbnailUrl() throws ParsingException { - try { - return videoInfo.getObject("thumbnail").getArray("thumbnails") - .getObject(0).getString("url"); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } - }); + if (videoInfo != null) collector.commit(extractVideoPreviewInfo(videoInfo, timeAgoParser)); } return collector; } catch (Exception e) { @@ -1150,52 +1052,103 @@ public class YoutubeStreamExtractor extends StreamExtractor { * Provides information about links to other videos on the video page, such as related videos. * This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo. */ - private StreamInfoItemExtractor extractVideoPreviewInfo(final Element li, final TimeAgoParser timeAgoParser) { - return new YoutubeStreamInfoItemExtractor(li, timeAgoParser) { + private StreamInfoItemExtractor extractVideoPreviewInfo(final JsonObject videoInfo, final TimeAgoParser timeAgoParser) { + return new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser) { + @Override + public StreamType getStreamType() { + return StreamType.VIDEO_STREAM; + } + + @Override + public boolean isAd() { + return false; + } @Override public String getUrl() throws ParsingException { - return li.select("a.content-link").first().attr("abs:href"); + try { + String videoId = videoInfo.getString("videoId"); + return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId); + } catch (Exception e) { + throw new ParsingException("Could not get url", e); + } } @Override public String getName() throws ParsingException { - //todo: check NullPointerException causing - return li.select("span.title").first().text(); - //this page causes the NullPointerException, after finding it by searching for "tjvg": - //https://www.youtube.com/watch?v=Uqg0aEhLFAg + String name = null; + try { + name = videoInfo.getObject("title").getString("simpleText"); + } catch (Exception ignored) {} + if (name != null && !name.isEmpty()) return name; + throw new ParsingException("Could not get title"); } @Override - public String getUploaderName() throws ParsingException { - return li.select("span[class*=\"attribution\"").first() - .select("span").first().text(); + public long getDuration() throws ParsingException { + try { + return YoutubeParsingHelper.parseDurationString(videoInfo.getObject("lengthText").getString("simpleText")); + } catch (Exception e) { + throw new ParsingException("Could not get duration", e); + } } @Override public String getUploaderUrl() throws ParsingException { - return ""; // The uploader is not linked + try { + String id = videoInfo.getObject("longBylineText").getArray("runs") + .getObject(0).getObject("navigationEndpoint") + .getObject("browseEndpoint").getString("browseId"); + if (id == null || id.isEmpty()) { + throw new IllegalArgumentException("is empty"); + } + return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id); + } catch (Exception e) { + throw new ParsingException("Could not get uploader url"); + } + } + + @Nullable + @Override + public String getTextualUploadDate() { + return null; + } + + @Nullable + @Override + public DateWrapper getUploadDate() { + return null; } @Override - public String getTextualUploadDate() throws ParsingException { - return ""; + public long getViewCount() throws ParsingException { + try { + String viewCount = videoInfo.getObject("viewCountText").getString("simpleText"); + if (viewCount.equals("Recommended for you")) return -1; + return Long.parseLong(Utils.removeNonDigitCharacters(viewCount)); + } catch (Exception e) { + throw new ParsingException("Could not get view count", e); + } + } + + @Override + public String getUploaderName() throws ParsingException { + try { + return videoInfo.getObject("longBylineText").getArray("runs") + .getObject(0).getString("text"); + } catch (Exception e) { + throw new ParsingException("Could not get uploader name", e); + } } @Override public String getThumbnailUrl() throws ParsingException { - Element img = li.select("img").first(); - String thumbnailUrl = img.attr("abs:src"); - // Sometimes youtube sends links to gif files which somehow seem to not exist - // anymore. Items with such gif also offer a secondary image source. So we are going - // to use that if we caught such an item. - if (thumbnailUrl.contains(".gif")) { - thumbnailUrl = img.attr("data-thumb"); + try { + return videoInfo.getObject("thumbnail").getArray("thumbnails") + .getObject(0).getString("url"); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); } - if (thumbnailUrl.startsWith("//")) { - thumbnailUrl = HTTPS + thumbnailUrl; - } - return thumbnailUrl; } }; } From 10e9c16d8cca05e6fa76e18667de064c38c06edc Mon Sep 17 00:00:00 2001 From: TobiGr Date: Thu, 20 Feb 2020 13:26:38 +0100 Subject: [PATCH 12/53] Fix LiveStream view count --- .../extractors/YoutubeStreamExtractor.java | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index d8059ed47..802a28f3b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -323,7 +323,23 @@ public class YoutubeStreamExtractor extends StreamExtractor { assertPageFetched(); try { if (getStreamType().equals(StreamType.LIVE_STREAM)) { - return getLiveStreamWatchingCount(); + // The array index is variable, therefore we loop throw the complete array. + // videoPrimaryInfoRenderer is often stored at index 1 + JsonArray contents = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults") + .getObject("results").getObject("results").getArray("contents"); + for (Object c : contents) { + try { + // this gets current view count, but there is also an overall view count which is stored here: + // contents.twoColumnWatchNextResults.secondaryResults.secondaryResults.results[0] + // .compactAutoplayRenderer.contents[0].compactVideoRenderer.viewCountText.simpleText + String views = ((JsonObject) c).getObject("videoPrimaryInfoRenderer") + .getObject("viewCount").getObject("videoViewCountRenderer").getObject("viewCount") + .getArray("runs").getObject(0).getString("text"); + return Long.parseLong(Utils.removeNonDigitCharacters(views)); + } catch (Exception ignored) {} + } + throw new ParsingException("Could not get view count from live stream"); + } else { return Long.parseLong(playerResponse.getObject("videoDetails").getString("viewCount")); } From f33f9466ce6a09437523e0803ff56ee59b6d1679 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Thu, 20 Feb 2020 13:27:38 +0100 Subject: [PATCH 13/53] Fix testDescription() in ChannelExtractor --- .../extractor/services/youtube/YoutubeChannelExtractorTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java index 317bd4fa4..fc4ffff31 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java @@ -170,7 +170,7 @@ public class YoutubeChannelExtractorTest { @Test public void testDescription() throws Exception { assertTrue("What it actually was: " + extractor.getDescription(), - extractor.getDescription().contains("Our World is Amazing. Questions? Ideas? Tweet me:")); + extractor.getDescription().contains("Our World is Amazing. \n\nQuestions? Ideas? Tweet me:")); } @Test From af49b3c48777dfb002401cc7680766312ae96a6b Mon Sep 17 00:00:00 2001 From: wb9688 Date: Fri, 21 Feb 2020 20:08:34 +0100 Subject: [PATCH 14/53] Fix live streams in extractVideoPreviewInfo() --- .../youtube/extractors/YoutubeStreamExtractor.java | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 802a28f3b..53f1585d2 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -1072,6 +1072,11 @@ public class YoutubeStreamExtractor extends StreamExtractor { return new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser) { @Override public StreamType getStreamType() { + try { + if (videoInfo.getArray("badges").getObject(0).getObject("metadataBadgeRenderer").getString("label").equals("LIVE NOW")) { + return StreamType.LIVE_STREAM; + } + } catch (Exception ignored) {} return StreamType.VIDEO_STREAM; } @@ -1103,6 +1108,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public long getDuration() throws ParsingException { try { + if (getStreamType() == StreamType.LIVE_STREAM) return -1; return YoutubeParsingHelper.parseDurationString(videoInfo.getObject("lengthText").getString("simpleText")); } catch (Exception e) { throw new ParsingException("Could not get duration", e); @@ -1139,7 +1145,13 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public long getViewCount() throws ParsingException { try { - String viewCount = videoInfo.getObject("viewCountText").getString("simpleText"); + String viewCount; + if (getStreamType() == StreamType.LIVE_STREAM) { + viewCount = videoInfo.getObject("viewCountText") + .getArray("runs").getObject(0).getString("text"); + } else { + viewCount = videoInfo.getObject("viewCountText").getString("simpleText"); + } if (viewCount.equals("Recommended for you")) return -1; return Long.parseLong(Utils.removeNonDigitCharacters(viewCount)); } catch (Exception e) { From b88188d4198ec81d0d4d78c2b55a07a53a073769 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Sat, 22 Feb 2020 20:19:41 +0100 Subject: [PATCH 15/53] Move stuff from extractVideoPreviewInfo() into YoutubeStreamInfoItemExtractor and partially fix search --- .../extractors/YoutubePlaylistExtractor.java | 6 +- .../extractors/YoutubeSearchExtractor.java | 59 ++-- .../extractors/YoutubeStreamExtractor.java | 149 ++-------- .../YoutubeStreamInfoItemExtractor.java | 258 ++++-------------- 4 files changed, 118 insertions(+), 354 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java index 692b9cc3a..1498ef684 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java @@ -4,6 +4,7 @@ import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; + import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -23,9 +24,10 @@ import org.schabi.newpipe.extractor.stream.StreamType; import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Utils; +import java.io.IOException; + import javax.annotation.Nonnull; import javax.annotation.Nullable; -import java.io.IOException; @SuppressWarnings("WeakerAccess") public class YoutubePlaylistExtractor extends PlaylistExtractor { @@ -318,7 +320,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { } @Override - public String getTextualUploadDate() throws ParsingException { + public String getTextualUploadDate() { return ""; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index 48420814b..c8ed9efd6 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -1,5 +1,10 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; +import com.grack.nanojson.JsonArray; +import com.grack.nanojson.JsonObject; +import com.grack.nanojson.JsonParser; +import com.grack.nanojson.JsonParserException; + import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -16,12 +21,13 @@ import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.utils.Parser; -import javax.annotation.Nonnull; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URL; +import javax.annotation.Nonnull; + /* * Created by Christian Schabesberger on 22.07.2018 * @@ -45,6 +51,7 @@ import java.net.URL; public class YoutubeSearchExtractor extends SearchExtractor { private Document doc; + private JsonObject ytInitialData; public YoutubeSearchExtractor(StreamingService service, SearchQueryHandler linkHandler) { super(service, linkHandler); @@ -55,6 +62,7 @@ public class YoutubeSearchExtractor extends SearchExtractor { final String url = getUrl(); final Response response = downloader.get(url, getExtractorLocalization()); doc = YoutubeParsingHelper.parseAndCheckPage(url, response); + ytInitialData = getInitialData(); } @Nonnull @@ -86,6 +94,7 @@ public class YoutubeSearchExtractor extends SearchExtractor { @Override public InfoItemsPage getPage(String pageUrl) throws IOException, ExtractionException { + // TODO: Get extracting next pages working final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody(); doc = Jsoup.parse(response, pageUrl); @@ -108,37 +117,33 @@ public class YoutubeSearchExtractor extends SearchExtractor { InfoItemsSearchCollector collector = getInfoItemSearchCollector(); collector.reset(); - Element list = doc.select("ol[class=\"item-section\"]").first(); final TimeAgoParser timeAgoParser = getTimeAgoParser(); - for (Element item : list.children()) { - /* First we need to determine which kind of item we are working with. - Youtube depicts five different kinds of items on its search result page. These are - regular videos, playlists, channels, two types of video suggestions, and a "no video - found" item. Since we only want videos, we need to filter out all the others. - An example for this can be seen here: - https://www.youtube.com/results?search_query=asdf&page=1 + JsonArray list = ytInitialData.getObject("contents").getObject("twoColumnSearchResultsRenderer") + .getObject("primaryContents").getObject("sectionListRenderer").getArray("contents") + .getObject(0).getObject("itemSectionRenderer").getArray("contents"); - We already applied a filter to the url, so we don't need to care about channels and - playlists now. - */ - - Element el; - - if ((el = item.select("div[class*=\"search-message\"]").first()) != null) { - throw new NothingFoundException(el.text()); - - // video item type - } else if ((el = item.select("div[class*=\"yt-lockup-video\"]").first()) != null) { - collector.commit(new YoutubeStreamInfoItemExtractor(el, timeAgoParser)); - } else if ((el = item.select("div[class*=\"yt-lockup-channel\"]").first()) != null) { - collector.commit(new YoutubeChannelInfoItemExtractor(el)); - } else if ((el = item.select("div[class*=\"yt-lockup-playlist\"]").first()) != null && - item.select(".yt-pl-icon-mix").isEmpty()) { - collector.commit(new YoutubePlaylistInfoItemExtractor(el)); + for (Object item : list) { + if (((JsonObject) item).getObject("backgroundPromoRenderer") != null) { + throw new NothingFoundException(((JsonObject) item).getObject("backgroundPromoRenderer") + .getObject("bodyText").getArray("runs").getObject(0).getString("text")); + } else if (((JsonObject) item).getObject("videoRenderer") != null) { + collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) item).getObject("videoRenderer"), timeAgoParser)); + } else if (((JsonObject) item).getObject("channelRenderer") != null) { +// collector.commit(new YoutubeChannelInfoItemExtractor(((JsonObject) item).getObject("channelRenderer"))); + } else if (((JsonObject) item).getObject("playlistRenderer") != null) { +// collector.commit(new YoutubePlaylistInfoItemExtractor(((JsonObject) item).getObject("playlistRenderer"))); } } - return collector; } + + private JsonObject getInitialData() throws ParsingException { + try { + String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", doc.toString()); + return JsonParser.object().from(initialData); + } catch (JsonParserException | Parser.RegexException e) { + throw new ParsingException("Could not get ytInitialData", e); + } + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 53f1585d2..a6c947321 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -4,10 +4,10 @@ import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; + import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; import org.mozilla.javascript.Context; import org.mozilla.javascript.Function; import org.mozilla.javascript.ScriptableObject; @@ -25,24 +25,38 @@ import org.schabi.newpipe.extractor.linkhandler.LinkHandler; import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.services.youtube.ItagItem; -import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; -import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory; -import org.schabi.newpipe.extractor.stream.*; +import org.schabi.newpipe.extractor.stream.AudioStream; +import org.schabi.newpipe.extractor.stream.Description; +import org.schabi.newpipe.extractor.stream.Frameset; +import org.schabi.newpipe.extractor.stream.Stream; +import org.schabi.newpipe.extractor.stream.StreamExtractor; +import org.schabi.newpipe.extractor.stream.StreamInfoItem; +import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; +import org.schabi.newpipe.extractor.stream.StreamType; +import org.schabi.newpipe.extractor.stream.SubtitlesStream; +import org.schabi.newpipe.extractor.stream.VideoStream; import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Utils; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URL; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + /* * Created by Christian Schabesberger on 06.08.15. * @@ -663,7 +677,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { final TimeAgoParser timeAgoParser = getTimeAgoParser(); StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); - collector.commit(extractVideoPreviewInfo(videoInfo, timeAgoParser)); + collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser)); return collector.getItems().get(0); } catch (Exception e) { throw new ParsingException("Could not get next video", e); @@ -684,7 +698,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { for (Object ul : results) { final JsonObject videoInfo = ((JsonObject) ul).getObject("compactVideoRenderer"); - if (videoInfo != null) collector.commit(extractVideoPreviewInfo(videoInfo, timeAgoParser)); + if (videoInfo != null) collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser)); } return collector; } catch (Exception e) { @@ -1064,123 +1078,6 @@ public class YoutubeStreamExtractor extends StreamExtractor { return urlAndItags; } - /** - * Provides information about links to other videos on the video page, such as related videos. - * This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo. - */ - private StreamInfoItemExtractor extractVideoPreviewInfo(final JsonObject videoInfo, final TimeAgoParser timeAgoParser) { - return new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser) { - @Override - public StreamType getStreamType() { - try { - if (videoInfo.getArray("badges").getObject(0).getObject("metadataBadgeRenderer").getString("label").equals("LIVE NOW")) { - return StreamType.LIVE_STREAM; - } - } catch (Exception ignored) {} - return StreamType.VIDEO_STREAM; - } - - @Override - public boolean isAd() { - return false; - } - - @Override - public String getUrl() throws ParsingException { - try { - String videoId = videoInfo.getString("videoId"); - return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId); - } catch (Exception e) { - throw new ParsingException("Could not get url", e); - } - } - - @Override - public String getName() throws ParsingException { - String name = null; - try { - name = videoInfo.getObject("title").getString("simpleText"); - } catch (Exception ignored) {} - if (name != null && !name.isEmpty()) return name; - throw new ParsingException("Could not get title"); - } - - @Override - public long getDuration() throws ParsingException { - try { - if (getStreamType() == StreamType.LIVE_STREAM) return -1; - return YoutubeParsingHelper.parseDurationString(videoInfo.getObject("lengthText").getString("simpleText")); - } catch (Exception e) { - throw new ParsingException("Could not get duration", e); - } - } - - @Override - public String getUploaderUrl() throws ParsingException { - try { - String id = videoInfo.getObject("longBylineText").getArray("runs") - .getObject(0).getObject("navigationEndpoint") - .getObject("browseEndpoint").getString("browseId"); - if (id == null || id.isEmpty()) { - throw new IllegalArgumentException("is empty"); - } - return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id); - } catch (Exception e) { - throw new ParsingException("Could not get uploader url"); - } - } - - @Nullable - @Override - public String getTextualUploadDate() { - return null; - } - - @Nullable - @Override - public DateWrapper getUploadDate() { - return null; - } - - @Override - public long getViewCount() throws ParsingException { - try { - String viewCount; - if (getStreamType() == StreamType.LIVE_STREAM) { - viewCount = videoInfo.getObject("viewCountText") - .getArray("runs").getObject(0).getString("text"); - } else { - viewCount = videoInfo.getObject("viewCountText").getString("simpleText"); - } - if (viewCount.equals("Recommended for you")) return -1; - return Long.parseLong(Utils.removeNonDigitCharacters(viewCount)); - } catch (Exception e) { - throw new ParsingException("Could not get view count", e); - } - } - - @Override - public String getUploaderName() throws ParsingException { - try { - return videoInfo.getObject("longBylineText").getArray("runs") - .getObject(0).getString("text"); - } catch (Exception e) { - throw new ParsingException("Could not get uploader name", e); - } - } - - @Override - public String getThumbnailUrl() throws ParsingException { - try { - return videoInfo.getObject("thumbnail").getArray("thumbnails") - .getObject(0).getString("url"); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } - }; - } - @Nonnull @Override public List getFrames() throws ExtractionException { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java index 1c1271804..8970fd5d9 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java @@ -2,19 +2,17 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; import com.grack.nanojson.JsonObject; import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.TimeAgoParser; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory; import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor; import org.schabi.newpipe.extractor.stream.StreamType; import org.schabi.newpipe.extractor.utils.Utils; import javax.annotation.Nullable; -import java.text.SimpleDateFormat; -import java.util.Calendar; -import java.util.Date; /* * Copyright (C) Christian Schabesberger 2016 @@ -36,20 +34,10 @@ import java.util.Date; public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { - private JsonObject videoInfoItem; - private Element item; + private JsonObject videoInfo; private final TimeAgoParser timeAgoParser; - private String cachedUploadDate; - - /** - * Creates an extractor of StreamInfoItems from a YouTube page. - * - * @param item The page element - * @param timeAgoParser A parser of the textual dates or {@code null}. - */ - public YoutubeStreamInfoItemExtractor(Element item, @Nullable TimeAgoParser timeAgoParser) { - this.item = item; + public YoutubeStreamInfoItemExtractor(Element a, @Nullable TimeAgoParser timeAgoParser) { this.timeAgoParser = timeAgoParser; } @@ -60,251 +48,123 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { * @param timeAgoParser A parser of the textual dates or {@code null}. */ public YoutubeStreamInfoItemExtractor(JsonObject videoInfoItem, @Nullable TimeAgoParser timeAgoParser) { - this.videoInfoItem = videoInfoItem; + this.videoInfo = videoInfoItem; this.timeAgoParser = timeAgoParser; } @Override - public StreamType getStreamType() throws ParsingException { - if (isLiveStream(item)) { - return StreamType.LIVE_STREAM; - } else { - return StreamType.VIDEO_STREAM; - } + public StreamType getStreamType() { + try { + if (videoInfo.getArray("badges").getObject(0).getObject("metadataBadgeRenderer").getString("label").equals("LIVE NOW")) { + return StreamType.LIVE_STREAM; + } + } catch (Exception ignored) {} + return StreamType.VIDEO_STREAM; } @Override - public boolean isAd() throws ParsingException { - return !item.select("span[class*=\"icon-not-available\"]").isEmpty() - || !item.select("span[class*=\"yt-badge-ad\"]").isEmpty() - || isPremiumVideo(); - } - - private boolean isPremiumVideo() { - Element premiumSpan = item.select("span[class=\"standalone-collection-badge-renderer-red-text\"]").first(); - if (premiumSpan == null) return false; - - // if this span has text it most likely says ("Free Video") so we can play this - if (premiumSpan.hasText()) return false; - return true; + public boolean isAd() { + return false; } @Override public String getUrl() throws ParsingException { try { - Element el = item.select("div[class*=\"yt-lockup-video\"]").first(); - Element dl = el.select("h3").first().select("a").first(); - return dl.attr("abs:href"); + String videoId = videoInfo.getString("videoId"); + return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId); } catch (Exception e) { - throw new ParsingException("Could not get web page url for the video", e); + throw new ParsingException("Could not get url", e); } } @Override public String getName() throws ParsingException { + String name = null; try { - Element el = item.select("div[class*=\"yt-lockup-video\"]").first(); - Element dl = el.select("h3").first().select("a").first(); - return dl.text(); - } catch (Exception e) { - throw new ParsingException("Could not get title", e); + name = videoInfo.getObject("title").getString("simpleText"); + } catch (Exception ignored) {} + if (name == null) { + try { + name = videoInfo.getObject("title").getArray("runs").getObject(0).getString("text"); + } catch (Exception ignored) {} } + if (name != null && !name.isEmpty()) return name; + throw new ParsingException("Could not get name"); } @Override public long getDuration() throws ParsingException { try { if (getStreamType() == StreamType.LIVE_STREAM) return -1; - - final Element duration = item.select("span[class*=\"video-time\"]").first(); - // apparently on youtube, video-time element will not show up if the video has a duration of 00:00 - // see: https://www.youtube.com/results?sp=EgIQAVAU&q=asdfgf - return duration == null ? 0 : YoutubeParsingHelper.parseDurationString(duration.text()); + return YoutubeParsingHelper.parseDurationString(videoInfo.getObject("lengthText").getString("simpleText")); } catch (Exception e) { - throw new ParsingException("Could not get Duration: " + getUrl(), e); + throw new ParsingException("Could not get duration", e); } } @Override public String getUploaderName() throws ParsingException { try { - return item.select("div[class=\"yt-lockup-byline\"]").first() - .select("a").first() - .text(); + return videoInfo.getObject("longBylineText").getArray("runs") + .getObject(0).getString("text"); } catch (Exception e) { - throw new ParsingException("Could not get uploader", e); + throw new ParsingException("Could not get uploader name", e); } } @Override public String getUploaderUrl() throws ParsingException { - // this url is not always in the form "/channel/..." - // sometimes Youtube provides urls in the from "/user/..." try { - try { - return item.select("div[class=\"yt-lockup-byline\"]").first() - .select("a").first() - .attr("abs:href"); - } catch (Exception e){} - - // try this if the first didn't work - return item.select("span[class=\"title\"") - .text().split(" - ")[0]; - } catch (Exception e) { - System.out.println(item.html()); - throw new ParsingException("Could not get uploader url", e); - } - } - - @Nullable - @Override - public String getTextualUploadDate() throws ParsingException { - if (getStreamType().equals(StreamType.LIVE_STREAM)) { - return null; - } - - if (cachedUploadDate != null) { - return cachedUploadDate; - } - - try { - if (isVideoReminder()) { - final Calendar calendar = getDateFromReminder(); - if (calendar != null) { - return cachedUploadDate = new SimpleDateFormat("yyyy-MM-dd HH:mm") - .format(calendar.getTime()); - } + String id = videoInfo.getObject("longBylineText").getArray("runs") + .getObject(0).getObject("navigationEndpoint") + .getObject("browseEndpoint").getString("browseId"); + if (id == null || id.isEmpty()) { + throw new IllegalArgumentException("is empty"); } - - - Element meta = item.select("div[class=\"yt-lockup-meta\"]").first(); - if (meta == null) return ""; - - final Elements li = meta.select("li"); - if (li.isEmpty()) return ""; - - return cachedUploadDate = li.first().text(); + return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id); } catch (Exception e) { - throw new ParsingException("Could not get upload date", e); + throw new ParsingException("Could not get uploader url"); } } @Nullable @Override - public DateWrapper getUploadDate() throws ParsingException { - if (getStreamType().equals(StreamType.LIVE_STREAM)) { - return null; - } + public String getTextualUploadDate() { + // TODO: Get upload date in case of a videoRenderer (not available in case of a compactVideoRenderer) + return null; + } - if (isVideoReminder()) { - return new DateWrapper(getDateFromReminder()); - } - - String textualUploadDate = getTextualUploadDate(); - if (timeAgoParser != null && textualUploadDate != null && !textualUploadDate.isEmpty()) { - return timeAgoParser.parse(textualUploadDate); - } else { - return null; - } + @Nullable + @Override + public DateWrapper getUploadDate() { + return null; } @Override public long getViewCount() throws ParsingException { - String input; - - final Element spanViewCount = item.select("span.view-count").first(); - if (spanViewCount != null) { - input = spanViewCount.text(); - - } else if (getStreamType().equals(StreamType.LIVE_STREAM)) { - Element meta = item.select("ul.yt-lockup-meta-info").first(); - if (meta == null) return 0; - - final Elements li = meta.select("li"); - if (li.isEmpty()) return 0; - - input = li.first().text(); - } else { - try { - Element meta = item.select("div.yt-lockup-meta").first(); - if (meta == null) return -1; - - // This case can happen if google releases a special video - if (meta.select("li").size() < 2) return -1; - - input = meta.select("li").get(1).text(); - } catch (IndexOutOfBoundsException e) { - throw new ParsingException("Could not parse yt-lockup-meta although available: " + getUrl(), e); - } - } - - if (input == null) { - throw new ParsingException("Input is null"); - } - try { - - return Long.parseLong(Utils.removeNonDigitCharacters(input)); - } catch (NumberFormatException e) { - // if this happens the video probably has no views - if (!input.isEmpty()) { - return 0; + String viewCount; + if (getStreamType() == StreamType.LIVE_STREAM) { + viewCount = videoInfo.getObject("viewCountText") + .getArray("runs").getObject(0).getString("text"); + } else { + viewCount = videoInfo.getObject("viewCountText").getString("simpleText"); } - - throw new ParsingException("Could not handle input: " + input, e); + if (viewCount.equals("Recommended for you")) return -1; + return Long.parseLong(Utils.removeNonDigitCharacters(viewCount)); + } catch (Exception e) { + throw new ParsingException("Could not get view count", e); } } @Override public String getThumbnailUrl() throws ParsingException { try { - String url; - Element te = item.select("div[class=\"yt-thumb video-thumb\"]").first() - .select("img").first(); - url = te.attr("abs:src"); - // Sometimes youtube sends links to gif files which somehow seem to not exist - // anymore. Items with such gif also offer a secondary image source. So we are going - // to use that if we've caught such an item. - if (url.contains(".gif")) { - url = te.attr("abs:data-thumb"); - } - return url; + // TODO: Don't simply get the first item, but look at all thumbnails and their resolution + return videoInfo.getObject("thumbnail").getArray("thumbnails") + .getObject(0).getString("url"); } catch (Exception e) { throw new ParsingException("Could not get thumbnail url", e); } } - - - private boolean isVideoReminder() { - return !item.select("span.yt-uix-livereminder").isEmpty(); - } - - private Calendar getDateFromReminder() throws ParsingException { - final Element timeFuture = item.select("span.yt-badge.localized-date").first(); - - if (timeFuture == null) { - throw new ParsingException("Span timeFuture is null"); - } - - final String timestamp = timeFuture.attr("data-timestamp"); - if (!timestamp.isEmpty()) { - try { - final Calendar calendar = Calendar.getInstance(); - calendar.setTime(new Date(Long.parseLong(timestamp) * 1000L)); - return calendar; - } catch (Exception e) { - throw new ParsingException("Could not parse = \"" + timestamp + "\""); - } - } - - throw new ParsingException("Could not parse date from reminder element: \"" + timeFuture + "\""); - } - - /** - * Generic method that checks if the element contains any clues that it's a livestream item - */ - protected static boolean isLiveStream(Element item) { - return !item.select("span[class*=\"yt-badge-live\"]").isEmpty() - || !item.select("span[class*=\"video-time-overlay-live\"]").isEmpty(); - } } From 957db062efeade07fb5c121308236a06c52fdad6 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Sat, 22 Feb 2020 20:33:05 +0100 Subject: [PATCH 16/53] Fix getting uploader for videoRenderer --- .../YoutubeStreamInfoItemExtractor.java | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java index 8970fd5d9..91124fb43 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java @@ -104,20 +104,37 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { @Override public String getUploaderName() throws ParsingException { + String name = null; try { - return videoInfo.getObject("longBylineText").getArray("runs") + name = videoInfo.getObject("longBylineText").getArray("runs") .getObject(0).getString("text"); - } catch (Exception e) { - throw new ParsingException("Could not get uploader name", e); + } catch (Exception ignored) {} + if (name == null) { + try { + name = videoInfo.getObject("ownerText").getArray("runs") + .getObject(0).getString("text"); + } catch (Exception ignored) {} } + if (name != null && !name.isEmpty()) return name; + throw new ParsingException("Could not get uploader name"); } @Override public String getUploaderUrl() throws ParsingException { try { - String id = videoInfo.getObject("longBylineText").getArray("runs") - .getObject(0).getObject("navigationEndpoint") - .getObject("browseEndpoint").getString("browseId"); + String id = null; + try { + id = videoInfo.getObject("longBylineText").getArray("runs") + .getObject(0).getObject("navigationEndpoint") + .getObject("browseEndpoint").getString("browseId"); + } catch (Exception ignored) {} + if (id == null) { + try { + id = videoInfo.getObject("ownerText").getArray("runs") + .getObject(0).getObject("navigationEndpoint") + .getObject("browseEndpoint").getString("browseId"); + } catch (Exception ignored) {} + } if (id == null || id.isEmpty()) { throw new IllegalArgumentException("is empty"); } From 38aabc6acac601b1d0343517ae0357bfcf76f9d6 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Sat, 22 Feb 2020 20:33:48 +0100 Subject: [PATCH 17/53] Use plain YoutubeStreamInfoItemExtractor in YoutubeTrendingExtractor --- .../extractors/YoutubeTrendingExtractor.java | 70 ++----------------- 1 file changed, 4 insertions(+), 66 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java index 05a33a356..467a1fed3 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java @@ -24,9 +24,8 @@ import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; + import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Response; @@ -35,16 +34,15 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.kiosk.KioskExtractor; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import org.schabi.newpipe.extractor.localization.TimeAgoParser; -import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; -import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory; import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import org.schabi.newpipe.extractor.utils.Parser; -import javax.annotation.Nonnull; import java.io.IOException; +import javax.annotation.Nonnull; + public class YoutubeTrendingExtractor extends KioskExtractor { private Document doc; @@ -115,67 +113,7 @@ public class YoutubeTrendingExtractor extends KioskExtractor { for (Object ul : firstPageElements) { final JsonObject videoInfo = ((JsonObject) ul).getObject("videoRenderer"); - collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser) { - @Override - public String getUrl() throws ParsingException { - try { - String videoId = videoInfo.getString("videoId"); - return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId); - } catch (Exception e) { - throw new ParsingException("Could not get web page url for the video", e); - } - } - - @Override - public String getName() throws ParsingException { - String name = null; - try { - name = videoInfo.getObject("title").getObject("accessibility") - .getObject("accessibilityData").getString("label"); - } catch (Exception ignored) { - } - if (name != null && !name.isEmpty()) return name; - try { - name = videoInfo.getObject("title").getArray("runs").getObject(0).getString("text"); - } catch (Exception e) { - throw new ParsingException("Could not get web page url for the video", e); - } - if (name != null && !name.isEmpty()) return name; - throw new ParsingException("Could not get web page url for the video"); - } - - @Override - public String getUploaderUrl() throws ParsingException { - try { - String id = videoInfo.getObject("ownerText").getArray("runs").getObject(0). - getObject("browseEndpoint").getString("browseId"); - if (id == null || id.isEmpty()) { - throw new IllegalArgumentException("is empty"); - } - return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id); - } catch (Exception e) { - throw new ParsingException("Could not get Uploader url"); - } - } - - @Override - public String getUploaderName() throws ParsingException { - try { - return videoInfo.getObject("ownerText").getArray("runs").getObject(0).getString("text"); - } catch (Exception e) { - throw new ParsingException("Could not get uploader name"); - } - } - - @Override - public String getThumbnailUrl() throws ParsingException { - try { - return videoInfo.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url"); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } - }); + collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser)); } return new InfoItemsPage<>(collector, getNextPageUrl()); From 5816202cc7a6d055a996f9783f701cd60cb1cbe2 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Sat, 22 Feb 2020 23:51:02 +0100 Subject: [PATCH 18/53] Move getInitialData() method to YouTubeParsingHelper Rename ytInitialData to initialData --- .../extractors/YoutubeChannelExtractor.java | 22 +++++----------- .../extractors/YoutubePlaylistExtractor.java | 11 +------- .../extractors/YoutubeSearchExtractor.java | 14 +++------- .../extractors/YoutubeStreamExtractor.java | 26 +++++++------------ .../extractors/YoutubeTrendingExtractor.java | 11 +------- .../linkHandler/YoutubeParsingHelper.java | 14 ++++++++++ 6 files changed, 35 insertions(+), 63 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 544a9e461..8e47e779e 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -52,7 +52,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000"; private Document doc; - private JsonObject ytInitialData; + private JsonObject initialData; public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler) { super(service, linkHandler); @@ -63,17 +63,9 @@ public class YoutubeChannelExtractor extends ChannelExtractor { String channelUrl = super.getUrl() + CHANNEL_URL_PARAMETERS; final Response response = downloader.get(channelUrl, getExtractorLocalization()); doc = YoutubeParsingHelper.parseAndCheckPage(channelUrl, response); - ytInitialData = getInitialData(); + initialData = YoutubeParsingHelper.getInitialData(response.responseBody()); } - private JsonObject getInitialData() throws ParsingException { - try { - String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", doc.toString()); - return JsonParser.object().from(initialData); - } catch (JsonParserException | Parser.RegexException e) { - throw new ParsingException("Could not get ytInitialData", e); - } - } @Override public String getNextPageUrl() throws ExtractionException { @@ -97,7 +89,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { return doc.select("meta[property=\"og:url\"]").first().attr("content").replace(CHANNEL_URL_BASE, ""); } catch (Exception ignored) {} try { - return ytInitialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId"); + return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId"); } catch (Exception ignored) {} // fallback method; does not work with channels that have no "Subscribe" button (e.g. EminemVEVO) @@ -124,7 +116,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getAvatarUrl() throws ParsingException { try { - return ytInitialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("avatar").getArray("thumbnails").getObject(0).getString("url"); + return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("avatar").getArray("thumbnails").getObject(0).getString("url"); } catch (Exception e) { throw new ParsingException("Could not get avatar", e); } @@ -133,7 +125,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getBannerUrl() throws ParsingException { try { - String url = ytInitialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("banner").getArray("thumbnails").getObject(0).getString("url"); + String url = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("banner").getArray("thumbnails").getObject(0).getString("url"); if (url.contains("s.ytimg.com") || url.contains("default_banner")) { return null; } @@ -165,7 +157,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public long getSubscriberCount() throws ParsingException { - final JsonObject subscriberInfo = ytInitialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("subscriberCountText"); + final JsonObject subscriberInfo = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("subscriberCountText"); if (subscriberInfo != null) { try { @@ -182,7 +174,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getDescription() throws ParsingException { try { - return ytInitialData.getObject("metadata").getObject("channelMetadataRenderer").getString("description"); + return initialData.getObject("metadata").getObject("channelMetadataRenderer").getString("description"); } catch (Exception e) { throw new ParsingException("Could not get channel description", e); } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java index 1498ef684..3a4cbffd3 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java @@ -47,21 +47,12 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { final String url = getUrl(); final Response response = downloader.get(url, getExtractorLocalization()); doc = YoutubeParsingHelper.parseAndCheckPage(url, response); - initialData = getInitialData(); + initialData = YoutubeParsingHelper.getInitialData(response.responseBody()); uploaderInfo = getUploaderInfo(); playlistInfo = getPlaylistInfo(); playlistVideos = getPlaylistVideos(); } - private JsonObject getInitialData() throws ParsingException { - try { - String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", doc.toString()); - return JsonParser.object().from(initialData); - } catch (JsonParserException | Parser.RegexException e) { - throw new ParsingException("Could not get ytInitialData", e); - } - } - private JsonObject getUploaderInfo() throws ParsingException { JsonArray items = initialData.getObject("sidebar").getObject("playlistSidebarRenderer").getArray("items"); try { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index c8ed9efd6..57ce3aabd 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -51,7 +51,7 @@ import javax.annotation.Nonnull; public class YoutubeSearchExtractor extends SearchExtractor { private Document doc; - private JsonObject ytInitialData; + private JsonObject initialData; public YoutubeSearchExtractor(StreamingService service, SearchQueryHandler linkHandler) { super(service, linkHandler); @@ -62,7 +62,7 @@ public class YoutubeSearchExtractor extends SearchExtractor { final String url = getUrl(); final Response response = downloader.get(url, getExtractorLocalization()); doc = YoutubeParsingHelper.parseAndCheckPage(url, response); - ytInitialData = getInitialData(); + initialData = YoutubeParsingHelper.getInitialData(response.responseBody()); } @Nonnull @@ -119,7 +119,7 @@ public class YoutubeSearchExtractor extends SearchExtractor { final TimeAgoParser timeAgoParser = getTimeAgoParser(); - JsonArray list = ytInitialData.getObject("contents").getObject("twoColumnSearchResultsRenderer") + JsonArray list = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer") .getObject("primaryContents").getObject("sectionListRenderer").getArray("contents") .getObject(0).getObject("itemSectionRenderer").getArray("contents"); @@ -138,12 +138,4 @@ public class YoutubeSearchExtractor extends SearchExtractor { return collector; } - private JsonObject getInitialData() throws ParsingException { - try { - String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", doc.toString()); - return JsonParser.object().from(initialData); - } catch (JsonParserException | Parser.RegexException e) { - throw new ParsingException("Could not get ytInitialData", e); - } - } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index a6c947321..a0bf1c8f6 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -104,7 +104,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Nonnull private final Map videoInfoPage = new HashMap<>(); private JsonObject playerResponse; - private JsonObject ytInitialData; + private JsonObject initialData; @Nonnull private List subtitlesInfos = new ArrayList<>(); @@ -339,7 +339,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { if (getStreamType().equals(StreamType.LIVE_STREAM)) { // The array index is variable, therefore we loop throw the complete array. // videoPrimaryInfoRenderer is often stored at index 1 - JsonArray contents = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults") + JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults") .getObject("results").getObject("results").getArray("contents"); for (Object c : contents) { try { @@ -421,7 +421,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { String likesString = ""; try { try { - likesString = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results").getObject("results").getArray("contents").getObject(0).getObject("videoPrimaryInfoRenderer").getObject("sentimentBar").getObject("sentimentBarRenderer").getString("tooltip").split("/")[0]; + likesString = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results").getObject("results").getArray("contents").getObject(0).getObject("videoPrimaryInfoRenderer").getObject("sentimentBar").getObject("sentimentBarRenderer").getString("tooltip").split("/")[0]; } catch (NullPointerException e) { //if this kicks in our button has no content and therefore ratings must be disabled if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) { @@ -444,7 +444,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { try { Element button = doc.select("button.like-button-renderer-dislike-button").first(); try { - dislikesString = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results").getObject("results").getArray("contents").getObject(0).getObject("videoPrimaryInfoRenderer").getObject("sentimentBar").getObject("sentimentBarRenderer").getString("tooltip").split("/")[1]; + dislikesString = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results").getObject("results").getArray("contents").getObject(0).getObject("videoPrimaryInfoRenderer").getObject("sentimentBar").getObject("sentimentBarRenderer").getString("tooltip").split("/")[1]; } catch (NullPointerException e) { //if this kicks in our button has no content and therefore ratings must be disabled if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) { @@ -507,7 +507,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { String uploaderAvatarUrl = null; try { - uploaderAvatarUrl = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("secondaryResults") + uploaderAvatarUrl = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("secondaryResults") .getObject("secondaryResults").getArray("results").getObject(0).getObject("compactAutoplayRenderer") .getArray("contents").getObject(0).getObject("compactVideoRenderer").getObject("channelThumbnail") .getArray("thumbnails").getObject(0).getString("url"); @@ -517,7 +517,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { } catch (Exception ignored) {} try { - uploaderAvatarUrl = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results") + uploaderAvatarUrl = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results") .getObject("results").getArray("contents").getObject(1).getObject("videoSecondaryInfoRenderer") .getObject("owner").getObject("videoOwnerRenderer").getObject("thumbnail").getArray("thumbnails") .getObject(0).getString("url"); @@ -670,7 +670,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { public StreamInfoItem getNextStream() throws IOException, ExtractionException { assertPageFetched(); try { - final JsonObject videoInfo = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults") + final JsonObject videoInfo = initialData.getObject("contents").getObject("twoColumnWatchNextResults") .getObject("secondaryResults").getObject("secondaryResults").getArray("results") .getObject(0).getObject("compactAutoplayRenderer").getArray("contents") .getObject(0).getObject("compactVideoRenderer"); @@ -690,7 +690,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { assertPageFetched(); try { StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); - JsonArray results = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults") + JsonArray results = initialData.getObject("contents").getObject("twoColumnWatchNextResults") .getObject("secondaryResults").getObject("secondaryResults").getArray("results"); final TimeAgoParser timeAgoParser = getTimeAgoParser(); @@ -778,7 +778,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { isAgeRestricted = false; } playerResponse = getPlayerResponse(); - ytInitialData = getInitialData(); + initialData = YoutubeParsingHelper.getInitialData(pageHtml); if (decryptionCode.isEmpty()) { decryptionCode = loadDecryptionCode(playerUrl); @@ -852,14 +852,6 @@ public class YoutubeStreamExtractor extends StreamExtractor { } } - private JsonObject getInitialData() throws ParsingException { - try { - String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", doc.toString()); - return JsonParser.object().from(initialData); - } catch (JsonParserException | Parser.RegexException e) { - throw new ParsingException("Could not get ytInitialData", e); - } - } @Nonnull private EmbeddedInfo getEmbeddedInfo() throws ParsingException, ReCaptchaException { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java index 467a1fed3..d815e9e14 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java @@ -61,16 +61,7 @@ public class YoutubeTrendingExtractor extends KioskExtractor { final Response response = downloader.get(url, getExtractorLocalization()); doc = YoutubeParsingHelper.parseAndCheckPage(url, response); - initialData = getInitialData(); - } - - private JsonObject getInitialData() throws ParsingException { - try { - String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", doc.toString()); - return JsonParser.object().from(initialData); - } catch (JsonParserException | Parser.RegexException e) { - throw new ParsingException("Could not get ytInitialData", e); - } + initialData = YoutubeParsingHelper.getInitialData(response.responseBody()); } @Override diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java index 65ec7e3f6..78516f6df 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java @@ -1,11 +1,15 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; +import com.grack.nanojson.JsonObject; +import com.grack.nanojson.JsonParser; +import com.grack.nanojson.JsonParserException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; +import org.schabi.newpipe.extractor.utils.Parser; import java.net.URL; import java.text.ParseException; @@ -143,4 +147,14 @@ public class YoutubeParsingHelper { uploadDate.setTime(date); return uploadDate; } + + public static JsonObject getInitialData(String html) throws ParsingException { + try { + String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", html); + return JsonParser.object().from(initialData); + } catch (JsonParserException | Parser.RegexException e) { + throw new ParsingException("Could not get ytInitialData", e); + } + } + } From 4e57e589cee1842104fcb9af2b8e1d668c304187 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Sun, 23 Feb 2020 11:23:33 +0100 Subject: [PATCH 19/53] Fix first channel page --- .../extractors/YoutubeChannelExtractor.java | 62 +++++-------------- 1 file changed, 15 insertions(+), 47 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 8e47e779e..1cb448e4e 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -1,8 +1,10 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; +import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; + import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -17,12 +19,12 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; -import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Utils; -import javax.annotation.Nonnull; import java.io.IOException; +import javax.annotation.Nonnull; + import static org.schabi.newpipe.extractor.utils.Utils.HTTP; import static org.schabi.newpipe.extractor.utils.Utils.HTTPS; @@ -191,6 +193,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public InfoItemsPage getPage(String pageUrl) throws IOException, ExtractionException { + // TODO: Get extracting next pages working if (pageUrl == null || pageUrl.isEmpty()) { throw new ExtractionException(new IllegalArgumentException("Page url is empty or null")); } @@ -245,59 +248,24 @@ public class YoutubeChannelExtractor extends ChannelExtractor { final String uploaderUrl = getUrl(); final TimeAgoParser timeAgoParser = getTimeAgoParser(); - for (final Element li : element.children()) { - if (li.select("div[class=\"feed-item-dismissable\"]").first() != null) { - collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) { - @Override - public String getUrl() throws ParsingException { - try { - Element el = li.select("div[class=\"feed-item-dismissable\"]").first(); - Element dl = el.select("h3").first().select("a").first(); - return dl.attr("abs:href"); - } catch (Exception e) { - throw new ParsingException("Could not get web page url for the video", e); - } - } + JsonArray videos = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") + .getArray("tabs").getObject(1).getObject("tabRenderer").getObject("content") + .getObject("sectionListRenderer").getArray("contents"); + for (Object video : videos) { + JsonObject videoInfo = ((JsonObject) video).getObject("itemSectionRenderer") + .getArray("contents").getObject(0); + if (videoInfo.getObject("videoRenderer") != null) { + collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo.getObject("videoRenderer"), timeAgoParser) { @Override - public String getName() throws ParsingException { - try { - Element el = li.select("div[class=\"feed-item-dismissable\"]").first(); - Element dl = el.select("h3").first().select("a").first(); - return dl.text(); - } catch (Exception e) { - throw new ParsingException("Could not get title", e); - } - } - - @Override - public String getUploaderName() throws ParsingException { + public String getUploaderName() { return uploaderName; } @Override - public String getUploaderUrl() throws ParsingException { + public String getUploaderUrl() { return uploaderUrl; } - - @Override - public String getThumbnailUrl() throws ParsingException { - try { - String url; - Element te = li.select("span[class=\"yt-thumb-clip\"]").first() - .select("img").first(); - url = te.attr("abs:src"); - // Sometimes youtube sends links to gif files which somehow seem to not exist - // anymore. Items with such gif also offer a secondary image source. So we are going - // to use that if we've caught such an item. - if (url.contains(".gif")) { - url = te.attr("abs:data-thumb"); - } - return url; - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } }); } } From 316fe0109df23dd3e6db25274d85508203308090 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Sun, 23 Feb 2020 13:48:54 +0100 Subject: [PATCH 20/53] Fix first playlist page --- .../extractors/YoutubePlaylistExtractor.java | 120 ++---------------- .../YoutubeStreamInfoItemExtractor.java | 19 ++- 2 files changed, 28 insertions(+), 111 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java index 3a4cbffd3..f0fb91a17 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java @@ -13,15 +13,12 @@ import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; -import org.schabi.newpipe.extractor.stream.StreamType; -import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Utils; import java.io.IOException; @@ -233,112 +230,23 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { private void collectStreamsFrom(@Nonnull StreamInfoItemsCollector collector, @Nullable Element element) { collector.reset(); - if (element == null) { - return; - } - - final LinkHandlerFactory streamLinkHandlerFactory = getService().getStreamLHFactory(); final TimeAgoParser timeAgoParser = getTimeAgoParser(); - for (final Element li : element.children()) { - if (isDeletedItem(li)) { - continue; + JsonArray videos = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") + .getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content") + .getObject("sectionListRenderer").getArray("contents").getObject(0) + .getObject("itemSectionRenderer").getArray("contents").getObject(0) + .getObject("playlistVideoListRenderer").getArray("contents"); + + for (Object video : videos) { + if (((JsonObject) video).getObject("playlistVideoRenderer") != null) { + collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) video).getObject("playlistVideoRenderer"), timeAgoParser) { + @Override + public long getViewCount() { + return -1; + } + }); } - - collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) { - public Element uploaderLink; - - @Override - public boolean isAd() { - return false; - } - - @Override - public String getUrl() throws ParsingException { - try { - return streamLinkHandlerFactory.fromId(li.attr("data-video-id")).getUrl(); - } catch (Exception e) { - throw new ParsingException("Could not get web page url for the video", e); - } - } - - @Override - public String getName() throws ParsingException { - try { - return li.attr("data-title"); - } catch (Exception e) { - throw new ParsingException("Could not get title", e); - } - } - - @Override - public long getDuration() throws ParsingException { - try { - if (getStreamType() == StreamType.LIVE_STREAM) return -1; - - Element first = li.select("div[class=\"timestamp\"] span").first(); - if (first == null) { - // Video unavailable (private, deleted, etc.), this is a thing that happens specifically with playlists, - // because in other cases, those videos don't even show up - return -1; - } - - return YoutubeParsingHelper.parseDurationString(first.text()); - } catch (Exception e) { - throw new ParsingException("Could not get duration" + getUrl(), e); - } - } - - - private Element getUploaderLink() { - // should always be present since we filter deleted items - if (uploaderLink == null) { - uploaderLink = li.select("div[class=pl-video-owner] a").first(); - } - return uploaderLink; - } - - @Override - public String getUploaderName() throws ParsingException { - return getUploaderLink().text(); - } - - @Override - public String getUploaderUrl() throws ParsingException { - // this url is not always in the form "/channel/..." - // sometimes Youtube provides urls in the from "/user/..." - return getUploaderLink().attr("abs:href"); - } - - @Override - public String getTextualUploadDate() { - return ""; - } - - @Override - public long getViewCount() throws ParsingException { - return -1; - } - - @Override - public String getThumbnailUrl() throws ParsingException { - try { - return "https://i.ytimg.com/vi/" + streamLinkHandlerFactory.fromUrl(getUrl()).getId() + "/hqdefault.jpg"; - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } - }); } } - - /** - * Check if the playlist item is deleted - * - * @param li the list item - * @return true if the item is deleted - */ - private boolean isDeletedItem(Element li) { - return li.select("div[class=pl-video-owner] a").isEmpty(); - } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java index 91124fb43..a53194f14 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java @@ -1,7 +1,7 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; import com.grack.nanojson.JsonObject; -import org.jsoup.nodes.Element; + import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.TimeAgoParser; @@ -37,10 +37,6 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { private JsonObject videoInfo; private final TimeAgoParser timeAgoParser; - public YoutubeStreamInfoItemExtractor(Element a, @Nullable TimeAgoParser timeAgoParser) { - this.timeAgoParser = timeAgoParser; - } - /** * Creates an extractor of StreamInfoItems from a YouTube page. * @@ -115,6 +111,12 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { .getObject(0).getString("text"); } catch (Exception ignored) {} } + if (name == null) { + try { + name = videoInfo.getObject("shortBylineText").getArray("runs") + .getObject(0).getString("text"); + } catch (Exception ignored) {} + } if (name != null && !name.isEmpty()) return name; throw new ParsingException("Could not get uploader name"); } @@ -135,6 +137,13 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { .getObject("browseEndpoint").getString("browseId"); } catch (Exception ignored) {} } + if (id == null) { + try { + id = videoInfo.getObject("shortBylineText").getArray("runs") + .getObject(0).getObject("navigationEndpoint") + .getObject("browseEndpoint").getString("browseId"); + } catch (Exception ignored) {} + } if (id == null || id.isEmpty()) { throw new IllegalArgumentException("is empty"); } From 7442f91aa619a57b73ece4ca43160420585e1464 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Sun, 23 Feb 2020 14:18:21 +0100 Subject: [PATCH 21/53] Don't assume which channel tab is videos --- .../extractors/YoutubeChannelExtractor.java | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 1cb448e4e..ed987f243 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -248,9 +248,22 @@ public class YoutubeChannelExtractor extends ChannelExtractor { final String uploaderUrl = getUrl(); final TimeAgoParser timeAgoParser = getTimeAgoParser(); - JsonArray videos = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") - .getArray("tabs").getObject(1).getObject("tabRenderer").getObject("content") - .getObject("sectionListRenderer").getArray("contents"); + JsonArray tabs = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") + .getArray("tabs"); + JsonArray videos = null; + + for (Object tab : tabs) { + if (((JsonObject) tab).getObject("tabRenderer") != null) { + if (((JsonObject) tab).getObject("tabRenderer").getString("title").equals("Videos")) { + videos = ((JsonObject) tab).getObject("tabRenderer").getObject("content") + .getObject("sectionListRenderer").getArray("contents"); + } + } + } + + if (videos == null) { + throw new ParsingException("Could not find Videos tab"); + } for (Object video : videos) { JsonObject videoInfo = ((JsonObject) video).getObject("itemSectionRenderer") From 3187116a63cda7ddc2e7f68d3571fc7f353f476e Mon Sep 17 00:00:00 2001 From: wb9688 Date: Sun, 23 Feb 2020 14:19:13 +0100 Subject: [PATCH 22/53] Handle premium videos --- .../YoutubeStreamInfoItemExtractor.java | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java index a53194f14..bf06774c8 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java @@ -1,5 +1,6 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; +import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import org.schabi.newpipe.extractor.exceptions.ParsingException; @@ -60,7 +61,7 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { @Override public boolean isAd() { - return false; + return isPremium(); } @Override @@ -169,6 +170,9 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { @Override public long getViewCount() throws ParsingException { try { + if (videoInfo.getObject("topStandaloneBadge") != null || isPremium()) { + return -1; + } String viewCount; if (getStreamType() == StreamType.LIVE_STREAM) { viewCount = videoInfo.getObject("viewCountText") @@ -193,4 +197,16 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { throw new ParsingException("Could not get thumbnail url", e); } } + + private boolean isPremium() { + try { + JsonArray badges = videoInfo.getArray("badges"); + for (Object badge : badges) { + if (((JsonObject) badge).getObject("metadataBadgeRenderer").getString("label").equals("Premium")) { + return true; + } + } + } catch (Exception ignored) {} + return false; + } } From 8aea4d445b62b92ffeeb7fc22b3d0af5e244e809 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Sun, 23 Feb 2020 18:27:28 +0100 Subject: [PATCH 23/53] Reimplement YoutubeChannelInfoItemExtractor --- .../YoutubeChannelInfoItemExtractor.java | 86 +++++++------------ .../extractors/YoutubeSearchExtractor.java | 2 +- 2 files changed, 31 insertions(+), 57 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java index a687c0504..d0df15817 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java @@ -1,13 +1,12 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; -import org.jsoup.nodes.Element; +import com.grack.nanojson.JsonObject; + import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; import org.schabi.newpipe.extractor.utils.Utils; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - /* * Created by Christian Schabesberger on 12.02.17. * @@ -29,87 +28,62 @@ import java.util.regex.Pattern; */ public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor { - private final Element el; + private JsonObject channelInfoItem; - public YoutubeChannelInfoItemExtractor(Element el) { - this.el = el; + public YoutubeChannelInfoItemExtractor(JsonObject channelInfoItem) { + this.channelInfoItem = channelInfoItem; } @Override public String getThumbnailUrl() throws ParsingException { - Element img = el.select("span[class*=\"yt-thumb-simple\"]").first() - .select("img").first(); - - String url = img.attr("abs:src"); - - if (url.contains("gif")) { - url = img.attr("abs:data-thumb"); + try { + return channelInfoItem.getObject("thumbnails").getArray("thumbnails").getObject(0).getString("url"); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); } - return url; } @Override public String getName() throws ParsingException { - return el.select("a[class*=\"yt-uix-tile-link\"]").first() - .text(); + try { + return channelInfoItem.getObject("title").getString("simpleText"); + } catch (Exception e) { + throw new ParsingException("Could not get name", e); + } } @Override public String getUrl() throws ParsingException { try { - String buttonTrackingUrl = el.select("button[class*=\"yt-uix-button\"]").first() - .attr("abs:data-href"); - - Pattern channelIdPattern = Pattern.compile("(?:.*?)\\%252Fchannel\\%252F([A-Za-z0-9\\-\\_]+)(?:.*)"); - Matcher match = channelIdPattern.matcher(buttonTrackingUrl); - - if (match.matches()) { - return YoutubeChannelExtractor.CHANNEL_URL_BASE + match.group(1); - } - } catch(Exception ignored) {} - - // fallback method for channels without "Subscribe" button (or just in case yt changes things) - // provides an url with "/user/NAME", inconsistent with stream and channel extractor: tests will fail - try { - return el.select("a[class*=\"yt-uix-tile-link\"]").first() - .attr("abs:href"); + String id = "channel/" + channelInfoItem.getString("channelId"); // Does prepending 'channel/' always work? + return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id); } catch (Exception e) { - throw new ParsingException("Could not get channel url", e); + throw new ParsingException("Could not get url", e); } } @Override - public long getSubscriberCount() throws ParsingException { - final Element subsEl = el.select("span[class*=\"yt-subscriber-count\"]").first(); - if (subsEl != null) { - try { - return Long.parseLong(Utils.removeNonDigitCharacters(subsEl.text())); - } catch (NumberFormatException e) { - throw new ParsingException("Could not get subscriber count", e); - } - } else { - // If the element is null, the channel have the subscriber count disabled - return -1; - } + public long getSubscriberCount() { + // TODO: get subscriber count, it's in subscriberCountText.simpleText as a string like "103M subscribers" + return -1; } @Override public long getStreamCount() throws ParsingException { - Element metaEl = el.select("ul[class*=\"yt-lockup-meta-info\"]").first(); - if (metaEl == null) { - return 0; - } else { - return Long.parseLong(Utils.removeNonDigitCharacters(metaEl.text())); + try { + return Long.parseLong(Utils.removeNonDigitCharacters(channelInfoItem.getObject("videoCountText") + .getArray("runs").getObject(0).getString("text"))); + } catch (Exception e) { + throw new ParsingException("Could not get name", e); } } @Override public String getDescription() throws ParsingException { - Element desEl = el.select("div[class*=\"yt-lockup-description\"]").first(); - if (desEl == null) { - return ""; - } else { - return desEl.text(); + try { + return channelInfoItem.getObject("descriptionSnippet").getArray("runs").getObject(0).getString("text"); + } catch (Exception e) { + throw new ParsingException("Could not get description url", e); } } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index 57ce3aabd..fdf09e4d7 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -130,7 +130,7 @@ public class YoutubeSearchExtractor extends SearchExtractor { } else if (((JsonObject) item).getObject("videoRenderer") != null) { collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) item).getObject("videoRenderer"), timeAgoParser)); } else if (((JsonObject) item).getObject("channelRenderer") != null) { -// collector.commit(new YoutubeChannelInfoItemExtractor(((JsonObject) item).getObject("channelRenderer"))); + collector.commit(new YoutubeChannelInfoItemExtractor(((JsonObject) item).getObject("channelRenderer"))); } else if (((JsonObject) item).getObject("playlistRenderer") != null) { // collector.commit(new YoutubePlaylistInfoItemExtractor(((JsonObject) item).getObject("playlistRenderer"))); } From 4462cbe3f154e8fbf08c9d5f0bb1be9febe8ee71 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Sun, 23 Feb 2020 19:45:45 +0100 Subject: [PATCH 24/53] Reimplement YoutubePlaylistInfoItemExtractor --- .../YoutubeChannelInfoItemExtractor.java | 6 +- .../YoutubePlaylistInfoItemExtractor.java | 70 +++++-------------- .../extractors/YoutubeSearchExtractor.java | 2 +- 3 files changed, 22 insertions(+), 56 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java index d0df15817..d987af0db 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java @@ -37,7 +37,7 @@ public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor @Override public String getThumbnailUrl() throws ParsingException { try { - return channelInfoItem.getObject("thumbnails").getArray("thumbnails").getObject(0).getString("url"); + return channelInfoItem.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url"); } catch (Exception e) { throw new ParsingException("Could not get thumbnail url", e); } @@ -74,7 +74,7 @@ public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor return Long.parseLong(Utils.removeNonDigitCharacters(channelInfoItem.getObject("videoCountText") .getArray("runs").getObject(0).getString("text"))); } catch (Exception e) { - throw new ParsingException("Could not get name", e); + throw new ParsingException("Could not get stream count", e); } } @@ -83,7 +83,7 @@ public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor try { return channelInfoItem.getObject("descriptionSnippet").getArray("runs").getObject(0).getString("text"); } catch (Exception e) { - throw new ParsingException("Could not get description url", e); + throw new ParsingException("Could not get description", e); } } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistInfoItemExtractor.java index 63fef225f..358fa2e69 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistInfoItemExtractor.java @@ -1,97 +1,63 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; -import org.jsoup.nodes.Element; +import com.grack.nanojson.JsonObject; + import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.playlist.PlaylistInfoItemExtractor; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubePlaylistLinkHandlerFactory; import org.schabi.newpipe.extractor.utils.Utils; public class YoutubePlaylistInfoItemExtractor implements PlaylistInfoItemExtractor { - private final Element el; + private JsonObject playlistInfoItem; - public YoutubePlaylistInfoItemExtractor(Element el) { - this.el = el; + public YoutubePlaylistInfoItemExtractor(JsonObject playlistInfoItem) { + this.playlistInfoItem = playlistInfoItem; } @Override public String getThumbnailUrl() throws ParsingException { - String url; - try { - Element te = el.select("div[class=\"yt-thumb video-thumb\"]").first() - .select("img").first(); - url = te.attr("abs:src"); - - if (url.contains(".gif")) { - url = te.attr("abs:data-thumb"); - } + return playlistInfoItem.getArray("thumbnails").getObject(0).getArray("thumbnails") + .getObject(0).getString("url"); } catch (Exception e) { - throw new ParsingException("Failed to extract playlist thumbnail url", e); + throw new ParsingException("Could not get thumbnail url", e); } - - return url; } @Override public String getName() throws ParsingException { - String name; try { - final Element title = el.select("[class=\"yt-lockup-title\"]").first() - .select("a").first(); - - name = title == null ? "" : title.text(); + return playlistInfoItem.getObject("title").getString("simpleText"); } catch (Exception e) { - throw new ParsingException("Failed to extract playlist name", e); + throw new ParsingException("Could not get name", e); } - - return name; } @Override public String getUrl() throws ParsingException { try { - final Element a = el.select("div[class=\"yt-lockup-meta\"]") - .select("ul[class=\"yt-lockup-meta-info\"]") - .select("li").select("a").first(); - - if (a != null) { - return a.attr("abs:href"); - } - - // this is for yt premium playlists - return el.select("h3[class=\"yt-lockup-title\"").first() - .select("a").first() - .attr("abs:href"); - + String id = playlistInfoItem.getString("playlistId"); + return YoutubePlaylistLinkHandlerFactory.getInstance().getUrl(id); } catch (Exception e) { - throw new ParsingException("Failed to extract playlist url", e); + throw new ParsingException("Could not get url", e); } } @Override public String getUploaderName() throws ParsingException { - String name; - try { - final Element div = el.select("div[class=\"yt-lockup-byline\"]").first() - .select("a").first(); - - name = div.text(); + return playlistInfoItem.getObject("longBylineText").getArray("runs").getObject(0).getString("text"); } catch (Exception e) { - throw new ParsingException("Failed to extract playlist uploader", e); + throw new ParsingException("Could not get uploader name", e); } - - return name; } @Override public long getStreamCount() throws ParsingException { try { - final Element count = el.select("span[class=\"formatted-video-count-label\"]").first() - .select("b").first(); - - return count == null ? 0 : Long.parseLong(Utils.removeNonDigitCharacters(count.text())); + return Long.parseLong(Utils.removeNonDigitCharacters(playlistInfoItem.getString("videoCount"))); } catch (Exception e) { - throw new ParsingException("Failed to extract playlist stream count", e); + throw new ParsingException("Could not get stream count", e); } } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index fdf09e4d7..02dd2adaa 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -132,7 +132,7 @@ public class YoutubeSearchExtractor extends SearchExtractor { } else if (((JsonObject) item).getObject("channelRenderer") != null) { collector.commit(new YoutubeChannelInfoItemExtractor(((JsonObject) item).getObject("channelRenderer"))); } else if (((JsonObject) item).getObject("playlistRenderer") != null) { -// collector.commit(new YoutubePlaylistInfoItemExtractor(((JsonObject) item).getObject("playlistRenderer"))); + collector.commit(new YoutubePlaylistInfoItemExtractor(((JsonObject) item).getObject("playlistRenderer"))); } } return collector; From 6d20b2b1d0011f6f26adfbd02715cd3d8b7b80c6 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Sun, 23 Feb 2020 22:24:47 +0100 Subject: [PATCH 25/53] Fix NPE on collecting page items --- .../services/youtube/extractors/YoutubeSearchExtractor.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index 02dd2adaa..5d04a3891 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -113,12 +113,13 @@ public class YoutubeSearchExtractor extends SearchExtractor { "&page=" + Integer.toString(pageNr + 1)); } - private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundException { + private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundException, ParsingException { InfoItemsSearchCollector collector = getInfoItemSearchCollector(); collector.reset(); final TimeAgoParser timeAgoParser = getTimeAgoParser(); + if (initialData == null) initialData = YoutubeParsingHelper.getInitialData(doc.toString()); JsonArray list = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer") .getObject("primaryContents").getObject("sectionListRenderer").getArray("contents") .getObject(0).getObject("itemSectionRenderer").getArray("contents"); From 51fb26625a9db58de7c7647ae4461cd3c09fe98e Mon Sep 17 00:00:00 2001 From: wb9688 Date: Mon, 24 Feb 2020 10:39:52 +0100 Subject: [PATCH 26/53] Implement getSubscriberCount() in YoutubeChannelInfoItemExtractor --- .../extractors/YoutubeChannelInfoItemExtractor.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java index d987af0db..7f1e01298 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java @@ -63,9 +63,13 @@ public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor } @Override - public long getSubscriberCount() { - // TODO: get subscriber count, it's in subscriberCountText.simpleText as a string like "103M subscribers" - return -1; + public long getSubscriberCount() throws ParsingException { + try { + String subscribers = channelInfoItem.getObject("subscriberCountText").getString("simpleText").split(" ")[0]; + return Utils.mixedNumberWordToLong(subscribers); + } catch (Exception e) { + throw new ParsingException("Could not get subscriber count", e); + } } @Override From c7360e4a468b5c657b2a95e640282d0773881156 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Mon, 24 Feb 2020 12:48:16 +0100 Subject: [PATCH 27/53] Fix getThumbnailUrl() in YoutubeChannelInfoItemExtractor --- .../YoutubeChannelInfoItemExtractor.java | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java index 7f1e01298..483cd894c 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelInfoItemExtractor.java @@ -7,6 +7,9 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; import org.schabi.newpipe.extractor.utils.Utils; +import static org.schabi.newpipe.extractor.utils.Utils.HTTP; +import static org.schabi.newpipe.extractor.utils.Utils.HTTPS; + /* * Created by Christian Schabesberger on 12.02.17. * @@ -37,7 +40,16 @@ public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor @Override public String getThumbnailUrl() throws ParsingException { try { - return channelInfoItem.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url"); + String url = channelInfoItem.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url"); + if (url.startsWith("//")) { + url = url.substring(2); + } + if (url.startsWith(HTTP)) { + url = Utils.replaceHttpWithHttps(url); + } else if (!url.startsWith(HTTPS)) { + url = HTTPS + url; + } + return url; } catch (Exception e) { throw new ParsingException("Could not get thumbnail url", e); } From 1eca63089b88e4c27c38e70fe17760cf89ab00db Mon Sep 17 00:00:00 2001 From: wb9688 Date: Mon, 24 Feb 2020 12:55:51 +0100 Subject: [PATCH 28/53] Count private and deleted videos as ad --- .../youtube/extractors/YoutubeStreamInfoItemExtractor.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java index bf06774c8..605053bb3 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java @@ -60,8 +60,8 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { } @Override - public boolean isAd() { - return isPremium(); + public boolean isAd() throws ParsingException { + return isPremium() || getName().equals("[Private video]") || getName().equals("[Deleted video]"); } @Override From a38ab9b791ac118e3fbd7e33fc5d2dc9e3bb795c Mon Sep 17 00:00:00 2001 From: wb9688 Date: Mon, 24 Feb 2020 13:01:13 +0100 Subject: [PATCH 29/53] Return null if channel has no banner --- .../youtube/extractors/YoutubeChannelExtractor.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index ed987f243..839f9203d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -127,8 +127,11 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getBannerUrl() throws ParsingException { try { - String url = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("banner").getArray("thumbnails").getObject(0).getString("url"); - if (url.contains("s.ytimg.com") || url.contains("default_banner")) { + String url = null; + try { + url = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("banner").getArray("thumbnails").getObject(0).getString("url"); + } catch (Exception ignored) {} + if (url == null || url.contains("s.ytimg.com") || url.contains("default_banner")) { return null; } // the first characters of the banner URLs are different for each channel and some are not even valid URLs From 2dfa2187ffb6c8a6a04143d2811aa9e368936fbb Mon Sep 17 00:00:00 2001 From: wb9688 Date: Mon, 24 Feb 2020 15:48:23 +0100 Subject: [PATCH 30/53] Implement pagination in YoutubeChannelExtractor --- .../extractors/YoutubeChannelExtractor.java | 101 +++++++++--------- 1 file changed, 50 insertions(+), 51 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 839f9203d..2e68a276f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -5,7 +5,6 @@ import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.schabi.newpipe.extractor.StreamingService; @@ -22,6 +21,10 @@ import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import org.schabi.newpipe.extractor.utils.Utils; import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import javax.annotation.Nonnull; @@ -71,7 +74,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getNextPageUrl() throws ExtractionException { - return getNextPageUrlFrom(doc); + return getNextPageUrlFrom(getVideoTab().getObject("content").getObject("sectionListRenderer").getArray("continuations")); } @Nonnull @@ -189,8 +192,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public InfoItemsPage getInitialPage() throws ExtractionException { StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); - Element ul = doc.select("ul[id=\"browse-items-primary\"]").first(); - collectStreamsFrom(collector, ul); + + JsonArray videos = getVideoTab().getObject("content").getObject("sectionListRenderer").getArray("contents"); + collectStreamsFrom(collector, videos); + return new InfoItemsPage<>(collector, getNextPageUrl()); } @@ -203,71 +208,44 @@ public class YoutubeChannelExtractor extends ChannelExtractor { // Unfortunately, we have to fetch the page even if we are only getting next streams, // as they don't deliver enough information on their own (the channel name, for example). - fetchPage(); +// fetchPage(); StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); - JsonObject ajaxJson; + JsonArray ajaxJson; try { - final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody(); - ajaxJson = JsonParser.object().from(response); + Map> headers = new HashMap<>(); + headers.put("X-YouTube-Client-Name", Collections.singletonList("1")); + headers.put("X-YouTube-Client-Version", Collections.singletonList("2.20200221.03.00")); // TODO: Automatically get YouTube client version somehow + final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); + ajaxJson = JsonParser.array().from(response); } catch (JsonParserException pe) { throw new ParsingException("Could not parse json data for next streams", pe); } - final Document ajaxHtml = Jsoup.parse(ajaxJson.getString("content_html"), pageUrl); - collectStreamsFrom(collector, ajaxHtml.select("body").first()); + JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response") + .getObject("continuationContents").getObject("sectionListContinuation"); - return new InfoItemsPage<>(collector, getNextPageUrlFromAjaxPage(ajaxJson, pageUrl)); + collectStreamsFrom(collector, sectionListContinuation.getArray("contents")); + + return new InfoItemsPage<>(collector, getNextPageUrlFrom(sectionListContinuation.getArray("continuations"))); } - private String getNextPageUrlFromAjaxPage(final JsonObject ajaxJson, final String pageUrl) - throws ParsingException { - String loadMoreHtmlDataRaw = ajaxJson.getString("load_more_widget_html"); - if (!loadMoreHtmlDataRaw.isEmpty()) { - return getNextPageUrlFrom(Jsoup.parse(loadMoreHtmlDataRaw, pageUrl)); - } else { - return ""; - } + + private String getNextPageUrlFrom(JsonArray continuations) { + JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData"); + String continuation = nextContinuationData.getString("continuation"); + String clickTrackingParams = nextContinuationData.getString("clickTrackingParams"); + return "https://www.youtube.com/browse_ajax?ctoken=" + continuation + "&continuation=" + continuation + + "&itct=" + clickTrackingParams; } - private String getNextPageUrlFrom(Document d) throws ParsingException { - try { - Element button = d.select("button[class*=\"yt-uix-load-more\"]").first(); - if (button != null) { - return button.attr("abs:data-uix-load-more-href"); - } else { - // Sometimes channels are simply so small, they don't have a more streams/videos - return ""; - } - } catch (Exception e) { - throw new ParsingException("Could not get next page url", e); - } - } - - private void collectStreamsFrom(StreamInfoItemsCollector collector, Element element) throws ParsingException { + private void collectStreamsFrom(StreamInfoItemsCollector collector, JsonArray videos) throws ParsingException { collector.reset(); final String uploaderName = getName(); final String uploaderUrl = getUrl(); final TimeAgoParser timeAgoParser = getTimeAgoParser(); - JsonArray tabs = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") - .getArray("tabs"); - JsonArray videos = null; - - for (Object tab : tabs) { - if (((JsonObject) tab).getObject("tabRenderer") != null) { - if (((JsonObject) tab).getObject("tabRenderer").getString("title").equals("Videos")) { - videos = ((JsonObject) tab).getObject("tabRenderer").getObject("content") - .getObject("sectionListRenderer").getArray("contents"); - } - } - } - - if (videos == null) { - throw new ParsingException("Could not find Videos tab"); - } - for (Object video : videos) { JsonObject videoInfo = ((JsonObject) video).getObject("itemSectionRenderer") .getArray("contents").getObject(0); @@ -286,4 +264,25 @@ public class YoutubeChannelExtractor extends ChannelExtractor { } } } + + private JsonObject getVideoTab() throws ParsingException { + JsonArray tabs = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") + .getArray("tabs"); + JsonObject videoTab = null; + + for (Object tab : tabs) { + if (((JsonObject) tab).getObject("tabRenderer") != null) { + if (((JsonObject) tab).getObject("tabRenderer").getString("title").equals("Videos")) { + videoTab = ((JsonObject) tab).getObject("tabRenderer"); + break; + } + } + } + + if (videoTab == null) { + throw new ParsingException("Could not find Videos tab"); + } + + return videoTab; + } } From 627ab2459f90969c987e7463fcfcaf2fc93c8a10 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Mon, 24 Feb 2020 15:51:08 +0100 Subject: [PATCH 31/53] Uncomment fetchPage() --- .../services/youtube/extractors/YoutubeChannelExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 2e68a276f..b42514fc7 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -208,7 +208,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { // Unfortunately, we have to fetch the page even if we are only getting next streams, // as they don't deliver enough information on their own (the channel name, for example). -// fetchPage(); + fetchPage(); StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); JsonArray ajaxJson; From 4039409820e9a3f82f9727c25d4bb1759c8576ec Mon Sep 17 00:00:00 2001 From: wb9688 Date: Mon, 24 Feb 2020 16:04:01 +0100 Subject: [PATCH 32/53] Fix getLikeCount() for certain streams --- .../extractors/YoutubeChannelExtractor.java | 1 - .../extractors/YoutubeStreamExtractor.java | 26 ++++++++++++++++--- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index b42514fc7..c60e1c8fc 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -201,7 +201,6 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public InfoItemsPage getPage(String pageUrl) throws IOException, ExtractionException { - // TODO: Get extracting next pages working if (pageUrl == null || pageUrl.isEmpty()) { throw new ExtractionException(new IllegalArgumentException("Page url is empty or null")); } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index a0bf1c8f6..dd2987a4b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -415,13 +415,33 @@ public class YoutubeStreamExtractor extends StreamExtractor { throw new ExtractionException("Could not find correct results in response"); } + private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException { + JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults") + .getObject("results").getObject("results").getArray("contents"); + JsonObject videoPrimaryInfoRenderer = null; + + for (Object content : contents) { + if (((JsonObject) content).getObject("videoPrimaryInfoRenderer") != null) { + videoPrimaryInfoRenderer = ((JsonObject) content).getObject("videoPrimaryInfoRenderer"); + break; + } + } + + if (videoPrimaryInfoRenderer == null) { + throw new ParsingException("Could not find videoPrimaryInfoRenderer"); + } + + return videoPrimaryInfoRenderer; + } + @Override public long getLikeCount() throws ParsingException { assertPageFetched(); String likesString = ""; try { try { - likesString = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results").getObject("results").getArray("contents").getObject(0).getObject("videoPrimaryInfoRenderer").getObject("sentimentBar").getObject("sentimentBarRenderer").getString("tooltip").split("/")[0]; + likesString = getVideoPrimaryInfoRenderer().getObject("sentimentBar") + .getObject("sentimentBarRenderer").getString("tooltip").split("/")[0]; } catch (NullPointerException e) { //if this kicks in our button has no content and therefore ratings must be disabled if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) { @@ -442,9 +462,9 @@ public class YoutubeStreamExtractor extends StreamExtractor { assertPageFetched(); String dislikesString = ""; try { - Element button = doc.select("button.like-button-renderer-dislike-button").first(); try { - dislikesString = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results").getObject("results").getArray("contents").getObject(0).getObject("videoPrimaryInfoRenderer").getObject("sentimentBar").getObject("sentimentBarRenderer").getString("tooltip").split("/")[1]; + dislikesString = getVideoPrimaryInfoRenderer().getObject("sentimentBar") + .getObject("sentimentBarRenderer").getString("tooltip").split("/")[1]; } catch (NullPointerException e) { //if this kicks in our button has no content and therefore ratings must be disabled if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) { From c0a8e0188967db4e8ae84b5f0acbe2fd61b1119c Mon Sep 17 00:00:00 2001 From: wb9688 Date: Mon, 24 Feb 2020 16:45:36 +0100 Subject: [PATCH 33/53] Implement pagination in YoutubePlaylistExtractor --- .../extractors/YoutubeChannelExtractor.java | 4 + .../extractors/YoutubePlaylistExtractor.java | 79 +++++++++---------- 2 files changed, 41 insertions(+), 42 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index c60e1c8fc..27678657c 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -231,6 +231,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor { private String getNextPageUrlFrom(JsonArray continuations) { + if (continuations == null) { + return ""; + } + JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData"); String continuation = nextContinuationData.getString("continuation"); String clickTrackingParams = nextContinuationData.getString("clickTrackingParams"); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java index f0fb91a17..8abee5f1d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java @@ -5,9 +5,7 @@ import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Response; @@ -22,9 +20,12 @@ import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import org.schabi.newpipe.extractor.utils.Utils; import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import javax.annotation.Nonnull; -import javax.annotation.Nullable; @SuppressWarnings("WeakerAccess") public class YoutubePlaylistExtractor extends PlaylistExtractor { @@ -95,7 +96,11 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Override public String getNextPageUrl() throws ExtractionException { - return getNextPageUrlFrom(doc); + return getNextPageUrlFrom(initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") + .getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content") + .getObject("sectionListRenderer").getArray("contents").getObject(0) + .getObject("itemSectionRenderer").getArray("contents").getObject(0) + .getObject("playlistVideoListRenderer").getArray("continuations")); } @Nonnull @@ -174,8 +179,14 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Override public InfoItemsPage getInitialPage() throws ExtractionException { StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); - Element tbody = doc.select("tbody[id=\"pl-load-more-destination\"]").first(); - collectStreamsFrom(collector, tbody); + + JsonArray videos = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") + .getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content") + .getObject("sectionListRenderer").getArray("contents").getObject(0) + .getObject("itemSectionRenderer").getArray("contents").getObject(0) + .getObject("playlistVideoListRenderer").getArray("contents"); + + collectStreamsFrom(collector, videos); return new InfoItemsPage<>(collector, getNextPageUrl()); } @@ -186,58 +197,42 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { } StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); - JsonObject pageJson; + JsonArray ajaxJson; try { - final String responseBody = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody(); - pageJson = JsonParser.object().from(responseBody); + Map> headers = new HashMap<>(); + headers.put("X-YouTube-Client-Name", Collections.singletonList("1")); + headers.put("X-YouTube-Client-Version", Collections.singletonList("2.20200221.03.00")); // TODO: Automatically get YouTube client version somehow + final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); + ajaxJson = JsonParser.array().from(response); } catch (JsonParserException pe) { - throw new ParsingException("Could not parse ajax json", pe); + throw new ParsingException("Could not parse json data for next streams", pe); } - final Document pageHtml = Jsoup.parse("" - + pageJson.getString("content_html") - + "
", pageUrl); + JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response") + .getObject("continuationContents").getObject("playlistVideoListContinuation"); - collectStreamsFrom(collector, pageHtml.select("tbody[id=\"pl-load-more-destination\"]").first()); + collectStreamsFrom(collector, sectionListContinuation.getArray("contents")); - return new InfoItemsPage<>(collector, getNextPageUrlFromAjax(pageJson, pageUrl)); + return new InfoItemsPage<>(collector, getNextPageUrlFrom(sectionListContinuation.getArray("continuations"))); } - private String getNextPageUrlFromAjax(final JsonObject pageJson, final String pageUrl) - throws ParsingException { - String nextPageHtml = pageJson.getString("load_more_widget_html"); - if (!nextPageHtml.isEmpty()) { - return getNextPageUrlFrom(Jsoup.parse(nextPageHtml, pageUrl)); - } else { + private String getNextPageUrlFrom(JsonArray continuations) { + if (continuations == null) { return ""; } + + JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData"); + String continuation = nextContinuationData.getString("continuation"); + String clickTrackingParams = nextContinuationData.getString("clickTrackingParams"); + return "https://www.youtube.com/browse_ajax?ctoken=" + continuation + "&continuation=" + continuation + + "&itct=" + clickTrackingParams; } - private String getNextPageUrlFrom(Document d) throws ParsingException { - try { - Element button = d.select("button[class*=\"yt-uix-load-more\"]").first(); - if (button != null) { - return button.attr("abs:data-uix-load-more-href"); - } else { - // Sometimes playlists are simply so small, they don't have a more streams/videos - return ""; - } - } catch (Exception e) { - throw new ParsingException("could not get next streams' url", e); - } - } - - private void collectStreamsFrom(@Nonnull StreamInfoItemsCollector collector, @Nullable Element element) { + private void collectStreamsFrom(StreamInfoItemsCollector collector, JsonArray videos) { collector.reset(); final TimeAgoParser timeAgoParser = getTimeAgoParser(); - JsonArray videos = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") - .getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content") - .getObject("sectionListRenderer").getArray("contents").getObject(0) - .getObject("itemSectionRenderer").getArray("contents").getObject(0) - .getObject("playlistVideoListRenderer").getArray("contents"); - for (Object video : videos) { if (((JsonObject) video).getObject("playlistVideoRenderer") != null) { collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) video).getObject("playlistVideoRenderer"), timeAgoParser) { From 5d883d100c96233a4f068f1853e11297bb5533fe Mon Sep 17 00:00:00 2001 From: wb9688 Date: Mon, 24 Feb 2020 18:24:36 +0100 Subject: [PATCH 34/53] Implement pagination in YoutubeSearchExtractor --- .../extractors/YoutubeSearchExtractor.java | 96 +++++++++++-------- .../YoutubeSearchQueryHandlerFactory.java | 8 +- 2 files changed, 62 insertions(+), 42 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index 5d04a3891..07954334f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -5,9 +5,7 @@ import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.downloader.Downloader; @@ -19,12 +17,12 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector; import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; -import org.schabi.newpipe.extractor.utils.Parser; import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.net.MalformedURLException; -import java.net.URL; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import javax.annotation.Nonnull; @@ -73,58 +71,70 @@ public class YoutubeSearchExtractor extends SearchExtractor { @Override public String getSearchSuggestion() { - final Element el = doc.select("div[class*=\"spell-correction\"]").first(); - if (el != null) { - return el.select("a").first().text(); - } else { + JsonObject showingResultsForRenderer = initialData.getObject("contents") + .getObject("twoColumnSearchResultsRenderer").getObject("primaryContents") + .getObject("sectionListRenderer").getArray("contents").getObject(0) + .getObject("itemSectionRenderer").getArray("contents").getObject(0) + .getObject("showingResultsForRenderer"); + if (showingResultsForRenderer == null) { return ""; + } else { + return showingResultsForRenderer.getObject("correctedQuery").getArray("runs") + .getObject(0).getString("text"); } } @Nonnull @Override public InfoItemsPage getInitialPage() throws ExtractionException { - return new InfoItemsPage<>(collectItems(doc), getNextPageUrl()); + InfoItemsSearchCollector collector = getInfoItemSearchCollector(); + JsonArray videos = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer") + .getObject("primaryContents").getObject("sectionListRenderer").getArray("contents") + .getObject(0).getObject("itemSectionRenderer").getArray("contents"); + + collectStreamsFrom(collector, videos); + return new InfoItemsPage<>(collector, getNextPageUrl()); } @Override public String getNextPageUrl() throws ExtractionException { - return getUrl() + "&page=" + 2; + return getNextPageUrlFrom(initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer") + .getObject("primaryContents").getObject("sectionListRenderer").getArray("contents") + .getObject(0).getObject("itemSectionRenderer").getArray("continuations")); } @Override public InfoItemsPage getPage(String pageUrl) throws IOException, ExtractionException { - // TODO: Get extracting next pages working - final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody(); - doc = Jsoup.parse(response, pageUrl); + if (pageUrl == null || pageUrl.isEmpty()) { + throw new ExtractionException(new IllegalArgumentException("Page url is empty or null")); + } - return new InfoItemsPage<>(collectItems(doc), getNextPageUrlFromCurrentUrl(pageUrl)); - } - - private String getNextPageUrlFromCurrentUrl(String currentUrl) - throws MalformedURLException, UnsupportedEncodingException { - final int pageNr = Integer.parseInt( - Parser.compatParseMap( - new URL(currentUrl) - .getQuery()) - .get("page")); - - return currentUrl.replace("&page=" + pageNr, - "&page=" + Integer.toString(pageNr + 1)); - } - - private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundException, ParsingException { InfoItemsSearchCollector collector = getInfoItemSearchCollector(); + JsonArray ajaxJson; + try { + Map> headers = new HashMap<>(); + headers.put("X-YouTube-Client-Name", Collections.singletonList("1")); + headers.put("X-YouTube-Client-Version", Collections.singletonList("2.20200221.03.00")); // TODO: Automatically get YouTube client version somehow + final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); + ajaxJson = JsonParser.array().from(response); + } catch (JsonParserException pe) { + throw new ParsingException("Could not parse json data for next streams", pe); + } + + JsonObject itemSectionRenderer = ajaxJson.getObject(1).getObject("response") + .getObject("continuationContents").getObject("itemSectionContinuation"); + + collectStreamsFrom(collector, itemSectionRenderer.getArray("contents")); + + return new InfoItemsPage<>(collector, getNextPageUrlFrom(itemSectionRenderer.getArray("continuations"))); + } + + private void collectStreamsFrom(InfoItemsSearchCollector collector, JsonArray videos) throws NothingFoundException, ParsingException { collector.reset(); final TimeAgoParser timeAgoParser = getTimeAgoParser(); - if (initialData == null) initialData = YoutubeParsingHelper.getInitialData(doc.toString()); - JsonArray list = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer") - .getObject("primaryContents").getObject("sectionListRenderer").getArray("contents") - .getObject(0).getObject("itemSectionRenderer").getArray("contents"); - - for (Object item : list) { + for (Object item : videos) { if (((JsonObject) item).getObject("backgroundPromoRenderer") != null) { throw new NothingFoundException(((JsonObject) item).getObject("backgroundPromoRenderer") .getObject("bodyText").getArray("runs").getObject(0).getString("text")); @@ -136,7 +146,17 @@ public class YoutubeSearchExtractor extends SearchExtractor { collector.commit(new YoutubePlaylistInfoItemExtractor(((JsonObject) item).getObject("playlistRenderer"))); } } - return collector; } + private String getNextPageUrlFrom(JsonArray continuations) throws ParsingException { + if (continuations == null) { + return ""; + } + + JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData"); + String continuation = nextContinuationData.getString("continuation"); + String clickTrackingParams = nextContinuationData.getString("clickTrackingParams"); + return getUrl() + "&pbj=1&ctoken=" + continuation + "&continuation=" + continuation + + "&itct=" + clickTrackingParams; + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeSearchQueryHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeSearchQueryHandlerFactory.java index c17600742..13481b345 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeSearchQueryHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeSearchQueryHandlerFactory.java @@ -24,13 +24,13 @@ public class YoutubeSearchQueryHandlerFactory extends SearchQueryHandlerFactory public String getUrl(String searchString, List contentFilters, String sortFilter) throws ParsingException { try { final String url = "https://www.youtube.com/results" - + "?q=" + URLEncoder.encode(searchString, CHARSET_UTF_8); + + "?search_query=" + URLEncoder.encode(searchString, CHARSET_UTF_8); if (contentFilters.size() > 0) { switch (contentFilters.get(0)) { - case VIDEOS: return url + "&sp=EgIQAVAU"; - case CHANNELS: return url + "&sp=EgIQAlAU"; - case PLAYLISTS: return url + "&sp=EgIQA1AU"; + case VIDEOS: return url + "&sp=EgIQAQ%253D%253D"; + case CHANNELS: return url + "&sp=EgIQAg%253D%253D"; + case PLAYLISTS: return url + "&sp=EgIQAw%253D%253D"; case ALL: default: } From 5842b9ad37f760289f74856832e1db0076a5502e Mon Sep 17 00:00:00 2001 From: TobiGr Date: Mon, 24 Feb 2020 19:03:54 +0100 Subject: [PATCH 35/53] Add getClientVersion() and HARDCODED_CLIENT_VERSION to YouTubeParsingHelper Prefer hardcoded client version above the current one when making requests to retrieve the same JSON structure for each request. --- .../extractors/YoutubeChannelExtractor.java | 26 +++++++-- .../extractors/YoutubePlaylistExtractor.java | 26 +++++++-- .../linkHandler/YoutubeParsingHelper.java | 58 +++++++++++++++++++ 3 files changed, 100 insertions(+), 10 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 27678657c..0cdddde7f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -211,14 +211,30 @@ public class YoutubeChannelExtractor extends ChannelExtractor { StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); JsonArray ajaxJson; + + Map> headers = new HashMap<>(); + headers.put("X-YouTube-Client-Name", Collections.singletonList("1")); try { - Map> headers = new HashMap<>(); - headers.put("X-YouTube-Client-Name", Collections.singletonList("1")); - headers.put("X-YouTube-Client-Version", Collections.singletonList("2.20200221.03.00")); // TODO: Automatically get YouTube client version somehow + // Use the hardcoded client version first to get JSON with a structure we know + headers.put("X-YouTube-Client-Version", + Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION)); final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); + if (response.length() > 50) { // ensure to have a valid response + throw new ParsingException("Could not parse json data for next streams"); + } ajaxJson = JsonParser.array().from(response); - } catch (JsonParserException pe) { - throw new ParsingException("Could not parse json data for next streams", pe); + } catch (Exception e) { + try { + headers.put("X-YouTube-Client-Version", + Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString()))); + final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); + if (response.length() > 50) { // ensure to have a valid response + throw new ParsingException("Could not parse json data for next streams"); + } + ajaxJson = JsonParser.array().from(response); + } catch (JsonParserException ignored) { + throw new ParsingException("Could not parse json data for next streams", e); + } } JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response") diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java index 8abee5f1d..73a2044bb 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java @@ -198,14 +198,30 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); JsonArray ajaxJson; + + Map> headers = new HashMap<>(); + headers.put("X-YouTube-Client-Name", Collections.singletonList("1")); try { - Map> headers = new HashMap<>(); - headers.put("X-YouTube-Client-Name", Collections.singletonList("1")); - headers.put("X-YouTube-Client-Version", Collections.singletonList("2.20200221.03.00")); // TODO: Automatically get YouTube client version somehow + // Use the hardcoded client version first to get JSON with a structure we know + headers.put("X-YouTube-Client-Version", + Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION)); final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); + if (response.length() > 50) { // ensure to have a valid response + throw new ParsingException("Could not parse json data for next streams"); + } ajaxJson = JsonParser.array().from(response); - } catch (JsonParserException pe) { - throw new ParsingException("Could not parse json data for next streams", pe); + } catch (Exception e) { + try { + headers.put("X-YouTube-Client-Version", + Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString()))); + final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); + if (response.length() > 50) { // ensure to have a valid response + throw new ParsingException("Could not parse json data for next streams"); + } + ajaxJson = JsonParser.array().from(response); + } catch (JsonParserException ignored) { + throw new ParsingException("Could not parse json data for next streams", e); + } } JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response") diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java index 78516f6df..81e4feb35 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java @@ -1,6 +1,7 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; +import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; @@ -42,6 +43,8 @@ public class YoutubeParsingHelper { private YoutubeParsingHelper() { } + public static final String HARDCODED_CLIENT_VERSION = "2.20200214.04.00"; + private static final String FEED_BASE_CHANNEL_ID = "https://www.youtube.com/feeds/videos.xml?channel_id="; private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user="; @@ -157,4 +160,59 @@ public class YoutubeParsingHelper { } } + /** + * Get the client version from a page + * @param initialData + * @param html The page HTML + * @return + * @throws ParsingException + */ + public static String getClientVersion(JsonObject initialData, String html) throws ParsingException { + if (initialData == null) initialData = getInitialData(html); + JsonArray serviceTrackingParams = initialData.getObject("responseContext").getArray("serviceTrackingParams"); + String shortClientVersion = null; + + // try to get version from initial data first + for (Object service : serviceTrackingParams) { + JsonObject s = (JsonObject) service; + if (s.getString("service").equals("CSI")) { + JsonArray params = s.getArray("params"); + for (Object param: params) { + JsonObject p = (JsonObject) param; + String key = p.getString("key"); + if (key != null && key.equals("cver")) { + return p.getString("value"); + } + } + } else if (s.getString("service").equals("ECATCHER")) { + // fallback to get a shortened client version which does not contain the last do digits + JsonArray params = s.getArray("params"); + for (Object param: params) { + JsonObject p = (JsonObject) param; + String key = p.getString("key"); + if (key != null && key.equals("client.version")) { + shortClientVersion = p.getString("value"); + } + } + } + } + + String clientVersion; + String[] patterns = { + "INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"", + "innertube_context_client_version\":\"([0-9\\.]+?)\"", + "client.version=([0-9\\.]+)" + }; + for (String pattern: patterns) { + try { + clientVersion = Parser.matchGroup1(pattern, html); + if (clientVersion != null && !clientVersion.isEmpty()) return clientVersion; + } catch (Exception ignored) {} + } + + if (shortClientVersion != null) return shortClientVersion; + + throw new ParsingException("Could not get client version"); + } + } From 583e9c157182d05f9e8b7bbd5dae211216a7ab76 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Mon, 24 Feb 2020 19:09:27 +0100 Subject: [PATCH 36/53] Fix getPage() Someone should check if there are unstaged changes before committing... --- .../services/youtube/extractors/YoutubeChannelExtractor.java | 4 ++-- .../services/youtube/extractors/YoutubePlaylistExtractor.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 0cdddde7f..930d34414 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -219,7 +219,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { headers.put("X-YouTube-Client-Version", Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION)); final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); - if (response.length() > 50) { // ensure to have a valid response + if (response.length() < 50) { // ensure to have a valid response throw new ParsingException("Could not parse json data for next streams"); } ajaxJson = JsonParser.array().from(response); @@ -228,7 +228,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { headers.put("X-YouTube-Client-Version", Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString()))); final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); - if (response.length() > 50) { // ensure to have a valid response + if (response.length() < 50) { // ensure to have a valid response throw new ParsingException("Could not parse json data for next streams"); } ajaxJson = JsonParser.array().from(response); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java index 73a2044bb..ce104a1da 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java @@ -206,7 +206,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { headers.put("X-YouTube-Client-Version", Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION)); final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); - if (response.length() > 50) { // ensure to have a valid response + if (response.length() < 50) { // ensure to have a valid response throw new ParsingException("Could not parse json data for next streams"); } ajaxJson = JsonParser.array().from(response); @@ -215,7 +215,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { headers.put("X-YouTube-Client-Version", Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString()))); final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); - if (response.length() > 50) { // ensure to have a valid response + if (response.length() < 50) { // ensure to have a valid response throw new ParsingException("Could not parse json data for next streams"); } ajaxJson = JsonParser.array().from(response); From dd4dd849dce1238df9a818f0e97a68282037b288 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Mon, 24 Feb 2020 20:02:45 +0100 Subject: [PATCH 37/53] Get client version dynamically in YouTubeSearchExtractor --- .../extractors/YoutubeSearchExtractor.java | 28 +++++++++++++++---- .../linkHandler/YoutubeParsingHelper.java | 1 - 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index 07954334f..a35529aa3 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -17,6 +17,7 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector; import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; +import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import java.io.IOException; import java.util.Collections; @@ -111,14 +112,31 @@ public class YoutubeSearchExtractor extends SearchExtractor { InfoItemsSearchCollector collector = getInfoItemSearchCollector(); JsonArray ajaxJson; + + Map> headers = new HashMap<>(); + headers.put("X-YouTube-Client-Name", Collections.singletonList("1")); + try { - Map> headers = new HashMap<>(); - headers.put("X-YouTube-Client-Name", Collections.singletonList("1")); - headers.put("X-YouTube-Client-Version", Collections.singletonList("2.20200221.03.00")); // TODO: Automatically get YouTube client version somehow + // Use the hardcoded client version first to get JSON with a structure we know + headers.put("X-YouTube-Client-Version", + Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION)); final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); + if (response.length() < 50) { // ensure to have a valid response + throw new ParsingException("Could not parse json data for next streams"); + } ajaxJson = JsonParser.array().from(response); - } catch (JsonParserException pe) { - throw new ParsingException("Could not parse json data for next streams", pe); + } catch (Exception e) { + try { + headers.put("X-YouTube-Client-Version", + Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString()))); + final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); + if (response.length() < 50) { // ensure to have a valid response + throw new ParsingException("Could not parse json data for next streams"); + } + ajaxJson = JsonParser.array().from(response); + } catch (JsonParserException ignored) { + throw new ParsingException("Could not parse json data for next streams", e); + } } JsonObject itemSectionRenderer = ajaxJson.getObject(1).getObject("response") diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java index 81e4feb35..51347d423 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java @@ -214,5 +214,4 @@ public class YoutubeParsingHelper { throw new ParsingException("Could not get client version"); } - } From 02b59903fa5a3c8d0c83166862d12ecf5febf1ff Mon Sep 17 00:00:00 2001 From: wb9688 Date: Tue, 25 Feb 2020 09:07:22 +0100 Subject: [PATCH 38/53] Remove useless code --- .../extractors/YoutubePlaylistExtractor.java | 17 +-- .../extractors/YoutubeSearchExtractor.java | 3 +- .../extractors/YoutubeStreamExtractor.java | 114 +++++------------- .../extractors/YoutubeTrendingExtractor.java | 9 +- 4 files changed, 36 insertions(+), 107 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java index ce104a1da..0ac2dcf05 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java @@ -34,7 +34,6 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { private JsonObject initialData; private JsonObject uploaderInfo; private JsonObject playlistInfo; - private JsonObject playlistVideos; public YoutubePlaylistExtractor(StreamingService service, ListLinkHandler linkHandler) { super(service, linkHandler); @@ -48,7 +47,6 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { initialData = YoutubeParsingHelper.getInitialData(response.responseBody()); uploaderInfo = getUploaderInfo(); playlistInfo = getPlaylistInfo(); - playlistVideos = getPlaylistVideos(); } private JsonObject getUploaderInfo() throws ParsingException { @@ -83,19 +81,8 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { } } - private JsonObject getPlaylistVideos() throws ParsingException { - try { - return initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") - .getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content").getObject("sectionListRenderer") - .getArray("contents").getObject(0).getObject("itemSectionRenderer").getArray("contents") - .getObject(0).getObject("playlistVideoListRenderer"); - } catch (Exception e) { - throw new ParsingException("Could not get playlist info", e); - } - } - @Override - public String getNextPageUrl() throws ExtractionException { + public String getNextPageUrl() { return getNextPageUrlFrom(initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") .getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content") .getObject("sectionListRenderer").getArray("contents").getObject(0) @@ -177,7 +164,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Nonnull @Override - public InfoItemsPage getInitialPage() throws ExtractionException { + public InfoItemsPage getInitialPage() { StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); JsonArray videos = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index a35529aa3..b06699098 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -17,7 +17,6 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector; import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; -import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import java.io.IOException; import java.util.Collections; @@ -147,7 +146,7 @@ public class YoutubeSearchExtractor extends SearchExtractor { return new InfoItemsPage<>(collector, getNextPageUrlFrom(itemSectionRenderer.getArray("continuations"))); } - private void collectStreamsFrom(InfoItemsSearchCollector collector, JsonArray videos) throws NothingFoundException, ParsingException { + private void collectStreamsFrom(InfoItemsSearchCollector collector, JsonArray videos) throws NothingFoundException { collector.reset(); final TimeAgoParser timeAgoParser = getTimeAgoParser(); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index dd2987a4b..29871b17a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -3,7 +3,6 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; -import com.grack.nanojson.JsonParserException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -15,7 +14,6 @@ import org.schabi.newpipe.extractor.MediaFormat; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.downloader.Downloader; -import org.schabi.newpipe.extractor.downloader.Request; import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ExtractionException; @@ -36,7 +34,6 @@ import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import org.schabi.newpipe.extractor.stream.StreamType; import org.schabi.newpipe.extractor.stream.SubtitlesStream; import org.schabi.newpipe.extractor.stream.VideoStream; -import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Utils; @@ -366,55 +363,6 @@ public class YoutubeStreamExtractor extends StreamExtractor { } } - private long getLiveStreamWatchingCount() throws ExtractionException, IOException, JsonParserException { - // https://www.youtube.com/youtubei/v1/updated_metadata?alt=json&key= - String innerTubeKey = null, clientVersion = null; - if (playerArgs != null && !playerArgs.isEmpty()) { - innerTubeKey = playerArgs.getString("innertube_api_key"); - clientVersion = playerArgs.getString("innertube_context_client_version"); - } else if (!videoInfoPage.isEmpty()) { - innerTubeKey = videoInfoPage.get("innertube_api_key"); - clientVersion = videoInfoPage.get("innertube_context_client_version"); - } - - if (innerTubeKey == null || innerTubeKey.isEmpty()) { - throw new ExtractionException("Couldn't get innerTube key"); - } - - if (clientVersion == null || clientVersion.isEmpty()) { - throw new ExtractionException("Couldn't get innerTube client version"); - } - - final String metadataUrl = "https://www.youtube.com/youtubei/v1/updated_metadata?alt=json&key=" + innerTubeKey; - final byte[] dataBody = ("{\"context\":{\"client\":{\"clientName\":1,\"clientVersion\":\"" + clientVersion + "\"}}" + - ",\"videoId\":\"" + getId() + "\"}").getBytes("UTF-8"); - final Response response = getDownloader().execute(Request.newBuilder() - .post(metadataUrl, dataBody) - .addHeader("Content-Type", "application/json") - .build()); - final JsonObject jsonObject = JsonParser.object().from(response.responseBody()); - - for (Object actionEntry : jsonObject.getArray("actions")) { - if (!(actionEntry instanceof JsonObject)) continue; - final JsonObject entry = (JsonObject) actionEntry; - - final JsonObject updateViewershipAction = entry.getObject("updateViewershipAction", null); - if (updateViewershipAction == null) continue; - - final JsonArray viewCountRuns = JsonUtils.getArray(updateViewershipAction, "viewership.videoViewCountRenderer.viewCount.runs"); - if (viewCountRuns.isEmpty()) continue; - - final JsonObject textObject = viewCountRuns.getObject(0); - if (!textObject.has("text")) { - throw new ExtractionException("Response don't have \"text\" element"); - } - - return Long.parseLong(Utils.removeNonDigitCharacters(textObject.getString("text"))); - } - - throw new ExtractionException("Could not find correct results in response"); - } - private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException { JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults") .getObject("results").getObject("results").getArray("contents"); @@ -525,7 +473,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { public String getUploaderAvatarUrl() throws ParsingException { assertPageFetched(); - String uploaderAvatarUrl = null; + String uploaderAvatarUrl; try { uploaderAvatarUrl = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("secondaryResults") .getObject("secondaryResults").getArray("results").getObject(0).getObject("compactAutoplayRenderer") @@ -657,13 +605,13 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override @Nonnull - public List getSubtitlesDefault() throws IOException, ExtractionException { + public List getSubtitlesDefault() { return getSubtitles(MediaFormat.TTML); } @Override @Nonnull - public List getSubtitles(final MediaFormat format) throws IOException, ExtractionException { + public List getSubtitles(final MediaFormat format) { assertPageFetched(); List subtitles = new ArrayList<>(); for (final SubtitlesInfo subtitlesInfo : subtitlesInfos) { @@ -687,7 +635,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { } @Override - public StreamInfoItem getNextStream() throws IOException, ExtractionException { + public StreamInfoItem getNextStream() throws ExtractionException { assertPageFetched(); try { final JsonObject videoInfo = initialData.getObject("contents").getObject("twoColumnWatchNextResults") @@ -815,12 +763,10 @@ public class YoutubeStreamExtractor extends StreamExtractor { return JsonParser.object().from(ytPlayerConfigRaw); } catch (Parser.RegexException e) { String errorReason = getErrorMessage(); - switch (errorReason) { - case "": - throw new ContentNotAvailableException("Content not available: player config empty", e); - default: - throw new ContentNotAvailableException("Content not available", e); + if (errorReason.isEmpty()) { + throw new ContentNotAvailableException("Content not available: player config empty", e); } + throw new ContentNotAvailableException("Content not available", e); } catch (Exception e) { throw new ParsingException("Could not parse yt player config", e); } @@ -976,7 +922,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { } @Nonnull - private List getAvailableSubtitlesInfo() throws SubtitlesException { + private List getAvailableSubtitlesInfo() { // If the video is age restricted getPlayerConfig will fail if (isAgeRestricted) return Collections.emptyList(); @@ -990,7 +936,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { final JsonObject renderer = captions.getObject("playerCaptionsTracklistRenderer", new JsonObject()); final JsonArray captionsArray = renderer.getArray("captionTracks", new JsonArray()); // todo: use this to apply auto translation to different language from a source language - final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages", new JsonArray()); +// final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages", new JsonArray()); // This check is necessary since there may be cases where subtitles metadata do not contain caption track info // e.g. https://www.youtube.com/watch?v=-Vpwatutnko @@ -1147,40 +1093,44 @@ public class YoutubeStreamExtractor extends StreamExtractor { } } + @Nonnull @Override - public String getHost() throws ParsingException { + public String getHost() { + return ""; + } + + @Nonnull + @Override + public String getPrivacy() { + return ""; + } + + @Nonnull + @Override + public String getCategory() { + return ""; + } + + @Nonnull + @Override + public String getLicence() { return ""; } @Override - public String getPrivacy() throws ParsingException { - return ""; - } - - @Override - public String getCategory() throws ParsingException { - return ""; - } - - @Override - public String getLicence() throws ParsingException { - return ""; - } - - @Override - public Locale getLanguageInfo() throws ParsingException { + public Locale getLanguageInfo() { return null; } @Nonnull @Override - public List getTags() throws ParsingException { + public List getTags() { return new ArrayList<>(); } @Nonnull @Override - public String getSupportInfo() throws ParsingException { + public String getSupportInfo() { return ""; } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java index d815e9e14..649cdf4e7 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java @@ -22,10 +22,7 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; -import com.grack.nanojson.JsonParser; -import com.grack.nanojson.JsonParserException; -import org.jsoup.nodes.Document; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Response; @@ -37,15 +34,12 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; -import org.schabi.newpipe.extractor.utils.Parser; import java.io.IOException; import javax.annotation.Nonnull; public class YoutubeTrendingExtractor extends KioskExtractor { - - private Document doc; private JsonObject initialData; public YoutubeTrendingExtractor(StreamingService service, @@ -60,7 +54,6 @@ public class YoutubeTrendingExtractor extends KioskExtractor { "?gl=" + getExtractorContentCountry().getCountryCode(); final Response response = downloader.get(url, getExtractorLocalization()); - doc = YoutubeParsingHelper.parseAndCheckPage(url, response); initialData = YoutubeParsingHelper.getInitialData(response.responseBody()); } @@ -92,7 +85,7 @@ public class YoutubeTrendingExtractor extends KioskExtractor { @Nonnull @Override - public InfoItemsPage getInitialPage() throws ParsingException { + public InfoItemsPage getInitialPage() { StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); JsonArray firstPageElements = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") .getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content") From f13c0288cc0c8a94f741095bc207c0e744a3f5bf Mon Sep 17 00:00:00 2001 From: wb9688 Date: Tue, 25 Feb 2020 09:50:22 +0100 Subject: [PATCH 39/53] Reimplement some methods in YoutubeStreamExtractor --- .../extractors/YoutubeStreamExtractor.java | 221 +++++------------- 1 file changed, 52 insertions(+), 169 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 29871b17a..caea8ac53 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -4,7 +4,6 @@ import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.mozilla.javascript.Context; @@ -39,8 +38,6 @@ import org.schabi.newpipe.extractor.utils.Utils; import java.io.IOException; import java.io.UnsupportedEncodingException; -import java.net.MalformedURLException; -import java.net.URL; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -48,8 +45,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -75,8 +70,6 @@ import javax.annotation.Nullable; */ public class YoutubeStreamExtractor extends StreamExtractor { - private static final String TAG = YoutubeStreamExtractor.class.getSimpleName(); - /*////////////////////////////////////////////////////////////////////////// // Exceptions //////////////////////////////////////////////////////////////////////////*/ @@ -87,12 +80,6 @@ public class YoutubeStreamExtractor extends StreamExtractor { } } - public class SubtitlesException extends ContentNotAvailableException { - SubtitlesException(String message, Throwable cause) { - super(message, cause); - } - } - /*//////////////////////////////////////////////////////////////////////////*/ private Document doc; @@ -120,22 +107,17 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public String getName() throws ParsingException { assertPageFetched(); + String title = null; try { - return playerResponse.getObject("videoDetails").getString("title"); - - } catch (Exception e) { - // fallback HTML method - String name = null; + title = getVideoPrimaryInfoRenderer().getObject("title").getArray("runs").getObject(0).getString("text"); + } catch (Exception ignored) {} + if (title == null) { try { - name = doc.select("meta[name=title]").attr(CONTENT); - } catch (Exception ignored) { - } - - if (name == null) { - throw new ParsingException("Could not get name", e); - } - return name; + title = playerResponse.getObject("videoDetails").getString("title"); + } catch (Exception ignored) {} } + if (title != null) return title; + throw new ParsingException("Could not get name"); } @Override @@ -144,19 +126,12 @@ public class YoutubeStreamExtractor extends StreamExtractor { return null; } + // TODO: try videoPrimaryInfoRenderer.dateText.simpleText + try { return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate"); } catch (Exception e) { - String uploadDate = null; - try { - uploadDate = doc.select("meta[itemprop=datePublished]").attr(CONTENT); - } catch (Exception ignored) { - } - - if (uploadDate == null) { - throw new ParsingException("Could not get upload date", e); - } - return uploadDate; + throw new ParsingException("Could not get upload date"); } } @@ -181,15 +156,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { return thumbnails.getObject(thumbnails.size() - 1).getString("url"); } catch (Exception e) { - String url = null; - try { - url = doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href"); - } catch (Exception ignored) {} - - if (url == null) { - throw new ParsingException("Could not get thumbnail url", e); - } - return url; + throw new ParsingException("Could not get thumbnail url"); } } @@ -198,93 +165,19 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public Description getDescription() throws ParsingException { assertPageFetched(); + // TODO: Parse videoSecondaryInfoRenderer.description try { - // first try to get html-formatted description - return new Description(parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html()), Description.HTML); - } catch (Exception e) { - try { - // fallback to raw non-html description - return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT); - } catch (Exception ignored) { - throw new ParsingException("Could not get the description", e); - } + // raw non-html description + return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT); + } catch (Exception ignored) { + throw new ParsingException("Could not get the description"); } } - // onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;" - // :00 is NOT recognized as a timestamp in description or comments. - // 0:00 is recognized in both description and comments. - // https://www.youtube.com/watch?v=4cccfDXu1vA - private final static Pattern DESCRIPTION_TIMESTAMP_ONCLICK_REGEX = Pattern.compile( - "seekTo\\(" - + "(?:(\\d+)\\*3600\\+)?" // hours? - + "(\\d+)\\*60\\+" // minutes - + "(\\d+)" // seconds - + "\\)"); - - @SafeVarargs - private static T coalesce(T... args) { - for (T arg : args) { - if (arg != null) return arg; - } - throw new IllegalArgumentException("all arguments to coalesce() were null"); - } - - private String parseHtmlAndGetFullLinks(String descriptionHtml) - throws MalformedURLException, UnsupportedEncodingException, ParsingException { - final Document description = Jsoup.parse(descriptionHtml, getUrl()); - for (Element a : description.select("a")) { - final String rawUrl = a.attr("abs:href"); - final URL redirectLink = new URL(rawUrl); - - final Matcher onClickTimestamp; - final String queryString; - if ((onClickTimestamp = DESCRIPTION_TIMESTAMP_ONCLICK_REGEX.matcher(a.attr("onclick"))) - .find()) { - a.removeAttr("onclick"); - - String hours = coalesce(onClickTimestamp.group(1), "0"); - String minutes = onClickTimestamp.group(2); - String seconds = onClickTimestamp.group(3); - - int timestamp = 0; - timestamp += Integer.parseInt(hours) * 3600; - timestamp += Integer.parseInt(minutes) * 60; - timestamp += Integer.parseInt(seconds); - - String setTimestamp = "&t=" + timestamp; - - // Even after clicking https://youtu.be/...?t=6, - // getUrl() is https://www.youtube.com/watch?v=..., never youtu.be, never &t=. - a.attr("href", getUrl() + setTimestamp); - - } else if ((queryString = redirectLink.getQuery()) != null) { - // if the query string is null we are not dealing with a redirect link, - // so we don't need to override it. - final String link = - Parser.compatParseMap(queryString).get("q"); - - if (link != null) { - // if link is null the a tag is a hashtag. - // They refer to the youtube search. We do not handle them. - a.text(link); - a.attr("href", link); - } else if (redirectLink.toString().contains("https://www.youtube.com/")) { - a.text(redirectLink.toString()); - a.attr("href", redirectLink.toString()); - } - } else if (redirectLink.toString().contains("https://www.youtube.com/")) { - descriptionHtml = descriptionHtml.replace(rawUrl, redirectLink.toString()); - a.text(redirectLink.toString()); - a.attr("href", redirectLink.toString()); - } - } - return description.select("body").first().html(); - } - @Override public int getAgeLimit() throws ParsingException { assertPageFetched(); + // TODO: Find new way to get age limit if (!isAgeRestricted) { return NO_AGE_LIMIT; } @@ -332,54 +225,25 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public long getViewCount() throws ParsingException { assertPageFetched(); + String views = null; try { - if (getStreamType().equals(StreamType.LIVE_STREAM)) { - // The array index is variable, therefore we loop throw the complete array. - // videoPrimaryInfoRenderer is often stored at index 1 - JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults") - .getObject("results").getObject("results").getArray("contents"); - for (Object c : contents) { - try { - // this gets current view count, but there is also an overall view count which is stored here: - // contents.twoColumnWatchNextResults.secondaryResults.secondaryResults.results[0] - // .compactAutoplayRenderer.contents[0].compactVideoRenderer.viewCountText.simpleText - String views = ((JsonObject) c).getObject("videoPrimaryInfoRenderer") - .getObject("viewCount").getObject("videoViewCountRenderer").getObject("viewCount") - .getArray("runs").getObject(0).getString("text"); - return Long.parseLong(Utils.removeNonDigitCharacters(views)); - } catch (Exception ignored) {} - } - throw new ParsingException("Could not get view count from live stream"); - - } else { - return Long.parseLong(playerResponse.getObject("videoDetails").getString("viewCount")); - } - } catch (Exception e) { + views = getVideoPrimaryInfoRenderer().getObject("viewCount") + .getObject("videoViewCountRenderer").getObject("viewCount") + .getArray("runs").getObject(0).getString("text"); + } catch (Exception ignored) {} + if (views == null) { try { - return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT)); - } catch (Exception ignored) { - throw new ParsingException("Could not get view count", e); - } + views = getVideoPrimaryInfoRenderer().getObject("viewCount") + .getObject("videoViewCountRenderer").getObject("viewCount").getString("simpleText"); + } catch (Exception ignored) {} } - } - - private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException { - JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults") - .getObject("results").getObject("results").getArray("contents"); - JsonObject videoPrimaryInfoRenderer = null; - - for (Object content : contents) { - if (((JsonObject) content).getObject("videoPrimaryInfoRenderer") != null) { - videoPrimaryInfoRenderer = ((JsonObject) content).getObject("videoPrimaryInfoRenderer"); - break; - } + if (views == null) { + try { + views = playerResponse.getObject("videoDetails").getString("viewCount"); + } catch (Exception ignored) {} } - - if (videoPrimaryInfoRenderer == null) { - throw new ParsingException("Could not find videoPrimaryInfoRenderer"); - } - - return videoPrimaryInfoRenderer; + if (views != null) return Long.parseLong(views); + throw new ParsingException("Could not get view count"); } @Override @@ -993,6 +857,25 @@ public class YoutubeStreamExtractor extends StreamExtractor { // Utils //////////////////////////////////////////////////////////////////////////*/ + private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException { + JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults") + .getObject("results").getObject("results").getArray("contents"); + JsonObject videoPrimaryInfoRenderer = null; + + for (Object content : contents) { + if (((JsonObject) content).getObject("videoPrimaryInfoRenderer") != null) { + videoPrimaryInfoRenderer = ((JsonObject) content).getObject("videoPrimaryInfoRenderer"); + break; + } + } + + if (videoPrimaryInfoRenderer == null) { + throw new ParsingException("Could not find videoPrimaryInfoRenderer"); + } + + return videoPrimaryInfoRenderer; + } + @Nonnull private static String getVideoInfoUrl(final String id, final String sts) { return "https://www.youtube.com/get_video_info?" + "video_id=" + id + From 76d54abdbf00729c7ced10e753a13c39e11f7794 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Tue, 25 Feb 2020 10:05:53 +0100 Subject: [PATCH 40/53] Reimplement more methods in YoutubeStreamExtractor --- .../extractors/YoutubeStreamExtractor.java | 57 ++++++++++++------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index caea8ac53..4afc7c1d2 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -113,7 +113,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { } catch (Exception ignored) {} if (title == null) { try { - title = playerResponse.getObject("videoDetails").getString("title"); + title = playerResponse.getObject("videoDetails").getString("title"); } catch (Exception ignored) {} } if (title != null) return title; @@ -296,40 +296,36 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public String getUploaderUrl() throws ParsingException { assertPageFetched(); + String uploaderId = null; try { - return "https://www.youtube.com/channel/" + - playerResponse.getObject("videoDetails").getString("channelId"); - } catch (Exception e) { - String uploaderUrl = null; + uploaderId = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer") + .getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId"); + } catch (Exception ignored) {} + if (uploaderId == null) { try { - uploaderUrl = doc.select("div[class=\"yt-user-info\"]").first().children() - .select("a").first().attr("abs:href"); + uploaderId = playerResponse.getObject("videoDetails").getString("channelId"); } catch (Exception ignored) {} - - if (uploaderUrl == null) { - throw new ParsingException("Could not get channel link", e); - } - return uploaderUrl; } + if (uploaderId != null) return "https://www.youtube.com/channel/" + uploaderId; + throw new ParsingException("Could not get uploader url"); } @Nonnull @Override public String getUploaderName() throws ParsingException { assertPageFetched(); + String uploaderName = null; try { - return playerResponse.getObject("videoDetails").getString("author"); - } catch (Exception e) { - String name = null; + uploaderName = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer") + .getObject("title").getArray("runs").getObject(0).getString("text"); + } catch (Exception ignored) {} + if (uploaderName == null) { try { - name = doc.select("div.yt-user-info").first().text(); + uploaderName = playerResponse.getObject("videoDetails").getString("author"); } catch (Exception ignored) {} - - if (name == null) { - throw new ParsingException("Could not get uploader name"); - } - return name; } + if (uploaderName != null) return uploaderName; + throw new ParsingException("Could not get uploader name"); } @Nonnull @@ -876,6 +872,25 @@ public class YoutubeStreamExtractor extends StreamExtractor { return videoPrimaryInfoRenderer; } + private JsonObject getVideoSecondaryInfoRenderer() throws ParsingException { + JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults") + .getObject("results").getObject("results").getArray("contents"); + JsonObject videoSecondaryInfoRenderer = null; + + for (Object content : contents) { + if (((JsonObject) content).getObject("videoSecondaryInfoRenderer") != null) { + videoSecondaryInfoRenderer = ((JsonObject) content).getObject("videoSecondaryInfoRenderer"); + break; + } + } + + if (videoSecondaryInfoRenderer == null) { + throw new ParsingException("Could not find videoSecondaryInfoRenderer"); + } + + return videoSecondaryInfoRenderer; + } + @Nonnull private static String getVideoInfoUrl(final String id, final String sts) { return "https://www.youtube.com/get_video_info?" + "video_id=" + id + From 26ea3dceb6ea696b09a018f009c14d9cbe210d4d Mon Sep 17 00:00:00 2001 From: wb9688 Date: Tue, 25 Feb 2020 10:08:52 +0100 Subject: [PATCH 41/53] Fix getViewCount() in YoutubeStreamExtractor --- .../services/youtube/extractors/YoutubeStreamExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 4afc7c1d2..929c6c4df 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -242,7 +242,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { views = playerResponse.getObject("videoDetails").getString("viewCount"); } catch (Exception ignored) {} } - if (views != null) return Long.parseLong(views); + if (views != null) return Long.parseLong(Utils.removeNonDigitCharacters(views)); throw new ParsingException("Could not get view count"); } From f39603f6ef75dbdcde341b723868c89f086e8d6c Mon Sep 17 00:00:00 2001 From: TobiGr Date: Tue, 25 Feb 2020 10:38:54 +0100 Subject: [PATCH 42/53] Implement getUploadDate() in YouTubeStreamInfoItemExtractor --- .../extractors/YoutubeChannelExtractor.java | 2 -- .../extractors/YoutubeStreamExtractor.java | 2 -- .../YoutubeStreamInfoItemExtractor.java | 18 ++++++++++++++++-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 930d34414..4c7185f1d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -164,11 +164,9 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public long getSubscriberCount() throws ParsingException { - final JsonObject subscriberInfo = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("subscriberCountText"); if (subscriberInfo != null) { try { - return Utils.mixedNumberWordToLong(subscriberInfo.getArray("runs").getObject(0).getString("text")); } catch (NumberFormatException e) { throw new ParsingException("Could not get subscriber count", e); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 929c6c4df..62876d38a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -512,7 +512,6 @@ public class YoutubeStreamExtractor extends StreamExtractor { } } - @Override public StreamInfoItemsCollector getRelatedStreams() throws ExtractionException { assertPageFetched(); @@ -678,7 +677,6 @@ public class YoutubeStreamExtractor extends StreamExtractor { } } - @Nonnull private EmbeddedInfo getEmbeddedInfo() throws ParsingException, ReCaptchaException { try { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java index 605053bb3..950bab0f2 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java @@ -158,12 +158,26 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { @Override public String getTextualUploadDate() { // TODO: Get upload date in case of a videoRenderer (not available in case of a compactVideoRenderer) - return null; + try { + String s =videoInfo.getObject("publishedTimeText").getString("simpleText"); + return s; + } catch (Exception e) { + // upload date is not always available, e.g. in playlists + return null; + } } @Nullable @Override - public DateWrapper getUploadDate() { + public DateWrapper getUploadDate() throws ParsingException { + String textualUploadDate = getTextualUploadDate(); + if (timeAgoParser != null && textualUploadDate != null && !textualUploadDate.isEmpty()) { + try { + return timeAgoParser.parse(textualUploadDate); + } catch (ParsingException e) { + throw new ParsingException("Could not get upload date", e); + } + } return null; } From 9efcc61ca6ed25499d4367f8ff95c34ea8db6047 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Tue, 25 Feb 2020 10:40:06 +0100 Subject: [PATCH 43/53] Adapt some tests to match the new URLs --- .../search/YoutubeSearchExtractorChannelOnlyTest.java | 9 ++++++++- .../services/youtube/search/YoutubeSearchQHTest.java | 8 ++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorChannelOnlyTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorChannelOnlyTest.java index 63fc0375a..7add41262 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorChannelOnlyTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorChannelOnlyTest.java @@ -12,6 +12,8 @@ import org.schabi.newpipe.extractor.channel.ChannelInfoItem; import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory; +import java.util.regex.Pattern; + import static java.util.Arrays.asList; import static org.junit.Assert.*; import static org.schabi.newpipe.extractor.ServiceList.YouTube; @@ -51,7 +53,12 @@ public class YoutubeSearchExtractorChannelOnlyTest extends YoutubeSearchExtracto @Test public void testGetSecondPageUrl() throws Exception { - assertEquals("https://www.youtube.com/results?q=pewdiepie&sp=EgIQAlAU&gl=GB&page=2", extractor.getNextPageUrl()); + // check that ctoken, continuation and itct are longer than 5 characters + Pattern pattern = Pattern.compile( + "https:\\/\\/www.youtube.com\\/results\\?search_query=pewdiepie&sp=EgIQAg%253D%253D&gl=GB&pbj=1" + + "&ctoken=[\\w%]{5,}?&continuation=[\\w%]{5,}?&itct=[\\w]{5,}?" + ); + assertTrue(pattern.matcher(extractor.getNextPageUrl()).find()); } @Ignore diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchQHTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchQHTest.java index 60bbf2ff7..8777cc701 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchQHTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchQHTest.java @@ -28,13 +28,13 @@ public class YoutubeSearchQHTest { @Test public void testWithContentfilter() throws Exception { - assertEquals("https://www.youtube.com/results?q=asdf&sp=EgIQAVAU", YouTube.getSearchQHFactory() + assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAQ%253D%253D", YouTube.getSearchQHFactory() .fromQuery("asdf", asList(new String[]{VIDEOS}), "").getUrl()); - assertEquals("https://www.youtube.com/results?q=asdf&sp=EgIQAlAU", YouTube.getSearchQHFactory() + assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAg%253D%253D", YouTube.getSearchQHFactory() .fromQuery("asdf", asList(new String[]{CHANNELS}), "").getUrl()); - assertEquals("https://www.youtube.com/results?q=asdf&sp=EgIQA1AU", YouTube.getSearchQHFactory() + assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAw%253D%253D", YouTube.getSearchQHFactory() .fromQuery("asdf", asList(new String[]{PLAYLISTS}), "").getUrl()); - assertEquals("https://www.youtube.com/results?q=asdf", YouTube.getSearchQHFactory() + assertEquals("https://www.youtube.com/results?search_query=asdf", YouTube.getSearchQHFactory() .fromQuery("asdf", asList(new String[]{"fjiijie"}), "").getUrl()); } From eed29ea734ae3008d80f5754ca2cab7eed0578df Mon Sep 17 00:00:00 2001 From: wb9688 Date: Tue, 25 Feb 2020 16:14:49 +0100 Subject: [PATCH 44/53] Return null for related streams at age restricted videos --- .../youtube/extractors/YoutubeStreamExtractor.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 62876d38a..aa610d36d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -177,7 +177,6 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public int getAgeLimit() throws ParsingException { assertPageFetched(); - // TODO: Find new way to get age limit if (!isAgeRestricted) { return NO_AGE_LIMIT; } @@ -497,6 +496,9 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public StreamInfoItem getNextStream() throws ExtractionException { assertPageFetched(); + if (isAgeRestricted) { + return null; + } try { final JsonObject videoInfo = initialData.getObject("contents").getObject("twoColumnWatchNextResults") .getObject("secondaryResults").getObject("secondaryResults").getArray("results") @@ -515,6 +517,9 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public StreamInfoItemsCollector getRelatedStreams() throws ExtractionException { assertPageFetched(); + if (isAgeRestricted) { + return null; + } try { StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); JsonArray results = initialData.getObject("contents").getObject("twoColumnWatchNextResults") From ecfc27a92c9a57345eca43305d7eb3ce625ce2ce Mon Sep 17 00:00:00 2001 From: TobiGr Date: Tue, 25 Feb 2020 16:24:18 +0100 Subject: [PATCH 45/53] Implement fallback methods for getDescription() and getTextualUploadDate() in YouTubeStreamExtractor --- .../extractors/YoutubeStreamExtractor.java | 49 +++++++++++++------ 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index aa610d36d..7f8e84d30 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -38,13 +38,8 @@ import org.schabi.newpipe.extractor.utils.Utils; import java.io.IOException; import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; +import java.text.SimpleDateFormat; +import java.util.*; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -126,13 +121,28 @@ public class YoutubeStreamExtractor extends StreamExtractor { return null; } - // TODO: try videoPrimaryInfoRenderer.dateText.simpleText + try { + //return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate"); + } catch (Exception ignored) {} try { - return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate"); + JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results") + .getObject("results").getArray("contents"); + for (Object c: contents) { + String unformattedDate = ""; + try { + JsonObject o = (JsonObject) c; + unformattedDate = o.getObject("videoPrimaryInfoRenderer").getObject("dateText").getString("simpleText"); + + } catch (Exception ignored) {/* we got the wrong element form the array */} + // TODO this parses English formatted dates only, we need a better approach to parse teh textual date + Date d = new SimpleDateFormat("dd MMM yyy").parse(unformattedDate); + return new SimpleDateFormat("yyyy-MM-dd").format(d); + } } catch (Exception e) { - throw new ParsingException("Could not get upload date"); + throw new ParsingException("Could not get upload date", e); } + throw new ParsingException("Could not get upload date"); } @Override @@ -165,13 +175,22 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public Description getDescription() throws ParsingException { assertPageFetched(); - // TODO: Parse videoSecondaryInfoRenderer.description + // raw non-html description try { - // raw non-html description return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT); - } catch (Exception ignored) { - throw new ParsingException("Could not get the description"); - } + } catch (Exception ignored) { } + try { + JsonArray descriptions = getVideoSecondaryInfoRenderer().getObject("description").getArray("runs"); + StringBuilder descriptionBuilder = new StringBuilder(descriptions.size()); + for (Object textObjectHolder : descriptions) { + JsonObject textHolder = (JsonObject) textObjectHolder; + String text = textHolder.getString("text"); + if (text != null) descriptionBuilder.append(text); + } + String description = descriptionBuilder.toString(); + if (!description.isEmpty()) return new Description(description, Description.PLAIN_TEXT); + } catch (Exception ignored) { } + throw new ParsingException("Could not get description"); } @Override From 8e6be880d7641d3a722a12da308cb5046ab49249 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Tue, 25 Feb 2020 16:32:08 +0100 Subject: [PATCH 46/53] Simplify getId() in YoutubeChannelExtractor --- .../extractors/YoutubeChannelExtractor.java | 24 ++++++------------- .../YoutubeStreamInfoItemExtractor.java | 4 +--- 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 4c7185f1d..7c448662f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -6,7 +6,6 @@ import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.channel.ChannelExtractor; import org.schabi.newpipe.extractor.downloader.Downloader; @@ -91,18 +90,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getId() throws ParsingException { try { - return doc.select("meta[property=\"og:url\"]").first().attr("content").replace(CHANNEL_URL_BASE, ""); - } catch (Exception ignored) {} - try { - return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId"); - } catch (Exception ignored) {} - - // fallback method; does not work with channels that have no "Subscribe" button (e.g. EminemVEVO) - try { - Element element = doc.getElementsByClass("yt-uix-subscription-button").first(); - if (element == null) element = doc.getElementsByClass("yt-uix-subscription-preferences-button").first(); - - return element.attr("data-channel-external-id"); + return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getString("channelId"); } catch (Exception e) { throw new ParsingException("Could not get channel id", e); } @@ -112,7 +100,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getName() throws ParsingException { try { - return doc.select("meta[property=\"og:title\"]").first().attr("content"); + return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getString("title"); } catch (Exception e) { throw new ParsingException("Could not get channel name", e); } @@ -121,7 +109,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getAvatarUrl() throws ParsingException { try { - return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("avatar").getArray("thumbnails").getObject(0).getString("url"); + return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("avatar") + .getArray("thumbnails").getObject(0).getString("url"); } catch (Exception e) { throw new ParsingException("Could not get avatar", e); } @@ -132,7 +121,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor { try { String url = null; try { - url = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("banner").getArray("thumbnails").getObject(0).getString("url"); + url = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("banner") + .getArray("thumbnails").getObject(0).getString("url"); } catch (Exception ignored) {} if (url == null || url.contains("s.ytimg.com") || url.contains("default_banner")) { return null; @@ -149,7 +139,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { return url; } catch (Exception e) { - throw new ParsingException("Could not get Banner", e); + throw new ParsingException("Could not get banner", e); } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java index 950bab0f2..2010cfb5e 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java @@ -157,10 +157,8 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { @Nullable @Override public String getTextualUploadDate() { - // TODO: Get upload date in case of a videoRenderer (not available in case of a compactVideoRenderer) try { - String s =videoInfo.getObject("publishedTimeText").getString("simpleText"); - return s; + return videoInfo.getObject("publishedTimeText").getString("simpleText"); } catch (Exception e) { // upload date is not always available, e.g. in playlists return null; From bbe1a3cd6263388876397a6a14940e76933f0bd3 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Tue, 25 Feb 2020 17:40:23 +0100 Subject: [PATCH 47/53] Use getVideo(Primary|Secondary)InfoRenderer() --- .../extractors/YoutubeStreamExtractor.java | 33 +++++-------------- 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 7f8e84d30..5d2b73bda 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -126,22 +126,11 @@ public class YoutubeStreamExtractor extends StreamExtractor { } catch (Exception ignored) {} try { - JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results") - .getObject("results").getArray("contents"); - for (Object c: contents) { - String unformattedDate = ""; - try { - JsonObject o = (JsonObject) c; - unformattedDate = o.getObject("videoPrimaryInfoRenderer").getObject("dateText").getString("simpleText"); - - } catch (Exception ignored) {/* we got the wrong element form the array */} - // TODO this parses English formatted dates only, we need a better approach to parse teh textual date - Date d = new SimpleDateFormat("dd MMM yyy").parse(unformattedDate); - return new SimpleDateFormat("yyyy-MM-dd").format(d); - } - } catch (Exception e) { - throw new ParsingException("Could not get upload date", e); - } + // TODO this parses English formatted dates only, we need a better approach to parse the textual date + Date d = new SimpleDateFormat("dd MMM yyy").parse(getVideoPrimaryInfoRenderer() + .getObject("dateText").getString("simpleText")); + return new SimpleDateFormat("yyyy-MM-dd").format(d); + } catch (Exception ignored) {} throw new ParsingException("Could not get upload date"); } @@ -351,7 +340,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { public String getUploaderAvatarUrl() throws ParsingException { assertPageFetched(); - String uploaderAvatarUrl; + String uploaderAvatarUrl = null; try { uploaderAvatarUrl = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("secondaryResults") .getObject("secondaryResults").getArray("results").getObject(0).getObject("compactAutoplayRenderer") @@ -363,13 +352,9 @@ public class YoutubeStreamExtractor extends StreamExtractor { } catch (Exception ignored) {} try { - uploaderAvatarUrl = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results") - .getObject("results").getArray("contents").getObject(1).getObject("videoSecondaryInfoRenderer") - .getObject("owner").getObject("videoOwnerRenderer").getObject("thumbnail").getArray("thumbnails") - .getObject(0).getString("url"); - } catch (Exception e) { - throw new ParsingException("Could not get uploader avatar url", e); - } + uploaderAvatarUrl = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer") + .getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url"); + } catch (Exception ignored) {} if (uploaderAvatarUrl == null) { throw new ParsingException("Could not get uploader avatar url"); From 5d08c343227e44ebc6bb8acca73072e2e2878fa5 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Tue, 25 Feb 2020 19:47:26 +0100 Subject: [PATCH 48/53] Fix parsing upload date in YoutubeStreamExtractor --- .../services/youtube/extractors/YoutubeStreamExtractor.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 5d2b73bda..26dc24437 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -127,8 +127,8 @@ public class YoutubeStreamExtractor extends StreamExtractor { try { // TODO this parses English formatted dates only, we need a better approach to parse the textual date - Date d = new SimpleDateFormat("dd MMM yyy").parse(getVideoPrimaryInfoRenderer() - .getObject("dateText").getString("simpleText")); + Date d = new SimpleDateFormat("dd MMM yyyy", Locale.ENGLISH).parse( + getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText")); return new SimpleDateFormat("yyyy-MM-dd").format(d); } catch (Exception ignored) {} throw new ParsingException("Could not get upload date"); From 0fff03038ed285a2e1f2610b5d6eaab97c75ee35 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Tue, 25 Feb 2020 18:27:39 +0100 Subject: [PATCH 49/53] Get description with correct links --- .../extractors/YoutubeStreamExtractor.java | 50 ++++++++++++++++--- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 26dc24437..09a00ef69 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -38,6 +38,7 @@ import org.schabi.newpipe.extractor.utils.Utils; import java.io.IOException; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; import java.util.*; @@ -164,22 +165,57 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public Description getDescription() throws ParsingException { assertPageFetched(); - // raw non-html description - try { - return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT); - } catch (Exception ignored) { } + + // description with more info on links try { + boolean htmlConversionRequired = false; JsonArray descriptions = getVideoSecondaryInfoRenderer().getObject("description").getArray("runs"); StringBuilder descriptionBuilder = new StringBuilder(descriptions.size()); for (Object textObjectHolder : descriptions) { JsonObject textHolder = (JsonObject) textObjectHolder; String text = textHolder.getString("text"); - if (text != null) descriptionBuilder.append(text); + if (textHolder.getObject("navigationEndpoint") != null) { + // The text is a link. Get the URL it points to and generate a HTML link of it + String internUrl = textHolder.getObject("navigationEndpoint").getObject("urlEndpoint").getString("url"); + if (internUrl.startsWith("/redirect?")) { + // q parameter can be the first parameter + internUrl = internUrl.substring(10); + } + String[] params = internUrl.split("&"); + for (String param : params) { + if (param.charAt(0) == 'q') { + String url = java.net.URLDecoder.decode(param.substring(2), StandardCharsets.UTF_8.name()); + if (url != null && !url.isEmpty()) { + descriptionBuilder.append("").append(text).append(""); + htmlConversionRequired = true; + } else { + descriptionBuilder.append(text); + } + } + } + } else if (text != null) { + descriptionBuilder.append(text); + } } + String description = descriptionBuilder.toString(); - if (!description.isEmpty()) return new Description(description, Description.PLAIN_TEXT); + + if (!description.isEmpty()) { + if (htmlConversionRequired) { + description = description.replaceAll("\\n", "
"); + return new Description(description, Description.HTML); + } + return new Description(description, Description.PLAIN_TEXT); + } + } catch (Exception ignored) { } - throw new ParsingException("Could not get description"); + + // raw non-html description + try { + return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT); + } catch (Exception ignored) { + throw new ParsingException("Could not get description"); + } } @Override From 26fb44595f7af1f00d50a0fec2b4ec84a3625337 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Tue, 25 Feb 2020 20:40:47 +0100 Subject: [PATCH 50/53] Fix parsing description --- .../extractors/YoutubeStreamExtractor.java | 38 +++++++++++++------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 09a00ef69..28817ae0a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -38,9 +38,17 @@ import org.schabi.newpipe.extractor.utils.Utils; import java.io.IOException; import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -180,18 +188,24 @@ public class YoutubeStreamExtractor extends StreamExtractor { if (internUrl.startsWith("/redirect?")) { // q parameter can be the first parameter internUrl = internUrl.substring(10); - } - String[] params = internUrl.split("&"); - for (String param : params) { - if (param.charAt(0) == 'q') { - String url = java.net.URLDecoder.decode(param.substring(2), StandardCharsets.UTF_8.name()); - if (url != null && !url.isEmpty()) { - descriptionBuilder.append("").append(text).append(""); - htmlConversionRequired = true; - } else { - descriptionBuilder.append(text); + String[] params = internUrl.split("&"); + for (String param : params) { + if (param.split("=")[0].equals("q")) { + String url = URLDecoder.decode(param.split("=")[1], StandardCharsets.UTF_8.name()); + if (url != null && !url.isEmpty()) { + descriptionBuilder.append("").append(text).append(""); + htmlConversionRequired = true; + } else { + descriptionBuilder.append(text); + } + break; } } + } else if (internUrl.startsWith("http")) { + descriptionBuilder.append("").append(text).append(""); + htmlConversionRequired = true; + } else if (text != null) { + descriptionBuilder.append(text); } } else if (text != null) { descriptionBuilder.append(text); @@ -203,11 +217,11 @@ public class YoutubeStreamExtractor extends StreamExtractor { if (!description.isEmpty()) { if (htmlConversionRequired) { description = description.replaceAll("\\n", "
"); + description = description.replaceAll(" ", "  "); return new Description(description, Description.HTML); } return new Description(description, Description.PLAIN_TEXT); } - } catch (Exception ignored) { } // raw non-html description From 729fd2eaeec758019604b987c6d43522f4879c01 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Tue, 25 Feb 2020 20:54:11 +0100 Subject: [PATCH 51/53] Fix parsing description for navigationEndpoint without urlEndpoint --- .../extractors/YoutubeStreamExtractor.java | 44 ++++++++++--------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 28817ae0a..b95ab08c1 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -173,7 +173,6 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public Description getDescription() throws ParsingException { assertPageFetched(); - // description with more info on links try { boolean htmlConversionRequired = false; @@ -184,30 +183,33 @@ public class YoutubeStreamExtractor extends StreamExtractor { String text = textHolder.getString("text"); if (textHolder.getObject("navigationEndpoint") != null) { // The text is a link. Get the URL it points to and generate a HTML link of it - String internUrl = textHolder.getObject("navigationEndpoint").getObject("urlEndpoint").getString("url"); - if (internUrl.startsWith("/redirect?")) { - // q parameter can be the first parameter - internUrl = internUrl.substring(10); - String[] params = internUrl.split("&"); - for (String param : params) { - if (param.split("=")[0].equals("q")) { - String url = URLDecoder.decode(param.split("=")[1], StandardCharsets.UTF_8.name()); - if (url != null && !url.isEmpty()) { - descriptionBuilder.append("").append(text).append(""); - htmlConversionRequired = true; - } else { - descriptionBuilder.append(text); + if (textHolder.getObject("navigationEndpoint").getObject("urlEndpoint") != null) { + String internUrl = textHolder.getObject("navigationEndpoint").getObject("urlEndpoint").getString("url"); + if (internUrl.startsWith("/redirect?")) { + // q parameter can be the first parameter + internUrl = internUrl.substring(10); + String[] params = internUrl.split("&"); + for (String param : params) { + if (param.split("=")[0].equals("q")) { + String url = URLDecoder.decode(param.split("=")[1], StandardCharsets.UTF_8.name()); + if (url != null && !url.isEmpty()) { + descriptionBuilder.append("").append(text).append(""); + htmlConversionRequired = true; + } else { + descriptionBuilder.append(text); + } + break; } - break; } + } else if (internUrl.startsWith("http")) { + descriptionBuilder.append("").append(text).append(""); + htmlConversionRequired = true; } - } else if (internUrl.startsWith("http")) { - descriptionBuilder.append("").append(text).append(""); - htmlConversionRequired = true; - } else if (text != null) { - descriptionBuilder.append(text); + continue; } - } else if (text != null) { + continue; + } + if (text != null) { descriptionBuilder.append(text); } } From 5a35300a5e3fc0221a6349351306a964e74adfb7 Mon Sep 17 00:00:00 2001 From: wb9688 Date: Tue, 25 Feb 2020 21:19:53 +0100 Subject: [PATCH 52/53] Fix parsing upload date for premiered streams --- .../youtube/extractors/YoutubeStreamExtractor.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index b95ab08c1..91c088fad 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -20,7 +20,9 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.linkhandler.LinkHandler; import org.schabi.newpipe.extractor.localization.DateWrapper; +import org.schabi.newpipe.extractor.localization.Localization; import org.schabi.newpipe.extractor.localization.TimeAgoParser; +import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager; import org.schabi.newpipe.extractor.services.youtube.ItagItem; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.stream.AudioStream; @@ -42,6 +44,7 @@ import java.net.URLDecoder; import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; import java.util.ArrayList; +import java.util.Calendar; import java.util.Collections; import java.util.Date; import java.util.HashMap; @@ -134,6 +137,15 @@ public class YoutubeStreamExtractor extends StreamExtractor { //return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate"); } catch (Exception ignored) {} + try { + if (getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText").startsWith("Premiered")) { + String timeAgo = getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText").substring(10); + TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.fromLocalizationCode("en")); + Calendar parsedTimeAgo = timeAgoParser.parse(timeAgo).date(); + return new SimpleDateFormat("yyyy-MM-dd").format(parsedTimeAgo.getTime()); + } + } catch (Exception ignored) {} + try { // TODO this parses English formatted dates only, we need a better approach to parse the textual date Date d = new SimpleDateFormat("dd MMM yyyy", Locale.ENGLISH).parse( From 96285e08151ef5a48afdd7ba28b7857924ddd6ca Mon Sep 17 00:00:00 2001 From: TobiGr Date: Tue, 25 Feb 2020 21:50:11 +0100 Subject: [PATCH 53/53] Fix getUploadDate() for premiered videos with a given date --- .../extractors/YoutubeStreamExtractor.java | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 91c088fad..2408b5c02 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -134,15 +134,23 @@ public class YoutubeStreamExtractor extends StreamExtractor { } try { - //return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate"); + // return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate"); } catch (Exception ignored) {} try { if (getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText").startsWith("Premiered")) { - String timeAgo = getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText").substring(10); - TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.fromLocalizationCode("en")); - Calendar parsedTimeAgo = timeAgoParser.parse(timeAgo).date(); - return new SimpleDateFormat("yyyy-MM-dd").format(parsedTimeAgo.getTime()); + String time = getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText").substring(10); + + try { // Premiered 20 hours ago + TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.fromLocalizationCode("en")); + Calendar parsedTime = timeAgoParser.parse(time).date(); + return new SimpleDateFormat("yyyy-MM-dd").format(parsedTime.getTime()); + } catch (Exception ignored) {} + + try { // Premiered Premiered Feb 21, 2020 + Date d = new SimpleDateFormat("MMM dd, YYYY", Locale.ENGLISH).parse(time); + return new SimpleDateFormat("yyyy-MM-dd").format(d.getTime()); + } catch (Exception ignored) {} } } catch (Exception ignored) {}