From 00d1ed439b49cdbabcd6933afd7945bc45e9aefc Mon Sep 17 00:00:00 2001 From: Mauricio Colli Date: Sat, 14 Mar 2020 02:29:43 -0300 Subject: [PATCH 1/2] [YouTube] Fix channel extraction when redirects are in the response Some redirects were embedded directly into the response as instructions for the page, instead of the usual HTTP redirects. --- .../extractors/YoutubeChannelExtractor.java | 49 ++++++++-- .../extractors/YoutubeStreamExtractor.java | 2 +- .../newpipe/extractor/utils/JsonUtils.java | 4 +- .../youtube/YoutubeChannelExtractorTest.java | 95 +++++++++++++++++++ 4 files changed, 140 insertions(+), 10 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 11dd8985f..1aeb65ed4 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -2,7 +2,6 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; - import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.channel.ChannelExtractor; import org.schabi.newpipe.extractor.downloader.Downloader; @@ -16,13 +15,11 @@ import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import org.schabi.newpipe.extractor.utils.Utils; +import javax.annotation.Nonnull; import java.io.IOException; -import javax.annotation.Nonnull; - -import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.fixThumbnailUrl; -import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getJsonResponse; -import static 
org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getTextFromObject; +import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.*; +import static org.schabi.newpipe.extractor.utils.JsonUtils.*; /* * Created by Christian Schabesberger on 25.07.16. @@ -55,9 +52,45 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException { - final String url = super.getUrl() + "/videos?pbj=1&view=0&flow=grid"; + String url = super.getUrl() + "/videos?pbj=1&view=0&flow=grid"; + JsonArray ajaxJson = null; + + int level = 0; + while (level < 3) { + final JsonArray jsonResponse = getJsonResponse(url, getExtractorLocalization()); + + final JsonObject endpoint = jsonResponse.getObject(1, EMPTY_OBJECT) + .getObject("response", EMPTY_OBJECT).getArray("onResponseReceivedActions", EMPTY_ARRAY) + .getObject(0, EMPTY_OBJECT).getObject("navigateAction", EMPTY_OBJECT) + .getObject("endpoint", EMPTY_OBJECT); + + final String webPageType = endpoint + .getObject("commandMetadata", EMPTY_OBJECT) + .getObject("webCommandMetadata", EMPTY_OBJECT) + .getString("webPageType", EMPTY_STRING); + + final String browseId = endpoint + .getObject("browseEndpoint", EMPTY_OBJECT) + .getString("browseId", EMPTY_STRING); + + if (webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_BROWSE") && !browseId.isEmpty()) { + + if (!browseId.startsWith("UC")) { + throw new ExtractionException("Redirected id is not pointing to a channel"); + } + + url = "https://www.youtube.com/channel/" + browseId + "/videos?pbj=1&view=0&flow=grid"; + level++; + } else { + ajaxJson = jsonResponse; + break; + } + } + + if (ajaxJson == null) { + throw new ExtractionException("Could not fetch initial JSON data"); + } - final JsonArray ajaxJson = getJsonResponse(url, getExtractorLocalization()); initialData = ajaxJson.getObject(1).getObject("response"); 
YoutubeParsingHelper.defaultAlertsCheck(initialData); } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 8ee502013..475557867 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -621,7 +621,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { playerResponse = getPlayerResponse(); - final JsonObject playabilityStatus = playerResponse.getObject("playabilityStatus", JsonUtils.DEFAULT_EMPTY); + final JsonObject playabilityStatus = playerResponse.getObject("playabilityStatus", JsonUtils.EMPTY_OBJECT); final String status = playabilityStatus.getString("status"); // If status exist, and is not "OK", throw a ContentNotAvailableException with the reason. 
if (status != null && !status.toLowerCase().equals("ok")) { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java index 25bb3f6c0..e69240a01 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java @@ -11,7 +11,9 @@ import java.util.Arrays; import java.util.List; public class JsonUtils { - public static final JsonObject DEFAULT_EMPTY = new JsonObject(); + public static final JsonObject EMPTY_OBJECT = new JsonObject(); + public static final JsonArray EMPTY_ARRAY = new JsonArray(); + public static final String EMPTY_STRING = ""; private JsonUtils() { } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java index dfcaee40a..3615bc63c 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java @@ -5,12 +5,16 @@ import org.junit.Test; import org.schabi.newpipe.DownloaderTestImpl; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.channel.ChannelExtractor; +import org.schabi.newpipe.extractor.channel.ChannelInfo; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.services.BaseChannelExtractorTest; import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelExtractor; +import java.util.List; + import static org.junit.Assert.*; +import static org.schabi.newpipe.extractor.ExtractorAsserts.assertEmpty; import static 
org.schabi.newpipe.extractor.ExtractorAsserts.assertIsSecureUrl; import static org.schabi.newpipe.extractor.ServiceList.YouTube; import static org.schabi.newpipe.extractor.services.DefaultTests.*; @@ -505,6 +509,97 @@ public class YoutubeChannelExtractorTest { } } + /** + * Some VEVO channels will redirect to a new page with a new channel id. + *

+ * It isn't a simple redirect, though, but a redirect instruction embedded in the response itself; this + * test ensures that we account for that. + */ + public static class RedirectedChannel implements BaseChannelExtractorTest { + private static YoutubeChannelExtractor extractor; + + @BeforeClass + public static void setUp() throws Exception { + NewPipe.init(DownloaderTestImpl.getInstance()); + extractor = (YoutubeChannelExtractor) YouTube + .getChannelExtractor("https://www.youtube.com/channel/UCITk7Ky4iE5_xISw9IaHqpQ"); + extractor.fetchPage(); + } + + /*////////////////////////////////////////////////////////////////////////// + // Extractor + //////////////////////////////////////////////////////////////////////////*/ + + @Test + public void testServiceId() { + assertEquals(YouTube.getServiceId(), extractor.getServiceId()); + } + + @Test + public void testName() throws Exception { + assertEquals("LordiVEVO", extractor.getName()); + } + + @Test + public void testId() throws Exception { + assertEquals("UCrxkwepj7-4Wz1wHyfzw-sQ", extractor.getId()); + } + + @Test + public void testUrl() throws ParsingException { + assertEquals("https://www.youtube.com/channel/UCrxkwepj7-4Wz1wHyfzw-sQ", extractor.getUrl()); + } + + @Test + public void testOriginalUrl() throws ParsingException { + assertEquals("https://www.youtube.com/channel/UCITk7Ky4iE5_xISw9IaHqpQ", extractor.getOriginalUrl()); + } + + /*////////////////////////////////////////////////////////////////////////// + // ListExtractor + //////////////////////////////////////////////////////////////////////////*/ + + @Test + public void testRelatedItems() throws Exception { + defaultTestRelatedItems(extractor); + } + + @Test + public void testMoreRelatedItems() throws Exception { + assertNoMoreItems(extractor); + } + + /*////////////////////////////////////////////////////////////////////////// + // ChannelExtractor + //////////////////////////////////////////////////////////////////////////*/ + + @Test + public void 
testDescription() throws Exception { + assertEmpty(extractor.getDescription()); + } + + @Test + public void testAvatarUrl() throws Exception { + String avatarUrl = extractor.getAvatarUrl(); + assertIsSecureUrl(avatarUrl); + assertTrue(avatarUrl, avatarUrl.contains("yt3")); + } + + @Test + public void testBannerUrl() throws Exception { + assertEmpty(extractor.getBannerUrl()); + } + + @Test + public void testFeedUrl() throws Exception { + assertEquals("https://www.youtube.com/feeds/videos.xml?channel_id=UCrxkwepj7-4Wz1wHyfzw-sQ", extractor.getFeedUrl()); + } + + @Test + public void testSubscriberCount() throws Exception { + assertEquals(-1, extractor.getSubscriberCount()); + } + } public static class RandomChannel implements BaseChannelExtractorTest { private static YoutubeChannelExtractor extractor; From b086e9db3fc73c631da8c7c35974b06b3d5a5392 Mon Sep 17 00:00:00 2001 From: Mauricio Colli Date: Sat, 14 Mar 2020 02:34:29 -0300 Subject: [PATCH 2/2] [YouTube] Fix id extraction for some channels Some channels had no reliable way to get the redirected id in the response, so saving it for later was a valid alternative. 
--- .../extractors/YoutubeChannelExtractor.java | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 1aeb65ed4..385980745 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -46,6 +46,18 @@ public class YoutubeChannelExtractor extends ChannelExtractor { private JsonObject initialData; private JsonObject videoTab; + /** + * Some channels have response redirects and the only way to reliably get the id is by saving it. + *

+ * "Movies & Shows": + * <pre>
+     * UCuJcl0Ju-gPDoksRjK1ya-w ┐
+     * UChBfWrfBXL9wS6tQtgjt_OQ ├ UClgRkhTL3_hImCAmdLfDE4g
+     * UCok7UTQQEP1Rsctxiv3gwSQ ┘
+     * </pre>
+ */ + private String redirectedChannelId; + public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler) { super(service, linkHandler); } @@ -80,6 +92,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { } url = "https://www.youtube.com/channel/" + browseId + "/videos?pbj=1&view=0&flow=grid"; + redirectedChannelId = browseId; level++; } else { ajaxJson = jsonResponse; @@ -117,10 +130,17 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Nonnull @Override public String getId() throws ParsingException { - try { - return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getString("channelId"); - } catch (Exception e) { - throw new ParsingException("Could not get channel id", e); + final String channelId = initialData + .getObject("header", EMPTY_OBJECT) + .getObject("c4TabbedHeaderRenderer", EMPTY_OBJECT) + .getString("channelId", EMPTY_STRING); + + if (!channelId.isEmpty()) { + return channelId; + } else if (redirectedChannelId != null && !redirectedChannelId.isEmpty()) { + return redirectedChannelId; + } else { + throw new ParsingException("Could not get channel id"); } }