[YouTube] Fix channel extraction when redirects are in the response

Some redirects were embedded directly in the response as instructions
for the page, instead of being sent as the usual HTTP redirects.
This commit is contained in:
Mauricio Colli 2020-03-14 02:29:43 -03:00
parent e7be952fbf
commit 00d1ed439b
No known key found for this signature in database
GPG Key ID: F200BFD6F29DDD85
4 changed files with 140 additions and 10 deletions

View File

@ -2,7 +2,6 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
import org.schabi.newpipe.extractor.downloader.Downloader;
@ -16,13 +15,11 @@ import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull;
import java.io.IOException;
import javax.annotation.Nonnull;
import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.fixThumbnailUrl;
import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getJsonResponse;
import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.*;
import static org.schabi.newpipe.extractor.utils.JsonUtils.*;
/*
* Created by Christian Schabesberger on 25.07.16.
@ -55,9 +52,45 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
/**
 * Fetches the channel's "videos" page as JSON and stores its "response" object in
 * {@code initialData}, following redirects that are embedded in the response body.
 * <p>
 * A response is treated as a redirect when its first "onResponseReceivedActions" entry carries a
 * "navigateAction" endpoint of page type "WEB_PAGE_TYPE_BROWSE" with a non-empty "browseId".
 * At most three requests are made.
 *
 * @param downloader not referenced directly in this method body
 * @throws IOException declared by the signature; presumably raised by the page request — confirm
 * @throws ExtractionException if a redirect points to an id that does not start with "UC", or if
 *                             no non-redirect response was obtained within the request limit
 */
@Override
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
// NOTE(review): `url` is declared twice in a row (final, then non-final). This view looks like a
// diff with its +/- markers stripped, so the `final` line is presumably the replaced one — confirm.
final String url = super.getUrl() + "/videos?pbj=1&view=0&flow=grid";
String url = super.getUrl() + "/videos?pbj=1&view=0&flow=grid";
// Stays null unless one of the requests below returns a non-redirect response.
JsonArray ajaxJson = null;
// Counts how many embedded redirects have been followed so far.
int level = 0;
while (level < 3) {
final JsonArray jsonResponse = getJsonResponse(url, getExtractorLocalization());
// Walk down to the redirect endpoint; the EMPTY_* defaults are returned for missing keys, so
// an absent path ends in an empty object rather than null.
final JsonObject endpoint = jsonResponse.getObject(1, EMPTY_OBJECT)
.getObject("response", EMPTY_OBJECT).getArray("onResponseReceivedActions", EMPTY_ARRAY)
.getObject(0, EMPTY_OBJECT).getObject("navigateAction", EMPTY_OBJECT)
.getObject("endpoint", EMPTY_OBJECT);
final String webPageType = endpoint
.getObject("commandMetadata", EMPTY_OBJECT)
.getObject("webCommandMetadata", EMPTY_OBJECT)
.getString("webPageType", EMPTY_STRING);
final String browseId = endpoint
.getObject("browseEndpoint", EMPTY_OBJECT)
.getString("browseId", EMPTY_STRING);
// Both conditions must hold for the response to count as a redirect instruction.
if (webPageType.equalsIgnoreCase("WEB_PAGE_TYPE_BROWSE") && !browseId.isEmpty()) {
// Only ids with the "UC" prefix are accepted as redirect targets.
if (!browseId.startsWith("UC")) {
throw new ExtractionException("Redirected id is not pointing to a channel");
}
// Retry against the redirected channel id with the same query parameters.
url = "https://www.youtube.com/channel/" + browseId + "/videos?pbj=1&view=0&flow=grid";
level++;
} else {
// Not a redirect: keep this response and stop requesting.
ajaxJson = jsonResponse;
break;
}
}
// Reached with null only when every request in the loop was answered with a redirect.
if (ajaxJson == null) {
throw new ExtractionException("Could not fetch initial JSON data");
}
// NOTE(review): second declaration of `ajaxJson` in this scope; looks like the pre-change line
// that the loop above replaced (stripped diff markers) — confirm.
final JsonArray ajaxJson = getJsonResponse(url, getExtractorLocalization());
// The payload of interest is the "response" object of the array's second element.
initialData = ajaxJson.getObject(1).getObject("response");
YoutubeParsingHelper.defaultAlertsCheck(initialData);
}

View File

@ -621,7 +621,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
playerResponse = getPlayerResponse();
final JsonObject playabilityStatus = playerResponse.getObject("playabilityStatus", JsonUtils.DEFAULT_EMPTY);
final JsonObject playabilityStatus = playerResponse.getObject("playabilityStatus", JsonUtils.EMPTY_OBJECT);
final String status = playabilityStatus.getString("status");
// If status exist, and is not "OK", throw a ContentNotAvailableException with the reason.
if (status != null && !status.toLowerCase().equals("ok")) {

View File

@ -11,7 +11,9 @@ import java.util.Arrays;
import java.util.List;
public class JsonUtils {
// NOTE(review): DEFAULT_EMPTY and EMPTY_OBJECT are both initialised the same way; in this view they
// look like the before/after names of a single constant (stripped diff markers) — confirm which
// one is current.
public static final JsonObject DEFAULT_EMPTY = new JsonObject();
// Shared empty instances, used elsewhere as the default argument of getObject/getArray/getString
// lookups so that a chain of lookups never yields null.
// NOTE(review): each constant is one shared instance; if JsonObject/JsonArray are mutable,
// callers must never modify a returned default — confirm.
public static final JsonObject EMPTY_OBJECT = new JsonObject();
public static final JsonArray EMPTY_ARRAY = new JsonArray();
public static final String EMPTY_STRING = "";
// Private constructor: the class cannot be instantiated from outside.
private JsonUtils() {
}

View File

@ -5,12 +5,16 @@ import org.junit.Test;
import org.schabi.newpipe.DownloaderTestImpl;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
import org.schabi.newpipe.extractor.channel.ChannelInfo;
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.services.BaseChannelExtractorTest;
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelExtractor;
import java.util.List;
import static org.junit.Assert.*;
import static org.schabi.newpipe.extractor.ExtractorAsserts.assertEmpty;
import static org.schabi.newpipe.extractor.ExtractorAsserts.assertIsSecureUrl;
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
import static org.schabi.newpipe.extractor.services.DefaultTests.*;
@ -505,6 +509,97 @@ public class YoutubeChannelExtractorTest {
}
}
/**
 * Covers channels (some VEVO ones, for instance) that answer with a redirect to a new page
 * carrying a different channel id.
 * <p>
 * The redirect is not a plain HTTP redirect: it is an instruction embedded in the response itself.
 * These tests ensure the extractor accounts for that.
 */
public static class RedirectedChannel implements BaseChannelExtractorTest {
    private static YoutubeChannelExtractor extractor;

    @BeforeClass
    public static void setUp() throws Exception {
        NewPipe.init(DownloaderTestImpl.getInstance());
        // Start from the old channel id; the fetched page is expected to point to the new one.
        final String requestedUrl = "https://www.youtube.com/channel/UCITk7Ky4iE5_xISw9IaHqpQ";
        extractor = (YoutubeChannelExtractor) YouTube.getChannelExtractor(requestedUrl);
        extractor.fetchPage();
    }

    /*//////////////////////////////////////////////////////////////////////////
    // Extractor
    //////////////////////////////////////////////////////////////////////////*/

    @Test
    public void testServiceId() {
        assertEquals(YouTube.getServiceId(), extractor.getServiceId());
    }

    @Test
    public void testName() throws Exception {
        final String channelName = extractor.getName();
        assertEquals("LordiVEVO", channelName);
    }

    @Test
    public void testId() throws Exception {
        // The id must be the redirected one, not the one that was requested.
        final String channelId = extractor.getId();
        assertEquals("UCrxkwepj7-4Wz1wHyfzw-sQ", channelId);
    }

    @Test
    public void testUrl() throws ParsingException {
        final String resolvedUrl = extractor.getUrl();
        assertEquals("https://www.youtube.com/channel/UCrxkwepj7-4Wz1wHyfzw-sQ", resolvedUrl);
    }

    @Test
    public void testOriginalUrl() throws ParsingException {
        // The original url keeps the id that was requested in setUp().
        final String originalUrl = extractor.getOriginalUrl();
        assertEquals("https://www.youtube.com/channel/UCITk7Ky4iE5_xISw9IaHqpQ", originalUrl);
    }

    /*//////////////////////////////////////////////////////////////////////////
    // ListExtractor
    //////////////////////////////////////////////////////////////////////////*/

    @Test
    public void testRelatedItems() throws Exception {
        defaultTestRelatedItems(extractor);
    }

    @Test
    public void testMoreRelatedItems() throws Exception {
        assertNoMoreItems(extractor);
    }

    /*//////////////////////////////////////////////////////////////////////////
    // ChannelExtractor
    //////////////////////////////////////////////////////////////////////////*/

    @Test
    public void testDescription() throws Exception {
        final String description = extractor.getDescription();
        assertEmpty(description);
    }

    @Test
    public void testAvatarUrl() throws Exception {
        final String avatarUrl = extractor.getAvatarUrl();
        assertIsSecureUrl(avatarUrl);
        // The url itself is the failure message, so a mismatch shows what was returned.
        final boolean containsYt3 = avatarUrl.contains("yt3");
        assertTrue(avatarUrl, containsYt3);
    }

    @Test
    public void testBannerUrl() throws Exception {
        final String bannerUrl = extractor.getBannerUrl();
        assertEmpty(bannerUrl);
    }

    @Test
    public void testFeedUrl() throws Exception {
        final String feedUrl = extractor.getFeedUrl();
        assertEquals("https://www.youtube.com/feeds/videos.xml?channel_id=UCrxkwepj7-4Wz1wHyfzw-sQ", feedUrl);
    }

    @Test
    public void testSubscriberCount() throws Exception {
        assertEquals(-1, extractor.getSubscriberCount());
    }
}
public static class RandomChannel implements BaseChannelExtractorTest {
private static YoutubeChannelExtractor extractor;