Merge branch 'dev' of github.com:TeamNewPipe/NewPipeExtractor into channel-tabs

This commit is contained in:
ThetaDev 2023-04-27 11:56:50 +02:00
commit 417b79757f
11 changed files with 1301 additions and 58 deletions

View File

@ -29,7 +29,7 @@ allprojects {
ext {
nanojsonVersion = "1d9e1aea9049fc9f85e68b43ba39fe7be1c1f751"
spotbugsVersion = "4.7.3"
junitVersion = "5.9.2"
junitVersion = "5.9.3"
checkstyleVersion = "10.4"
}
}

View File

@ -14,8 +14,8 @@ import org.jsoup.nodes.Document;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.PaidContentException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
@ -64,7 +64,7 @@ public class BandcampPlaylistExtractor extends PlaylistExtractor {
if (trackInfo.isEmpty()) {
// Albums without trackInfo need to be purchased before they can be played
throw new ContentNotAvailableException("Album needs to be purchased");
throw new PaidContentException("Album needs to be purchased");
}
}

View File

@ -15,6 +15,7 @@ import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.PaidContentException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
import org.schabi.newpipe.extractor.localization.DateWrapper;
@ -57,6 +58,10 @@ public class BandcampStreamExtractor extends StreamExtractor {
// In this case, we are actually viewing an album page!
throw new ExtractionException("Page is actually an album, not a track");
}
if (albumJson.getArray("trackinfo").getObject(0).isNull("file")) {
throw new PaidContentException("This track is not available without being purchased");
}
}
/**

View File

@ -841,7 +841,7 @@ public final class YoutubeParsingHelper {
musicKey = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
musicClientVersion = getStringResultFromRegexArray(html,
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES);
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
musicClientName = Parser.matchGroup1(INNERTUBE_CLIENT_NAME_REGEX, html);
}

View File

@ -3,7 +3,6 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
import static org.schabi.newpipe.extractor.services.youtube.YouTubeChannelHelper.ChannelResponseData;
import static org.schabi.newpipe.extractor.services.youtube.YouTubeChannelHelper.getChannelResponse;
import static org.schabi.newpipe.extractor.services.youtube.YouTubeChannelHelper.resolveChannelId;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
@ -28,6 +27,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.stream.Collectors;
@ -55,6 +55,9 @@ import javax.annotation.Nonnull;
public class YoutubeChannelExtractor extends ChannelExtractor {
private JsonObject initialData;
private Optional<JsonObject> channelHeader;
private boolean isCarouselHeader = false;
private JsonObject videoTab;
/**
* Some channels have response redirects and the only way to reliably get the id is by saving it
@ -86,6 +89,30 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
redirectedChannelId = data.channelId;
}
/**
 * Resolves the renderer object that describes the channel header and caches the result.
 *
 * <p>The lookup runs only on the first call; later calls return the cached value. Two header
 * layouts are recognised in {@code initialData.header}:</p>
 * <ul>
 *     <li>{@code c4TabbedHeaderRenderer}, which is returned as is;</li>
 *     <li>{@code carouselHeaderRenderer}, in which case {@link #isCarouselHeader} is set and the
 *     first {@code topicChannelDetailsRenderer} found in its {@code contents} is returned.</li>
 * </ul>
 *
 * @return the header renderer, or an empty {@link Optional} if neither layout is present (or
 * the carousel contains no {@code topicChannelDetailsRenderer})
 */
@Nonnull
private Optional<JsonObject> getChannelHeader() {
    // A non-null field means the lookup already happened, even if it found nothing
    if (channelHeader != null) {
        return channelHeader;
    }

    final JsonObject headerJson = initialData.getObject("header");

    if (headerJson.has("c4TabbedHeaderRenderer")) {
        channelHeader = Optional.of(headerJson.getObject("c4TabbedHeaderRenderer"));
        return channelHeader;
    }

    if (!headerJson.has("carouselHeaderRenderer")) {
        channelHeader = Optional.empty();
        return channelHeader;
    }

    // Remember the layout: other getters use this flag to special-case carousel headers
    isCarouselHeader = true;
    channelHeader = headerJson.getObject("carouselHeaderRenderer")
            .getArray("contents")
            .stream()
            .filter(entry -> entry instanceof JsonObject
                    && ((JsonObject) entry).has("topicChannelDetailsRenderer"))
            .findFirst()
            .map(entry -> ((JsonObject) entry).getObject("topicChannelDetailsRenderer"));
    return channelHeader;
}
@Nonnull
@Override
public String getUrl() throws ParsingException {
@ -99,17 +126,14 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
/**
 * Returns the id of the channel.
 *
 * <p>The id is looked up in this order, and the first non-empty value wins:</p>
 * <ol>
 *     <li>{@code channelId} of the channel header;</li>
 *     <li>{@code navigationEndpoint.browseEndpoint.browseId} of the channel header;</li>
 *     <li>the channel id saved from the channel response ({@code redirectedChannelId}).</li>
 * </ol>
 *
 * <p>Empty strings are treated like missing values, so a blank id is never returned.</p>
 *
 * @return the non-empty channel id
 * @throws ParsingException if none of the sources provides a non-empty id
 */
@Nonnull
@Override
public String getId() throws ParsingException {
    final Optional<String> headerId = getChannelHeader()
            .flatMap(header -> Optional.ofNullable(header.getString("channelId"))
                    .filter(id -> !id.isEmpty())
                    .or(() -> Optional.ofNullable(header.getObject("navigationEndpoint")
                                    .getObject("browseEndpoint")
                                    .getString("browseId"))
                            .filter(id -> !id.isEmpty())));

    if (headerId.isPresent()) {
        return headerId.get();
    }

    // Fall back to the id saved from the channel response
    if (!isNullOrEmpty(redirectedChannelId)) {
        return redirectedChannelId;
    }

    throw new ParsingException("Could not get channel id");
}
@Nonnull
@ -122,44 +146,41 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
return mdName;
}
final String headerName = initialData.getObject("header")
.getObject("c4TabbedHeaderRenderer")
.getString("title");
if (!isNullOrEmpty(headerName)) {
return headerName;
final Optional<JsonObject> header = getChannelHeader();
if (header.isPresent()) {
final Object title = header.get().get("title");
if (title instanceof String) {
return (String) title;
} else if (title instanceof JsonObject) {
final String headerName = getTextFromObject((JsonObject) title);
if (!isNullOrEmpty(headerName)) {
return headerName;
}
}
}
throw new ParsingException("Could not get channel name");
}
/**
 * Returns the URL of the channel avatar.
 *
 * <p>The URL is read from the first entry of {@code avatar.thumbnails} in the channel header
 * and passed through {@link YoutubeParsingHelper#fixThumbnailUrl}.</p>
 *
 * @return the fixed avatar URL
 * @throws ParsingException if there is no channel header or it carries no avatar URL
 */
@Override
public String getAvatarUrl() throws ParsingException {
    final Optional<JsonObject> header = getChannelHeader();

    if (header.isPresent()) {
        final String rawUrl = header.get()
                .getObject("avatar")
                .getArray("thumbnails")
                .getObject(0)
                .getString("url");

        if (rawUrl != null) {
            final String fixedUrl = YoutubeParsingHelper.fixThumbnailUrl(rawUrl);
            if (fixedUrl != null) {
                return fixedUrl;
            }
        }
    }

    throw new ParsingException("Could not get avatar");
}
/**
 * Returns the URL of the channel banner.
 *
 * <p>The URL is read from the first entry of {@code banner.thumbnails} in the channel header.
 * URLs containing {@code s.ytimg.com} or {@code default_banner} are discarded.</p>
 *
 * <p>A missing banner is not an error, so {@code null} is returned in that case instead of
 * throwing.</p>
 *
 * @return the fixed banner URL, or {@code null} if the channel has no header, no banner or
 * only a discarded banner URL
 */
@Override
public String getBannerUrl() throws ParsingException {
    return getChannelHeader().flatMap(header -> Optional.ofNullable(
                    header.getObject("banner").getArray("thumbnails")
                            .getObject(0).getString("url")
            ))
            .filter(url -> !url.contains("s.ytimg.com") && !url.contains("default_banner"))
            .map(YoutubeParsingHelper::fixThumbnailUrl)
            // No usable banner: report its absence with null rather than an exception
            .orElse(null);
}
@Override
@ -173,17 +194,25 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override
public long getSubscriberCount() throws ParsingException {
final JsonObject c4TabbedHeaderRenderer = initialData.getObject("header")
.getObject("c4TabbedHeaderRenderer");
if (!c4TabbedHeaderRenderer.has("subscriberCountText")) {
return UNKNOWN_SUBSCRIBER_COUNT;
}
try {
return Utils.mixedNumberWordToLong(getTextFromObject(c4TabbedHeaderRenderer
.getObject("subscriberCountText")));
} catch (final NumberFormatException e) {
throw new ParsingException("Could not get subscriber count", e);
final Optional<JsonObject> header = getChannelHeader();
if (header.isPresent()) {
JsonObject textObject = null;
if (header.get().has("subscriberCountText")) {
textObject = header.get().getObject("subscriberCountText");
} else if (header.get().has("subtitle")) {
textObject = header.get().getObject("subtitle");
}
if (textObject != null) {
try {
return Utils.mixedNumberWordToLong(getTextFromObject(textObject));
} catch (final NumberFormatException e) {
throw new ParsingException("Could not get subscriber count", e);
}
}
}
return UNKNOWN_SUBSCRIBER_COUNT;
}
@Override
@ -213,11 +242,17 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
/**
 * Tells whether the channel is verified.
 *
 * @return {@code true} for channels with a carousel header, otherwise the result of
 * {@link YoutubeParsingHelper#isVerified} on the header's {@code badges}; {@code false} if
 * there is no channel header
 */
@Override
public boolean isVerified() throws ParsingException {
    // Resolve the header before reading isCarouselHeader: the flag is only set as a side effect
    // of getChannelHeader(), so reading it first would make the result depend on whether
    // another getter has already been called.
    final Optional<JsonObject> header = getChannelHeader();

    // The CarouselHeaderRenderer does not contain any verification badges.
    // Since it is only shown on YT-internal channels or on channels of large organizations
    // broadcasting live events, we can assume the channel to be verified.
    if (isCarouselHeader) {
        return true;
    }

    return header
            .map(renderer -> renderer.getArray("badges"))
            .map(YoutubeParsingHelper::isVerified)
            .orElse(false);
}
@Nonnull

View File

@ -0,0 +1,25 @@
package org.schabi.newpipe.extractor.services.bandcamp;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.schabi.newpipe.extractor.ServiceList.Bandcamp;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.schabi.newpipe.downloader.DownloaderTestImpl;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.PaidContentException;
/**
 * Checks that fetching a Bandcamp track which must be purchased fails with a
 * {@link PaidContentException}.
 */
public class BandcampPaidStreamExtractorTest {

    /** URL of the track used by {@link #testPaidTrack()}. */
    private static final String PAID_TRACK_URL =
            "https://radicaldreamland.bandcamp.com/track/hackmud-continuous-mix";

    @BeforeAll
    public static void setUp() {
        NewPipe.init(DownloaderTestImpl.getInstance());
    }

    @Test
    public void testPaidTrack() throws ExtractionException {
        final var paidTrackExtractor = Bandcamp.getStreamExtractor(PAID_TRACK_URL);

        // Only the page fetch is expected to fail, not the creation of the extractor
        assertThrows(PaidContentException.class, () -> paidTrackExtractor.fetchPage());
    }
}

View File

@ -718,4 +718,94 @@ public class YoutubeChannelExtractorTest {
assertTrue(tabs.contains(ChannelTabs.CHANNELS));
}
}
/**
 * Tests the extraction of a channel whose page uses a {@code carouselHeaderRenderer} header,
 * using the responses stored under the {@code carouselHeader} resource path.
 */
public static class CarouselHeader implements BaseChannelExtractorTest {
    private static YoutubeChannelExtractor extractor;

    @BeforeAll
    public static void setUp() throws Exception {
        YoutubeTestsUtils.ensureStateless();
        NewPipe.init(DownloaderFactory.getDownloader(RESOURCE_PATH + "carouselHeader"));
        extractor = (YoutubeChannelExtractor) YouTube
                .getChannelExtractor("https://www.youtube.com/channel/UCHF66aWLOxBW4l6VkSrS3cQ");
        extractor.fetchPage();
    }

    /*//////////////////////////////////////////////////////////////////////////
    // Extractor
    //////////////////////////////////////////////////////////////////////////*/

    @Test
    public void testServiceId() {
        assertEquals(YouTube.getServiceId(), extractor.getServiceId());
    }

    @Test
    public void testName() throws Exception {
        // Expected value first, so a failure message reports expected/actual correctly
        assertEquals("Coachella", extractor.getName());
    }

    @Test
    public void testId() throws Exception {
        assertEquals("UCHF66aWLOxBW4l6VkSrS3cQ", extractor.getId());
    }

    @Test
    public void testUrl() throws ParsingException {
        assertEquals("https://www.youtube.com/channel/UCHF66aWLOxBW4l6VkSrS3cQ", extractor.getUrl());
    }

    @Test
    public void testOriginalUrl() throws ParsingException {
        assertEquals("https://www.youtube.com/channel/UCHF66aWLOxBW4l6VkSrS3cQ", extractor.getOriginalUrl());
    }

    /*//////////////////////////////////////////////////////////////////////////
    // ListExtractor
    //////////////////////////////////////////////////////////////////////////*/

    @Test
    public void testRelatedItems() throws Exception {
        defaultTestRelatedItems(extractor);
    }

    @Test
    public void testMoreRelatedItems() throws Exception {
        defaultTestMoreItems(extractor);
    }

    /*//////////////////////////////////////////////////////////////////////////
    // ChannelExtractor
    //////////////////////////////////////////////////////////////////////////*/

    // Intentionally empty: nothing is asserted about the description of this channel
    @Override
    public void testDescription() {
    }

    @Test
    public void testAvatarUrl() throws Exception {
        final String avatarUrl = extractor.getAvatarUrl();
        assertIsSecureUrl(avatarUrl);
        ExtractorAsserts.assertContains("yt3", avatarUrl);
    }

    @Test
    public void testBannerUrl() throws Exception {
        // CarouselHeaderRenderer does not contain a banner
    }

    @Test
    public void testFeedUrl() throws Exception {
        assertEquals("https://www.youtube.com/feeds/videos.xml?channel_id=UCHF66aWLOxBW4l6VkSrS3cQ", extractor.getFeedUrl());
    }

    @Test
    public void testSubscriberCount() throws Exception {
        ExtractorAsserts.assertGreaterOrEqual(2_900_000, extractor.getSubscriberCount());
    }

    @Test
    public void testVerified() throws Exception {
        assertTrue(extractor.isVerified());
    }
}
}

View File

@ -0,0 +1,85 @@
{
"request": {
"httpMethod": "GET",
"url": "https://www.youtube.com/sw.js",
"headers": {
"Origin": [
"https://www.youtube.com"
],
"Referer": [
"https://www.youtube.com"
],
"Accept-Language": [
"en-GB, en;q\u003d0.9"
]
},
"localization": {
"languageCode": "en",
"countryCode": "GB"
}
},
"response": {
"responseCode": 200,
"responseMessage": "",
"responseHeaders": {
"access-control-allow-credentials": [
"true"
],
"access-control-allow-origin": [
"https://www.youtube.com"
],
"alt-svc": [
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000"
],
"cache-control": [
"private, max-age\u003d0"
],
"content-type": [
"text/javascript; charset\u003dutf-8"
],
"cross-origin-opener-policy": [
"same-origin; report-to\u003d\"youtube_main\""
],
"date": [
"Sun, 16 Apr 2023 15:33:19 GMT"
],
"expires": [
"Sun, 16 Apr 2023 15:33:19 GMT"
],
"origin-trial": [
"AvC9UlR6RDk2crliDsFl66RWLnTbHrDbp+DiY6AYz/PNQ4G4tdUTjrHYr2sghbkhGQAVxb7jaPTHpEVBz0uzQwkAAAB4eyJvcmlnaW4iOiJodHRwczovL3lvdXR1YmUuY29tOjQ0MyIsImZlYXR1cmUiOiJXZWJWaWV3WFJlcXVlc3RlZFdpdGhEZXByZWNhdGlvbiIsImV4cGlyeSI6MTcxOTUzMjc5OSwiaXNTdWJkb21haW4iOnRydWV9"
],
"p3p": [
"CP\u003d\"This is not a P3P policy! See http://support.google.com/accounts/answer/151657?hl\u003den-GB for more info.\""
],
"permissions-policy": [
"ch-ua-arch\u003d*, ch-ua-bitness\u003d*, ch-ua-full-version\u003d*, ch-ua-full-version-list\u003d*, ch-ua-model\u003d*, ch-ua-wow64\u003d*, ch-ua-platform\u003d*, ch-ua-platform-version\u003d*"
],
"report-to": [
"{\"group\":\"youtube_main\",\"max_age\":2592000,\"endpoints\":[{\"url\":\"https://csp.withgoogle.com/csp/report-to/youtube_main\"}]}"
],
"server": [
"ESF"
],
"set-cookie": [
"YSC\u003dOCGx8FJdx2E; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dMon, 20-Jul-2020 15:33:19 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"CONSENT\u003dPENDING+955; expires\u003dTue, 15-Apr-2025 15:33:19 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
],
"strict-transport-security": [
"max-age\u003d31536000"
],
"x-content-type-options": [
"nosniff"
],
"x-frame-options": [
"SAMEORIGIN"
],
"x-xss-protection": [
"0"
]
},
"responseBody": "\n self.addEventListener(\u0027install\u0027, event \u003d\u003e {\n event.waitUntil(self.skipWaiting());\n });\n self.addEventListener(\u0027activate\u0027, event \u003d\u003e {\n event.waitUntil(\n self.clients.claim().then(() \u003d\u003e self.registration.unregister()));\n });\n ",
"latestUrl": "https://www.youtube.com/sw.js"
}
}