Merge pull request #968 from AudricV/yt-support-no-video-info-renderers-for-streams

[YouTube] Support lack of video info renderers for streams
This commit is contained in:
Kavin 2022-11-16 20:20:01 +00:00 committed by GitHub
commit c953e23414
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 1754 additions and 73 deletions

View File

@ -204,45 +204,48 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return null; return null;
} }
if (getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText")) final String videoPrimaryInfoRendererDateText =
.startsWith("Premiered")) { getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText"));
final String time = getTextFromObject(
getVideoPrimaryInfoRenderer().getObject("dateText")).substring(13);
try { // Premiered 20 hours ago if (videoPrimaryInfoRendererDateText != null) {
final TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor( if (videoPrimaryInfoRendererDateText.startsWith("Premiered")) {
Localization.fromLocalizationCode("en")); final String time = videoPrimaryInfoRendererDateText.substring(13);
final OffsetDateTime parsedTime = timeAgoParser.parse(time).offsetDateTime();
return DateTimeFormatter.ISO_LOCAL_DATE.format(parsedTime); try { // Premiered 20 hours ago
} catch (final Exception ignored) { final TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(
Localization.fromLocalizationCode("en"));
final OffsetDateTime parsedTime = timeAgoParser.parse(time).offsetDateTime();
return DateTimeFormatter.ISO_LOCAL_DATE.format(parsedTime);
} catch (final Exception ignored) {
}
try { // Premiered Feb 21, 2020
final LocalDate localDate = LocalDate.parse(time,
DateTimeFormatter.ofPattern("MMM dd, yyyy", Locale.ENGLISH));
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
} catch (final Exception ignored) {
}
try { // Premiered on 21 Feb 2020
final LocalDate localDate = LocalDate.parse(time,
DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH));
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
} catch (final Exception ignored) {
}
} }
try { // Premiered Feb 21, 2020 try {
final LocalDate localDate = LocalDate.parse(time, // TODO: this parses English formatted dates only, we need a better approach to
DateTimeFormatter.ofPattern("MMM dd, yyyy", Locale.ENGLISH)); // parse the textual date
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate); final LocalDate localDate = LocalDate.parse(videoPrimaryInfoRendererDateText,
} catch (final Exception ignored) {
}
try { // Premiered on 21 Feb 2020
final LocalDate localDate = LocalDate.parse(time,
DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH)); DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH));
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate); return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
} catch (final Exception ignored) { } catch (final Exception e) {
throw new ParsingException("Could not get upload date", e);
} }
} }
try { throw new ParsingException("Could not get upload date");
// TODO: this parses English formatted dates only, we need a better approach to parse
// the textual date
final LocalDate localDate = LocalDate.parse(getTextFromObject(
getVideoPrimaryInfoRenderer().getObject("dateText")),
DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH));
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
} catch (final Exception e) {
throw new ParsingException("Could not get upload date", e);
}
} }
@Override @Override
@ -565,19 +568,13 @@ public class YoutubeStreamExtractor extends StreamExtractor {
public String getUploaderAvatarUrl() throws ParsingException { public String getUploaderAvatarUrl() throws ParsingException {
assertPageFetched(); assertPageFetched();
String url = null; final String url = getVideoSecondaryInfoRenderer()
.getObject("owner")
try { .getObject("videoOwnerRenderer")
url = getVideoSecondaryInfoRenderer() .getObject("thumbnail")
.getObject("owner") .getArray("thumbnails")
.getObject("videoOwnerRenderer") .getObject(0)
.getObject("thumbnail") .getString("url");
.getArray("thumbnails")
.getObject(0)
.getString("url");
} catch (final ParsingException ignored) {
// Age-restricted videos cause a ParsingException here
}
if (isNullOrEmpty(url)) { if (isNullOrEmpty(url)) {
if (ageLimit == NO_AGE_LIMIT) { if (ageLimit == NO_AGE_LIMIT) {
@ -1212,40 +1209,29 @@ public class YoutubeStreamExtractor extends StreamExtractor {
// Utils // Utils
//////////////////////////////////////////////////////////////////////////*/ //////////////////////////////////////////////////////////////////////////*/
private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException { @Nonnull
private JsonObject getVideoPrimaryInfoRenderer() {
if (videoPrimaryInfoRenderer != null) { if (videoPrimaryInfoRenderer != null) {
return videoPrimaryInfoRenderer; return videoPrimaryInfoRenderer;
} }
final JsonArray contents = nextResponse.getObject("contents") videoPrimaryInfoRenderer = getVideoInfoRenderer("videoPrimaryInfoRenderer");
.getObject("twoColumnWatchNextResults").getObject("results").getObject("results") return videoPrimaryInfoRenderer;
.getArray("contents");
JsonObject theVideoPrimaryInfoRenderer = null;
for (final Object content : contents) {
if (((JsonObject) content).has("videoPrimaryInfoRenderer")) {
theVideoPrimaryInfoRenderer = ((JsonObject) content)
.getObject("videoPrimaryInfoRenderer");
break;
}
}
if (isNullOrEmpty(theVideoPrimaryInfoRenderer)) {
throw new ParsingException("Could not find videoPrimaryInfoRenderer");
}
videoPrimaryInfoRenderer = theVideoPrimaryInfoRenderer;
return theVideoPrimaryInfoRenderer;
} }
@Nonnull @Nonnull
private JsonObject getVideoSecondaryInfoRenderer() throws ParsingException { private JsonObject getVideoSecondaryInfoRenderer() {
if (videoSecondaryInfoRenderer != null) { if (videoSecondaryInfoRenderer != null) {
return videoSecondaryInfoRenderer; return videoSecondaryInfoRenderer;
} }
videoSecondaryInfoRenderer = nextResponse videoSecondaryInfoRenderer = getVideoInfoRenderer("videoSecondaryInfoRenderer");
.getObject("contents") return videoSecondaryInfoRenderer;
}
@Nonnull
private JsonObject getVideoInfoRenderer(@Nonnull final String videoRendererName) {
return nextResponse.getObject("contents")
.getObject("twoColumnWatchNextResults") .getObject("twoColumnWatchNextResults")
.getObject("results") .getObject("results")
.getObject("results") .getObject("results")
@ -1253,13 +1239,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
.stream() .stream()
.filter(JsonObject.class::isInstance) .filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast) .map(JsonObject.class::cast)
.filter(content -> content.has("videoSecondaryInfoRenderer")) .filter(content -> content.has(videoRendererName))
.map(content -> content.getObject("videoSecondaryInfoRenderer")) .map(content -> content.getObject(videoRendererName))
.findFirst() .findFirst()
.orElseThrow( .orElse(new JsonObject());
() -> new ParsingException("Could not find videoSecondaryInfoRenderer"));
return videoSecondaryInfoRenderer;
} }
@Nonnull @Nonnull

View File

@ -430,6 +430,69 @@ public class YoutubeStreamExtractorDefaultTest {
// @formatter:on // @formatter:on
} }
/**
 * Stream extractor test for a video which has no visual metadata on YouTube clients
 * (video title, upload date, channel name, comments, ...).
 *
 * <p>The subscriber count, the like count and the uploader avatar URL are expected to be
 * unavailable for this video: the corresponding extractor getters must throw a
 * {@link ParsingException}.</p>
 */
public static class NoVisualMetadataVideoTest extends DefaultStreamExtractorTest {
    private static final String ID = "An8vtD1FDqs";
    private static final String URL = BASE_URL + ID;
    private static StreamExtractor extractor;

    /**
     * Initializes NewPipe with the downloader returned for the {@code noVisualMetadata}
     * resource path, then creates the extractor for {@link #URL} and fetches its page.
     */
    @BeforeAll
    public static void setUp() throws Exception {
        YoutubeTestsUtils.ensureStateless();
        NewPipe.init(DownloaderFactory.getDownloader(RESOURCE_PATH + "noVisualMetadata"));
        extractor = YouTube.getStreamExtractor(URL);
        extractor.fetchPage();
    }

    // ---- Extractor and service under test ----

    @Override
    public StreamExtractor extractor() {
        return extractor;
    }

    @Override
    public StreamingService expectedService() {
        return YouTube;
    }

    // ---- Identity and URLs ----

    @Override
    public String expectedId() {
        return ID;
    }

    // NOTE(review): title reproduced exactly as written in this test; confirm its
    // punctuation against the video's real title.
    @Override
    public String expectedName() {
        return "Makanis first commercial-scale energy kite";
    }

    @Override
    public String expectedUrlContains() {
        return URL;
    }

    @Override
    public String expectedOriginalUrlContains() {
        return URL;
    }

    // ---- Stream information ----

    @Override
    public StreamType expectedStreamType() {
        return StreamType.VIDEO_STREAM;
    }

    @Override
    public long expectedLength() {
        return 175;
    }

    @Override
    public long expectedViewCountAtLeast() {
        return 88_000;
    }

    @Nullable
    @Override
    public String expectedUploadDate() {
        return "2017-05-16 00:00:00.000";
    }

    @Nullable
    @Override
    public String expectedTextualUploadDate() {
        return "2017-05-16";
    }

    @Override
    public long expectedLikeCountAtLeast() {
        return -1;
    }

    @Override
    public long expectedDislikeCountAtLeast() {
        return -1;
    }

    // ---- Uploader ----

    @Override
    public String expectedUploaderName() {
        return "Makani";
    }

    @Override
    public String expectedUploaderUrl() {
        return "https://www.youtube.com/channel/UC-iMZJ8NppwT2fLwzFWJKOQ";
    }

    // ---- Description, category, licence and tags ----

    @Override
    public List<String> expectedDescriptionContains() {
        return Arrays.asList("Makani", "prototype", "rotors");
    }

    @Override
    public String expectedCategory() {
        return "Science & Technology";
    }

    @Override
    public String expectedLicence() {
        return YOUTUBE_LICENCE;
    }

    @Override
    public List<String> expectedTags() {
        return Arrays.asList(
                "Makani", "Moonshot", "Moonshot Factory", "Prototyping",
                "california", "california wind",
                "clean", "clean energy",
                "climate change", "climate crisis",
                "energy", "energy kite",
                "google", "google x",
                "green", "green energy",
                "kite", "kite power", "kite power solutions", "kite power systems",
                "makani power", "power",
                "renewable", "renewable energy", "renewable energy engineering",
                "renewable energy projects", "renewable energy sources", "renewables",
                "solutions", "tech", "technology", "turbine",
                "wind", "wind energy", "wind power", "wind turbine", "windmill");
    }

    // ---- Getters which must fail for this video ----

    @Test
    @Override
    public void testSubscriberCount() {
        assertThrows(
                ParsingException.class,
                () -> extractor.getUploaderSubscriberCount());
    }

    @Test
    @Override
    public void testLikeCount() {
        assertThrows(
                ParsingException.class,
                () -> extractor.getLikeCount());
    }

    @Test
    @Override
    public void testUploaderAvatarUrl() {
        assertThrows(
                ParsingException.class,
                () -> extractor.getUploaderAvatarUrl());
    }
}
public static class UnlistedTest { public static class UnlistedTest {
private static YoutubeStreamExtractor extractor; private static YoutubeStreamExtractor extractor;

View File

@ -0,0 +1,73 @@
{
"request": {
"httpMethod": "GET",
"url": "https://www.youtube.com/iframe_api",
"headers": {
"Accept-Language": [
"en-GB, en;q\u003d0.9"
]
},
"localization": {
"languageCode": "en",
"countryCode": "GB"
}
},
"response": {
"responseCode": 200,
"responseMessage": "",
"responseHeaders": {
"alt-svc": [
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000,h3-Q050\u003d\":443\"; ma\u003d2592000,h3-Q046\u003d\":443\"; ma\u003d2592000,h3-Q043\u003d\":443\"; ma\u003d2592000,quic\u003d\":443\"; ma\u003d2592000; v\u003d\"46,43\""
],
"cache-control": [
"private, max-age\u003d0"
],
"content-type": [
"text/javascript; charset\u003dutf-8"
],
"cross-origin-opener-policy-report-only": [
"same-origin; report-to\u003d\"youtube_main\""
],
"cross-origin-resource-policy": [
"cross-origin"
],
"date": [
"Fri, 04 Nov 2022 18:36:38 GMT"
],
"expires": [
"Fri, 04 Nov 2022 18:36:38 GMT"
],
"p3p": [
"CP\u003d\"This is not a P3P policy! See http://support.google.com/accounts/answer/151657?hl\u003den-GB for more info.\""
],
"permissions-policy": [
"ch-ua-arch\u003d*, ch-ua-bitness\u003d*, ch-ua-full-version\u003d*, ch-ua-full-version-list\u003d*, ch-ua-model\u003d*, ch-ua-wow64\u003d*, ch-ua-platform\u003d*, ch-ua-platform-version\u003d*"
],
"report-to": [
"{\"group\":\"youtube_main\",\"max_age\":2592000,\"endpoints\":[{\"url\":\"https://csp.withgoogle.com/csp/report-to/youtube_main\"}]}"
],
"server": [
"ESF"
],
"set-cookie": [
"YSC\u003dUBx6tMGNmRg; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"VISITOR_INFO1_LIVE\u003dvY4W1Ai6Us0; Domain\u003d.youtube.com; Expires\u003dWed, 03-May-2023 18:36:38 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"CONSENT\u003dPENDING+815; expires\u003dSun, 03-Nov-2024 18:36:38 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
],
"strict-transport-security": [
"max-age\u003d31536000"
],
"x-content-type-options": [
"nosniff"
],
"x-frame-options": [
"SAMEORIGIN"
],
"x-xss-protection": [
"0"
]
},
"responseBody": "var scriptUrl \u003d \u0027https:\\/\\/www.youtube.com\\/s\\/player\\/03bec62d\\/www-widgetapi.vflset\\/www-widgetapi.js\u0027;try{var ttPolicy\u003dwindow.trustedTypes.createPolicy(\"youtube-widget-api\",{createScriptURL:function(x){return x}});scriptUrl\u003dttPolicy.createScriptURL(scriptUrl)}catch(e){}var YT;if(!window[\"YT\"])YT\u003d{loading:0,loaded:0};var YTConfig;if(!window[\"YTConfig\"])YTConfig\u003d{\"host\":\"https://www.youtube.com\"};\nif(!YT.loading){YT.loading\u003d1;(function(){var l\u003d[];YT.ready\u003dfunction(f){if(YT.loaded)f();else l.push(f)};window.onYTReady\u003dfunction(){YT.loaded\u003d1;for(var i\u003d0;i\u003cl.length;i++)try{l[i]()}catch(e$0){}};YT.setConfig\u003dfunction(c){for(var k in c)if(c.hasOwnProperty(k))YTConfig[k]\u003dc[k]};var a\u003ddocument.createElement(\"script\");a.type\u003d\"text/javascript\";a.id\u003d\"www-widgetapi-script\";a.src\u003dscriptUrl;a.async\u003dtrue;var c\u003ddocument.currentScript;if(c){var n\u003dc.nonce||c.getAttribute(\"nonce\");if(n)a.setAttribute(\"nonce\",n)}var b\u003d\ndocument.getElementsByTagName(\"script\")[0];b.parentNode.insertBefore(a,b)})()};\n",
"latestUrl": "https://www.youtube.com/iframe_api"
}
}

View File

@ -0,0 +1,82 @@
{
"request": {
"httpMethod": "GET",
"url": "https://www.youtube.com/sw.js",
"headers": {
"Origin": [
"https://www.youtube.com"
],
"Referer": [
"https://www.youtube.com"
],
"Accept-Language": [
"en-GB, en;q\u003d0.9"
]
},
"localization": {
"languageCode": "en",
"countryCode": "GB"
}
},
"response": {
"responseCode": 200,
"responseMessage": "",
"responseHeaders": {
"access-control-allow-credentials": [
"true"
],
"access-control-allow-origin": [
"https://www.youtube.com"
],
"alt-svc": [
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000,h3-Q050\u003d\":443\"; ma\u003d2592000,h3-Q046\u003d\":443\"; ma\u003d2592000,h3-Q043\u003d\":443\"; ma\u003d2592000,quic\u003d\":443\"; ma\u003d2592000; v\u003d\"46,43\""
],
"cache-control": [
"private, max-age\u003d0"
],
"content-type": [
"text/javascript; charset\u003dutf-8"
],
"cross-origin-opener-policy-report-only": [
"same-origin; report-to\u003d\"youtube_main\""
],
"date": [
"Fri, 04 Nov 2022 18:36:42 GMT"
],
"expires": [
"Fri, 04 Nov 2022 18:36:42 GMT"
],
"p3p": [
"CP\u003d\"This is not a P3P policy! See http://support.google.com/accounts/answer/151657?hl\u003den-GB for more info.\""
],
"permissions-policy": [
"ch-ua-arch\u003d*, ch-ua-bitness\u003d*, ch-ua-full-version\u003d*, ch-ua-full-version-list\u003d*, ch-ua-model\u003d*, ch-ua-wow64\u003d*, ch-ua-platform\u003d*, ch-ua-platform-version\u003d*"
],
"report-to": [
"{\"group\":\"youtube_main\",\"max_age\":2592000,\"endpoints\":[{\"url\":\"https://csp.withgoogle.com/csp/report-to/youtube_main\"}]}"
],
"server": [
"ESF"
],
"set-cookie": [
"YSC\u003dOWXOpWivHI0; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dSat, 08-Feb-2020 18:36:42 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"CONSENT\u003dPENDING+611; expires\u003dSun, 03-Nov-2024 18:36:42 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
],
"strict-transport-security": [
"max-age\u003d31536000"
],
"x-content-type-options": [
"nosniff"
],
"x-frame-options": [
"SAMEORIGIN"
],
"x-xss-protection": [
"0"
]
},
"responseBody": "\n self.addEventListener(\u0027install\u0027, event \u003d\u003e {\n event.waitUntil(self.skipWaiting());\n });\n self.addEventListener(\u0027activate\u0027, event \u003d\u003e {\n event.waitUntil(\n self.clients.claim().then(() \u003d\u003e self.registration.unregister()));\n });\n ",
"latestUrl": "https://www.youtube.com/sw.js"
}
}