Merge pull request #968 from AudricV/yt-support-no-video-info-renderers-for-streams

[YouTube] Support lack of video info renderers for streams
This commit is contained in:
Kavin 2022-11-16 20:20:01 +00:00 committed by GitHub
commit c953e23414
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 1754 additions and 73 deletions

View File

@ -204,45 +204,48 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return null;
}
if (getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText"))
.startsWith("Premiered")) {
final String time = getTextFromObject(
getVideoPrimaryInfoRenderer().getObject("dateText")).substring(13);
final String videoPrimaryInfoRendererDateText =
getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText"));
try { // Premiered 20 hours ago
final TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(
Localization.fromLocalizationCode("en"));
final OffsetDateTime parsedTime = timeAgoParser.parse(time).offsetDateTime();
return DateTimeFormatter.ISO_LOCAL_DATE.format(parsedTime);
} catch (final Exception ignored) {
if (videoPrimaryInfoRendererDateText != null) {
if (videoPrimaryInfoRendererDateText.startsWith("Premiered")) {
final String time = videoPrimaryInfoRendererDateText.substring(13);
try { // Premiered 20 hours ago
final TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(
Localization.fromLocalizationCode("en"));
final OffsetDateTime parsedTime = timeAgoParser.parse(time).offsetDateTime();
return DateTimeFormatter.ISO_LOCAL_DATE.format(parsedTime);
} catch (final Exception ignored) {
}
try { // Premiered Feb 21, 2020
final LocalDate localDate = LocalDate.parse(time,
DateTimeFormatter.ofPattern("MMM dd, yyyy", Locale.ENGLISH));
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
} catch (final Exception ignored) {
}
try { // Premiered on 21 Feb 2020
final LocalDate localDate = LocalDate.parse(time,
DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH));
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
} catch (final Exception ignored) {
}
}
try { // Premiered Feb 21, 2020
final LocalDate localDate = LocalDate.parse(time,
DateTimeFormatter.ofPattern("MMM dd, yyyy", Locale.ENGLISH));
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
} catch (final Exception ignored) {
}
try { // Premiered on 21 Feb 2020
final LocalDate localDate = LocalDate.parse(time,
try {
// TODO: this parses English formatted dates only, we need a better approach to
// parse the textual date
final LocalDate localDate = LocalDate.parse(videoPrimaryInfoRendererDateText,
DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH));
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
} catch (final Exception ignored) {
} catch (final Exception e) {
throw new ParsingException("Could not get upload date", e);
}
}
try {
// TODO: this parses English formatted dates only, we need a better approach to parse
// the textual date
final LocalDate localDate = LocalDate.parse(getTextFromObject(
getVideoPrimaryInfoRenderer().getObject("dateText")),
DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH));
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
} catch (final Exception e) {
throw new ParsingException("Could not get upload date", e);
}
throw new ParsingException("Could not get upload date");
}
@Override
@ -565,19 +568,13 @@ public class YoutubeStreamExtractor extends StreamExtractor {
public String getUploaderAvatarUrl() throws ParsingException {
assertPageFetched();
String url = null;
try {
url = getVideoSecondaryInfoRenderer()
.getObject("owner")
.getObject("videoOwnerRenderer")
.getObject("thumbnail")
.getArray("thumbnails")
.getObject(0)
.getString("url");
} catch (final ParsingException ignored) {
// Age-restricted videos cause a ParsingException here
}
final String url = getVideoSecondaryInfoRenderer()
.getObject("owner")
.getObject("videoOwnerRenderer")
.getObject("thumbnail")
.getArray("thumbnails")
.getObject(0)
.getString("url");
if (isNullOrEmpty(url)) {
if (ageLimit == NO_AGE_LIMIT) {
@ -1212,40 +1209,29 @@ public class YoutubeStreamExtractor extends StreamExtractor {
// Utils
//////////////////////////////////////////////////////////////////////////*/
private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException {
@Nonnull
private JsonObject getVideoPrimaryInfoRenderer() {
if (videoPrimaryInfoRenderer != null) {
return videoPrimaryInfoRenderer;
}
final JsonArray contents = nextResponse.getObject("contents")
.getObject("twoColumnWatchNextResults").getObject("results").getObject("results")
.getArray("contents");
JsonObject theVideoPrimaryInfoRenderer = null;
for (final Object content : contents) {
if (((JsonObject) content).has("videoPrimaryInfoRenderer")) {
theVideoPrimaryInfoRenderer = ((JsonObject) content)
.getObject("videoPrimaryInfoRenderer");
break;
}
}
if (isNullOrEmpty(theVideoPrimaryInfoRenderer)) {
throw new ParsingException("Could not find videoPrimaryInfoRenderer");
}
videoPrimaryInfoRenderer = theVideoPrimaryInfoRenderer;
return theVideoPrimaryInfoRenderer;
videoPrimaryInfoRenderer = getVideoInfoRenderer("videoPrimaryInfoRenderer");
return videoPrimaryInfoRenderer;
}
@Nonnull
private JsonObject getVideoSecondaryInfoRenderer() throws ParsingException {
private JsonObject getVideoSecondaryInfoRenderer() {
if (videoSecondaryInfoRenderer != null) {
return videoSecondaryInfoRenderer;
}
videoSecondaryInfoRenderer = nextResponse
.getObject("contents")
videoSecondaryInfoRenderer = getVideoInfoRenderer("videoSecondaryInfoRenderer");
return videoSecondaryInfoRenderer;
}
@Nonnull
private JsonObject getVideoInfoRenderer(@Nonnull final String videoRendererName) {
return nextResponse.getObject("contents")
.getObject("twoColumnWatchNextResults")
.getObject("results")
.getObject("results")
@ -1253,13 +1239,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.filter(content -> content.has("videoSecondaryInfoRenderer"))
.map(content -> content.getObject("videoSecondaryInfoRenderer"))
.filter(content -> content.has(videoRendererName))
.map(content -> content.getObject(videoRendererName))
.findFirst()
.orElseThrow(
() -> new ParsingException("Could not find videoSecondaryInfoRenderer"));
return videoSecondaryInfoRenderer;
.orElse(new JsonObject());
}
@Nonnull

View File

@ -430,6 +430,69 @@ public class YoutubeStreamExtractorDefaultTest {
// @formatter:on
}
/**
 * Stream extractor test for a video which has no visual metadata on YouTube clients
 * (video title, upload date, channel name, comments, ...).
 *
 * <p>The subscriber count, like count and uploader avatar URL getters are expected to throw
 * a {@link ParsingException} for this video; the other expectations are checked by the
 * tests inherited from {@link DefaultStreamExtractorTest}.</p>
 */
public static class NoVisualMetadataVideoTest extends DefaultStreamExtractorTest {
    private static final String ID = "An8vtD1FDqs";
    private static final String URL = BASE_URL + ID;
    private static StreamExtractor extractor;

    /**
     * Initialises NewPipe with the downloader of the {@code noVisualMetadata} resource
     * directory, then creates the extractor for the video and fetches its page.
     */
    @BeforeAll
    public static void setUp() throws Exception {
        YoutubeTestsUtils.ensureStateless();
        final String resourceDirectory = RESOURCE_PATH + "noVisualMetadata";
        NewPipe.init(DownloaderFactory.getDownloader(resourceDirectory));
        extractor = YouTube.getStreamExtractor(URL);
        extractor.fetchPage();
    }

    // Extractor and service under test

    @Override
    public StreamExtractor extractor() {
        return extractor;
    }

    @Override
    public StreamingService expectedService() {
        return YouTube;
    }

    // Identity of the stream

    @Override
    public String expectedName() {
        return "Makanis first commercial-scale energy kite";
    }

    @Override
    public String expectedId() {
        // Same value as the literal video ID
        return ID;
    }

    @Override
    public String expectedUrlContains() {
        // URL is defined as BASE_URL + ID
        return URL;
    }

    @Override
    public String expectedOriginalUrlContains() {
        return URL;
    }

    // Stream and uploader information

    @Override
    public StreamType expectedStreamType() {
        return StreamType.VIDEO_STREAM;
    }

    @Override
    public String expectedUploaderName() {
        return "Makani";
    }

    @Override
    public String expectedUploaderUrl() {
        return "https://www.youtube.com/channel/UC-iMZJ8NppwT2fLwzFWJKOQ";
    }

    @Override
    public List<String> expectedDescriptionContains() {
        return Arrays.asList("Makani", "prototype", "rotors");
    }

    @Override
    public long expectedLength() {
        return 175;
    }

    @Override
    public long expectedViewCountAtLeast() {
        return 88_000;
    }

    @Nullable
    @Override
    public String expectedUploadDate() {
        return "2017-05-16 00:00:00.000";
    }

    @Nullable
    @Override
    public String expectedTextualUploadDate() {
        return "2017-05-16";
    }

    @Override
    public long expectedLikeCountAtLeast() {
        return -1;
    }

    @Override
    public long expectedDislikeCountAtLeast() {
        return -1;
    }

    @Override
    public String expectedCategory() {
        return "Science & Technology";
    }

    @Override
    public String expectedLicence() {
        return YOUTUBE_LICENCE;
    }

    @Override
    public List<String> expectedTags() {
        return Arrays.asList(
                "Makani", "Moonshot", "Moonshot Factory", "Prototyping",
                "california", "california wind", "clean", "clean energy",
                "climate change", "climate crisis", "energy", "energy kite",
                "google", "google x", "green", "green energy",
                "kite", "kite power", "kite power solutions", "kite power systems",
                "makani power", "power", "renewable", "renewable energy",
                "renewable energy engineering", "renewable energy projects",
                "renewable energy sources", "renewables", "solutions", "tech",
                "technology", "turbine", "wind", "wind energy", "wind power",
                "wind turbine", "windmill");
    }

    // Getters which are expected to fail for this video

    /** The uploader subscriber count getter must throw a {@link ParsingException}. */
    @Test
    @Override
    public void testSubscriberCount() {
        assertThrows(ParsingException.class,
                () -> extractor.getUploaderSubscriberCount());
    }

    /** The like count getter must throw a {@link ParsingException}. */
    @Test
    @Override
    public void testLikeCount() {
        assertThrows(ParsingException.class,
                () -> extractor.getLikeCount());
    }

    /** The uploader avatar URL getter must throw a {@link ParsingException}. */
    @Test
    @Override
    public void testUploaderAvatarUrl() {
        assertThrows(ParsingException.class,
                () -> extractor.getUploaderAvatarUrl());
    }
}
public static class UnlistedTest {
private static YoutubeStreamExtractor extractor;

View File

@ -0,0 +1,73 @@
{
"request": {
"httpMethod": "GET",
"url": "https://www.youtube.com/iframe_api",
"headers": {
"Accept-Language": [
"en-GB, en;q\u003d0.9"
]
},
"localization": {
"languageCode": "en",
"countryCode": "GB"
}
},
"response": {
"responseCode": 200,
"responseMessage": "",
"responseHeaders": {
"alt-svc": [
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000,h3-Q050\u003d\":443\"; ma\u003d2592000,h3-Q046\u003d\":443\"; ma\u003d2592000,h3-Q043\u003d\":443\"; ma\u003d2592000,quic\u003d\":443\"; ma\u003d2592000; v\u003d\"46,43\""
],
"cache-control": [
"private, max-age\u003d0"
],
"content-type": [
"text/javascript; charset\u003dutf-8"
],
"cross-origin-opener-policy-report-only": [
"same-origin; report-to\u003d\"youtube_main\""
],
"cross-origin-resource-policy": [
"cross-origin"
],
"date": [
"Fri, 04 Nov 2022 18:36:38 GMT"
],
"expires": [
"Fri, 04 Nov 2022 18:36:38 GMT"
],
"p3p": [
"CP\u003d\"This is not a P3P policy! See http://support.google.com/accounts/answer/151657?hl\u003den-GB for more info.\""
],
"permissions-policy": [
"ch-ua-arch\u003d*, ch-ua-bitness\u003d*, ch-ua-full-version\u003d*, ch-ua-full-version-list\u003d*, ch-ua-model\u003d*, ch-ua-wow64\u003d*, ch-ua-platform\u003d*, ch-ua-platform-version\u003d*"
],
"report-to": [
"{\"group\":\"youtube_main\",\"max_age\":2592000,\"endpoints\":[{\"url\":\"https://csp.withgoogle.com/csp/report-to/youtube_main\"}]}"
],
"server": [
"ESF"
],
"set-cookie": [
"YSC\u003dUBx6tMGNmRg; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"VISITOR_INFO1_LIVE\u003dvY4W1Ai6Us0; Domain\u003d.youtube.com; Expires\u003dWed, 03-May-2023 18:36:38 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"CONSENT\u003dPENDING+815; expires\u003dSun, 03-Nov-2024 18:36:38 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
],
"strict-transport-security": [
"max-age\u003d31536000"
],
"x-content-type-options": [
"nosniff"
],
"x-frame-options": [
"SAMEORIGIN"
],
"x-xss-protection": [
"0"
]
},
"responseBody": "var scriptUrl \u003d \u0027https:\\/\\/www.youtube.com\\/s\\/player\\/03bec62d\\/www-widgetapi.vflset\\/www-widgetapi.js\u0027;try{var ttPolicy\u003dwindow.trustedTypes.createPolicy(\"youtube-widget-api\",{createScriptURL:function(x){return x}});scriptUrl\u003dttPolicy.createScriptURL(scriptUrl)}catch(e){}var YT;if(!window[\"YT\"])YT\u003d{loading:0,loaded:0};var YTConfig;if(!window[\"YTConfig\"])YTConfig\u003d{\"host\":\"https://www.youtube.com\"};\nif(!YT.loading){YT.loading\u003d1;(function(){var l\u003d[];YT.ready\u003dfunction(f){if(YT.loaded)f();else l.push(f)};window.onYTReady\u003dfunction(){YT.loaded\u003d1;for(var i\u003d0;i\u003cl.length;i++)try{l[i]()}catch(e$0){}};YT.setConfig\u003dfunction(c){for(var k in c)if(c.hasOwnProperty(k))YTConfig[k]\u003dc[k]};var a\u003ddocument.createElement(\"script\");a.type\u003d\"text/javascript\";a.id\u003d\"www-widgetapi-script\";a.src\u003dscriptUrl;a.async\u003dtrue;var c\u003ddocument.currentScript;if(c){var n\u003dc.nonce||c.getAttribute(\"nonce\");if(n)a.setAttribute(\"nonce\",n)}var b\u003d\ndocument.getElementsByTagName(\"script\")[0];b.parentNode.insertBefore(a,b)})()};\n",
"latestUrl": "https://www.youtube.com/iframe_api"
}
}

View File

@ -0,0 +1,82 @@
{
"request": {
"httpMethod": "GET",
"url": "https://www.youtube.com/sw.js",
"headers": {
"Origin": [
"https://www.youtube.com"
],
"Referer": [
"https://www.youtube.com"
],
"Accept-Language": [
"en-GB, en;q\u003d0.9"
]
},
"localization": {
"languageCode": "en",
"countryCode": "GB"
}
},
"response": {
"responseCode": 200,
"responseMessage": "",
"responseHeaders": {
"access-control-allow-credentials": [
"true"
],
"access-control-allow-origin": [
"https://www.youtube.com"
],
"alt-svc": [
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000,h3-Q050\u003d\":443\"; ma\u003d2592000,h3-Q046\u003d\":443\"; ma\u003d2592000,h3-Q043\u003d\":443\"; ma\u003d2592000,quic\u003d\":443\"; ma\u003d2592000; v\u003d\"46,43\""
],
"cache-control": [
"private, max-age\u003d0"
],
"content-type": [
"text/javascript; charset\u003dutf-8"
],
"cross-origin-opener-policy-report-only": [
"same-origin; report-to\u003d\"youtube_main\""
],
"date": [
"Fri, 04 Nov 2022 18:36:42 GMT"
],
"expires": [
"Fri, 04 Nov 2022 18:36:42 GMT"
],
"p3p": [
"CP\u003d\"This is not a P3P policy! See http://support.google.com/accounts/answer/151657?hl\u003den-GB for more info.\""
],
"permissions-policy": [
"ch-ua-arch\u003d*, ch-ua-bitness\u003d*, ch-ua-full-version\u003d*, ch-ua-full-version-list\u003d*, ch-ua-model\u003d*, ch-ua-wow64\u003d*, ch-ua-platform\u003d*, ch-ua-platform-version\u003d*"
],
"report-to": [
"{\"group\":\"youtube_main\",\"max_age\":2592000,\"endpoints\":[{\"url\":\"https://csp.withgoogle.com/csp/report-to/youtube_main\"}]}"
],
"server": [
"ESF"
],
"set-cookie": [
"YSC\u003dOWXOpWivHI0; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dSat, 08-Feb-2020 18:36:42 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"CONSENT\u003dPENDING+611; expires\u003dSun, 03-Nov-2024 18:36:42 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
],
"strict-transport-security": [
"max-age\u003d31536000"
],
"x-content-type-options": [
"nosniff"
],
"x-frame-options": [
"SAMEORIGIN"
],
"x-xss-protection": [
"0"
]
},
"responseBody": "\n self.addEventListener(\u0027install\u0027, event \u003d\u003e {\n event.waitUntil(self.skipWaiting());\n });\n self.addEventListener(\u0027activate\u0027, event \u003d\u003e {\n event.waitUntil(\n self.clients.claim().then(() \u003d\u003e self.registration.unregister()));\n });\n ",
"latestUrl": "https://www.youtube.com/sw.js"
}
}