2018-05-08 21:19:03 +02:00
|
|
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
2017-03-01 18:47:52 +01:00
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.CPN;
|
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.VIDEO_ID;
|
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.createDesktopPlayerBody;
|
2022-03-18 15:09:06 +01:00
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl;
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.generateContentPlaybackNonce;
|
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.generateTParameter;
|
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonAndroidPostResponse;
|
2022-03-18 15:09:06 +01:00
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
|
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
|
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareAndroidMobileEmbedVideoJsonBuilder;
|
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareAndroidMobileJsonBuilder;
|
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopEmbedVideoJsonBuilder;
|
|
|
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
|
|
|
|
import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING;
|
|
|
|
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
|
|
|
|
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
|
|
|
|
2017-11-22 18:39:38 +01:00
|
|
|
import com.grack.nanojson.JsonArray;
|
2017-08-16 04:40:03 +02:00
|
|
|
import com.grack.nanojson.JsonObject;
|
2021-05-23 17:55:19 +02:00
|
|
|
import com.grack.nanojson.JsonWriter;
|
2021-04-15 18:58:59 +02:00
|
|
|
|
2017-03-01 18:47:52 +01:00
|
|
|
import org.mozilla.javascript.Context;
|
|
|
|
import org.mozilla.javascript.Function;
|
|
|
|
import org.mozilla.javascript.ScriptableObject;
|
2019-04-28 22:03:16 +02:00
|
|
|
import org.schabi.newpipe.extractor.MediaFormat;
|
2020-12-20 19:54:12 +01:00
|
|
|
import org.schabi.newpipe.extractor.MetaInfo;
|
2022-02-02 20:23:11 +01:00
|
|
|
import org.schabi.newpipe.extractor.MultiInfoItemsCollector;
|
2019-04-28 22:03:16 +02:00
|
|
|
import org.schabi.newpipe.extractor.StreamingService;
|
|
|
|
import org.schabi.newpipe.extractor.downloader.Downloader;
|
2021-04-15 18:58:59 +02:00
|
|
|
import org.schabi.newpipe.extractor.exceptions.AgeRestrictedContentException;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.GeographicRestrictionException;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.PaidContentException;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.PrivateContentException;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.YoutubeMusicPremiumContentException;
|
2018-07-13 18:02:40 +02:00
|
|
|
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
|
2021-05-23 17:55:19 +02:00
|
|
|
import org.schabi.newpipe.extractor.localization.ContentCountry;
|
2019-11-03 19:45:25 +01:00
|
|
|
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
2020-02-25 21:19:53 +01:00
|
|
|
import org.schabi.newpipe.extractor.localization.Localization;
|
2019-12-16 08:35:43 +01:00
|
|
|
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
2020-02-25 21:19:53 +01:00
|
|
|
import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager;
|
2018-05-08 21:19:03 +02:00
|
|
|
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
|
2021-07-20 20:48:11 +02:00
|
|
|
import org.schabi.newpipe.extractor.services.youtube.YoutubeJavaScriptExtractor;
|
2020-04-10 10:51:05 +02:00
|
|
|
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
|
2021-07-17 19:10:09 +02:00
|
|
|
import org.schabi.newpipe.extractor.services.youtube.YoutubeThrottlingDecrypter;
|
2020-05-30 10:25:43 +02:00
|
|
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
|
2022-03-18 15:09:06 +01:00
|
|
|
import org.schabi.newpipe.extractor.stream.AudioStream;
|
|
|
|
import org.schabi.newpipe.extractor.stream.Description;
|
|
|
|
import org.schabi.newpipe.extractor.stream.Frameset;
|
|
|
|
import org.schabi.newpipe.extractor.stream.Stream;
|
|
|
|
import org.schabi.newpipe.extractor.stream.StreamExtractor;
|
|
|
|
import org.schabi.newpipe.extractor.stream.StreamSegment;
|
|
|
|
import org.schabi.newpipe.extractor.stream.StreamType;
|
|
|
|
import org.schabi.newpipe.extractor.stream.SubtitlesStream;
|
|
|
|
import org.schabi.newpipe.extractor.stream.VideoStream;
|
2020-02-09 11:59:23 +01:00
|
|
|
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
2017-06-29 20:12:55 +02:00
|
|
|
import org.schabi.newpipe.extractor.utils.Parser;
|
2017-07-11 05:08:03 +02:00
|
|
|
import org.schabi.newpipe.extractor.utils.Utils;
|
2017-03-01 18:47:52 +01:00
|
|
|
|
|
|
|
import java.io.IOException;
|
2020-12-20 19:54:12 +01:00
|
|
|
import java.io.UnsupportedEncodingException;
|
2020-10-18 05:48:14 +02:00
|
|
|
import java.time.LocalDate;
|
|
|
|
import java.time.OffsetDateTime;
|
|
|
|
import java.time.format.DateTimeFormatter;
|
2022-03-18 15:09:06 +01:00
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.Collections;
|
|
|
|
import java.util.LinkedHashMap;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.Locale;
|
|
|
|
import java.util.Map;
|
|
|
|
import java.util.Objects;
|
|
|
|
import javax.annotation.Nonnull;
|
|
|
|
import javax.annotation.Nullable;
|
2020-02-27 17:39:23 +01:00
|
|
|
|
2017-06-29 20:12:55 +02:00
|
|
|
/*
|
2017-03-01 18:47:52 +01:00
|
|
|
* Created by Christian Schabesberger on 06.08.15.
|
|
|
|
*
|
2019-03-14 08:49:11 +01:00
|
|
|
* Copyright (C) Christian Schabesberger 2019 <chris.schabesberger@mailbox.org>
|
2017-03-01 18:47:52 +01:00
|
|
|
* YoutubeStreamExtractor.java is part of NewPipe.
|
|
|
|
*
|
|
|
|
* NewPipe is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* NewPipe is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
public class YoutubeStreamExtractor extends StreamExtractor {
|
2017-07-11 05:08:03 +02:00
|
|
|
/*//////////////////////////////////////////////////////////////////////////
|
|
|
|
// Exceptions
|
|
|
|
//////////////////////////////////////////////////////////////////////////*/
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2020-10-29 18:44:05 +01:00
|
|
|
public static class DeobfuscateException extends ParsingException {
|
2021-02-24 17:06:38 +01:00
|
|
|
DeobfuscateException(final String message, final Throwable cause) {
|
2017-03-01 18:47:52 +01:00
|
|
|
super(message, cause);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-07-11 05:08:03 +02:00
|
|
|
/*//////////////////////////////////////////////////////////////////////////*/
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2021-02-07 22:42:21 +01:00
|
|
|
@Nullable
|
|
|
|
private static String cachedDeobfuscationCode = null;
|
2021-05-23 17:55:19 +02:00
|
|
|
@Nullable
|
2021-06-05 18:22:18 +02:00
|
|
|
private static String sts = null;
|
2021-05-29 14:43:26 +02:00
|
|
|
@Nullable
|
2021-06-05 18:22:18 +02:00
|
|
|
private static String playerCode = null;
|
2021-05-23 17:55:19 +02:00
|
|
|
|
2022-01-15 17:25:00 +01:00
|
|
|
private static boolean isAndroidClientFetchForced = false;
|
|
|
|
private static boolean isIosClientFetchForced = false;
|
|
|
|
|
2020-10-29 18:44:05 +01:00
|
|
|
private JsonObject playerResponse;
|
2021-05-23 17:55:19 +02:00
|
|
|
private JsonObject nextResponse;
|
2021-05-29 14:43:26 +02:00
|
|
|
|
|
|
|
@Nullable
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
private JsonObject desktopStreamingData;
|
|
|
|
@Nullable
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
private JsonObject androidStreamingData;
|
2022-01-15 17:25:00 +01:00
|
|
|
@Nullable
|
|
|
|
private JsonObject iosStreamingData;
|
|
|
|
|
2020-02-28 17:03:21 +01:00
|
|
|
private JsonObject videoPrimaryInfoRenderer;
|
|
|
|
private JsonObject videoSecondaryInfoRenderer;
|
2020-10-29 18:44:05 +01:00
|
|
|
private int ageLimit = -1;
|
2022-01-15 17:25:00 +01:00
|
|
|
private StreamType streamType;
|
2021-02-07 22:42:21 +01:00
|
|
|
@Nullable
|
|
|
|
private List<SubtitlesStream> subtitles = null;
|
2017-08-10 19:50:59 +02:00
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
private String desktopCpn;
|
|
|
|
private String androidCpn;
|
2022-01-15 17:25:00 +01:00
|
|
|
private String iosCpn;
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
2021-02-24 17:06:38 +01:00
|
|
|
/**
 * Creates an extractor; both arguments are handed straight to the {@link StreamExtractor}
 * constructor.
 *
 * @param service     the streaming service this extractor belongs to
 * @param linkHandler the link handler describing the stream to extract
 */
public YoutubeStreamExtractor(final StreamingService service, final LinkHandler linkHandler) {
    super(service, linkHandler);
}
|
|
|
|
|
2017-07-11 05:08:03 +02:00
|
|
|
/*//////////////////////////////////////////////////////////////////////////
|
|
|
|
// Impl
|
|
|
|
//////////////////////////////////////////////////////////////////////////*/
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2017-11-25 01:10:04 +01:00
|
|
|
@Nonnull
|
2017-03-01 18:47:52 +01:00
|
|
|
@Override
|
2017-08-11 03:23:09 +02:00
|
|
|
public String getName() throws ParsingException {
|
2017-11-30 10:49:27 +01:00
|
|
|
assertPageFetched();
|
2020-07-18 09:50:22 +02:00
|
|
|
String title = null;
|
|
|
|
|
|
|
|
try {
|
|
|
|
title = getTextFromObject(getVideoPrimaryInfoRenderer().getObject("title"));
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final ParsingException ignored) {
|
2021-06-02 21:24:55 +02:00
|
|
|
// Age-restricted videos cause a ParsingException here
|
2020-07-26 14:15:13 +02:00
|
|
|
}
|
2020-02-29 17:18:50 +01:00
|
|
|
|
2020-05-11 11:40:24 +02:00
|
|
|
if (isNullOrEmpty(title)) {
|
2020-04-16 16:08:14 +02:00
|
|
|
title = playerResponse.getObject("videoDetails").getString("title");
|
2020-02-29 17:18:50 +01:00
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
if (isNullOrEmpty(title)) {
|
|
|
|
throw new ParsingException("Could not get name");
|
|
|
|
}
|
2017-11-25 01:10:04 +01:00
|
|
|
}
|
2020-02-29 17:18:50 +01:00
|
|
|
|
|
|
|
return title;
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
2020-11-04 14:50:35 +01:00
|
|
|
@Nullable
|
2017-03-01 18:47:52 +01:00
|
|
|
@Override
|
2019-04-28 22:03:16 +02:00
|
|
|
public String getTextualUploadDate() throws ParsingException {
|
2021-04-15 18:58:59 +02:00
|
|
|
final JsonObject micro = playerResponse.getObject("microformat")
|
|
|
|
.getObject("playerMicroformatRenderer");
|
2020-11-04 14:50:35 +01:00
|
|
|
if (!micro.getString("uploadDate", EMPTY_STRING).isEmpty()) {
|
2020-04-16 16:08:14 +02:00
|
|
|
return micro.getString("uploadDate");
|
2020-11-04 14:50:35 +01:00
|
|
|
} else if (!micro.getString("publishDate", EMPTY_STRING).isEmpty()) {
|
2020-04-16 16:08:14 +02:00
|
|
|
return micro.getString("publishDate");
|
2020-11-04 14:50:35 +01:00
|
|
|
} else {
|
|
|
|
final JsonObject liveDetails = micro.getObject("liveBroadcastDetails");
|
|
|
|
if (!liveDetails.getString("endTimestamp", EMPTY_STRING).isEmpty()) {
|
|
|
|
// an ended live stream
|
|
|
|
return liveDetails.getString("endTimestamp");
|
|
|
|
} else if (!liveDetails.getString("startTimestamp", EMPTY_STRING).isEmpty()) {
|
|
|
|
// a running live stream
|
|
|
|
return liveDetails.getString("startTimestamp");
|
|
|
|
} else if (getStreamType() == StreamType.LIVE_STREAM) {
|
|
|
|
// this should never be reached, but a live stream without upload date is valid
|
|
|
|
return null;
|
|
|
|
}
|
2020-04-16 16:08:14 +02:00
|
|
|
}
|
2020-02-25 09:50:22 +01:00
|
|
|
|
2021-04-15 18:58:59 +02:00
|
|
|
if (getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText"))
|
|
|
|
.startsWith("Premiered")) {
|
2022-03-18 15:09:06 +01:00
|
|
|
final String time = getTextFromObject(
|
|
|
|
getVideoPrimaryInfoRenderer().getObject("dateText")).substring(10);
|
2020-04-16 16:08:14 +02:00
|
|
|
|
|
|
|
try { // Premiered 20 hours ago
|
2021-05-29 14:43:26 +02:00
|
|
|
final TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(
|
2021-04-15 18:58:59 +02:00
|
|
|
Localization.fromLocalizationCode("en"));
|
2021-05-29 14:43:26 +02:00
|
|
|
final OffsetDateTime parsedTime = timeAgoParser.parse(time).offsetDateTime();
|
2020-10-18 05:48:14 +02:00
|
|
|
return DateTimeFormatter.ISO_LOCAL_DATE.format(parsedTime);
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final Exception ignored) {
|
2020-10-26 16:32:39 +01:00
|
|
|
}
|
2020-04-16 16:08:14 +02:00
|
|
|
|
|
|
|
try { // Premiered Feb 21, 2020
|
2020-10-29 18:44:05 +01:00
|
|
|
final LocalDate localDate = LocalDate.parse(time,
|
|
|
|
DateTimeFormatter.ofPattern("MMM dd, yyyy", Locale.ENGLISH));
|
2020-10-18 05:48:14 +02:00
|
|
|
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final Exception ignored) {
|
2020-10-26 16:32:39 +01:00
|
|
|
}
|
2020-04-16 16:08:14 +02:00
|
|
|
}
|
2020-02-25 21:19:53 +01:00
|
|
|
|
2017-03-01 18:47:52 +01:00
|
|
|
try {
|
2021-04-15 18:58:59 +02:00
|
|
|
// TODO: this parses English formatted dates only, we need a better approach to parse
|
|
|
|
// the textual date
|
2021-05-29 14:43:26 +02:00
|
|
|
final LocalDate localDate = LocalDate.parse(getTextFromObject(
|
2022-03-26 19:46:10 +01:00
|
|
|
getVideoPrimaryInfoRenderer().getObject("dateText")),
|
2020-10-18 05:48:14 +02:00
|
|
|
DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH));
|
|
|
|
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final Exception ignored) {
|
2020-10-26 16:32:39 +01:00
|
|
|
}
|
2020-11-04 14:50:35 +01:00
|
|
|
|
2020-02-25 16:24:18 +01:00
|
|
|
throw new ParsingException("Could not get upload date");
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
2019-04-28 22:03:16 +02:00
|
|
|
@Override
|
2019-11-03 19:45:25 +01:00
|
|
|
public DateWrapper getUploadDate() throws ParsingException {
|
2019-04-28 22:03:16 +02:00
|
|
|
final String textualUploadDate = getTextualUploadDate();
|
|
|
|
|
2020-05-11 11:40:24 +02:00
|
|
|
if (isNullOrEmpty(textualUploadDate)) {
|
2019-04-28 22:03:16 +02:00
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
2019-12-16 08:35:43 +01:00
|
|
|
return new DateWrapper(YoutubeParsingHelper.parseDateFrom(textualUploadDate), true);
|
2019-04-28 22:03:16 +02:00
|
|
|
}
|
|
|
|
|
2017-11-25 01:10:04 +01:00
|
|
|
@Nonnull
|
2017-03-01 18:47:52 +01:00
|
|
|
@Override
|
2017-08-11 03:23:09 +02:00
|
|
|
public String getThumbnailUrl() throws ParsingException {
|
2017-11-30 10:49:27 +01:00
|
|
|
assertPageFetched();
|
2017-03-01 18:47:52 +01:00
|
|
|
try {
|
2022-03-18 15:09:06 +01:00
|
|
|
final JsonArray thumbnails = playerResponse.getObject("videoDetails")
|
|
|
|
.getObject("thumbnail").getArray("thumbnails");
|
2020-01-06 20:45:57 +01:00
|
|
|
// the last thumbnail is the one with the highest resolution
|
2022-03-18 15:09:06 +01:00
|
|
|
final String url = thumbnails.getObject(thumbnails.size() - 1).getString("url");
|
2017-08-10 19:50:59 +02:00
|
|
|
|
2020-02-28 09:36:33 +01:00
|
|
|
return fixThumbnailUrl(url);
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final Exception e) {
|
2020-02-25 09:50:22 +01:00
|
|
|
throw new ParsingException("Could not get thumbnail url");
|
2017-08-11 03:23:09 +02:00
|
|
|
}
|
2020-01-06 20:45:57 +01:00
|
|
|
|
2017-08-11 03:23:09 +02:00
|
|
|
}
|
|
|
|
|
2017-11-25 01:10:04 +01:00
|
|
|
@Nonnull
|
2017-08-11 03:23:09 +02:00
|
|
|
@Override
|
2021-02-12 22:22:11 +01:00
|
|
|
public Description getDescription() throws ParsingException {
|
2017-11-30 10:49:27 +01:00
|
|
|
assertPageFetched();
|
2021-05-29 14:43:26 +02:00
|
|
|
// Description with more info on links
|
2020-07-18 09:50:22 +02:00
|
|
|
try {
|
2022-03-18 15:09:06 +01:00
|
|
|
final String description = getTextFromObject(getVideoSecondaryInfoRenderer()
|
2021-04-15 18:58:59 +02:00
|
|
|
.getObject("description"), true);
|
2022-03-18 15:09:06 +01:00
|
|
|
if (!isNullOrEmpty(description)) {
|
|
|
|
return new Description(description, Description.HTML);
|
|
|
|
}
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final ParsingException ignored) {
|
2021-05-29 14:43:26 +02:00
|
|
|
// Age-restricted videos cause a ParsingException here
|
2020-07-26 14:15:13 +02:00
|
|
|
}
|
2020-02-25 18:27:39 +01:00
|
|
|
|
2021-04-15 18:58:59 +02:00
|
|
|
String description = playerResponse.getObject("videoDetails")
|
|
|
|
.getString("shortDescription");
|
2021-02-12 22:22:11 +01:00
|
|
|
if (description == null) {
|
|
|
|
final JsonObject descriptionObject = playerResponse.getObject("microformat")
|
|
|
|
.getObject("playerMicroformatRenderer").getObject("description");
|
|
|
|
description = getTextFromObject(descriptionObject);
|
|
|
|
}
|
|
|
|
|
2021-05-29 14:43:26 +02:00
|
|
|
// Raw non-html description
|
2021-02-12 22:22:11 +01:00
|
|
|
return new Description(description, Description.PLAIN_TEXT);
|
2017-08-11 03:23:09 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2020-10-29 18:44:05 +01:00
|
|
|
public int getAgeLimit() throws ParsingException {
|
|
|
|
if (ageLimit == -1) {
|
|
|
|
ageLimit = NO_AGE_LIMIT;
|
2020-02-28 17:14:26 +01:00
|
|
|
|
2020-10-29 18:44:05 +01:00
|
|
|
final JsonArray metadataRows = getVideoSecondaryInfoRenderer()
|
|
|
|
.getObject("metadataRowContainer").getObject("metadataRowContainerRenderer")
|
|
|
|
.getArray("rows");
|
|
|
|
for (final Object metadataRow : metadataRows) {
|
|
|
|
final JsonArray contents = ((JsonObject) metadataRow)
|
|
|
|
.getObject("metadataRowRenderer").getArray("contents");
|
|
|
|
for (final Object content : contents) {
|
|
|
|
final JsonArray runs = ((JsonObject) content).getArray("runs");
|
|
|
|
for (final Object run : runs) {
|
|
|
|
final String rowText = ((JsonObject) run).getString("text", EMPTY_STRING);
|
|
|
|
if (rowText.contains("Age-restricted")) {
|
|
|
|
ageLimit = 18;
|
|
|
|
return ageLimit;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-02-28 17:14:26 +01:00
|
|
|
return ageLimit;
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2017-08-10 19:50:59 +02:00
|
|
|
public long getLength() throws ParsingException {
|
2017-11-30 10:49:27 +01:00
|
|
|
assertPageFetched();
|
2019-07-30 20:53:23 +02:00
|
|
|
|
2017-08-10 19:50:59 +02:00
|
|
|
try {
|
2021-05-29 14:43:26 +02:00
|
|
|
final String duration = playerResponse
|
2019-07-30 20:53:23 +02:00
|
|
|
.getObject("videoDetails")
|
|
|
|
.getString("lengthSeconds");
|
|
|
|
return Long.parseLong(duration);
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final Exception e) {
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
if (desktopStreamingData != null) {
|
2021-08-12 18:26:43 +02:00
|
|
|
final JsonArray adaptiveFormats = desktopStreamingData.getArray(ADAPTIVE_FORMATS);
|
2021-06-11 13:32:59 +02:00
|
|
|
final String durationMs = adaptiveFormats.getObject(0)
|
2020-01-06 20:45:57 +01:00
|
|
|
.getString("approxDurationMs");
|
2020-01-20 22:52:48 +01:00
|
|
|
return Math.round(Long.parseLong(durationMs) / 1000f);
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
} else if (androidStreamingData != null) {
|
|
|
|
final JsonArray adaptiveFormats = androidStreamingData.getArray(ADAPTIVE_FORMATS);
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
final String durationMs = adaptiveFormats.getObject(0)
|
|
|
|
.getString("approxDurationMs");
|
|
|
|
return Math.round(Long.parseLong(durationMs) / 1000f);
|
|
|
|
} else {
|
2020-01-06 20:45:57 +01:00
|
|
|
throw new ParsingException("Could not get duration", e);
|
|
|
|
}
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-08-11 03:23:09 +02:00
|
|
|
/**
|
|
|
|
* Attempts to parse (and return) the offset to start playing the video from.
|
|
|
|
*
|
|
|
|
* @return the offset (in seconds), or 0 if no timestamp is found.
|
|
|
|
*/
|
|
|
|
@Override
|
|
|
|
public long getTimeStamp() throws ParsingException {
|
2020-04-11 17:18:17 +02:00
|
|
|
final long timestamp =
|
2022-01-12 10:45:06 +01:00
|
|
|
getTimestampSeconds("((#|&|\\?)t=\\d*h?\\d*m?\\d+s?)");
|
2020-04-09 14:45:33 +02:00
|
|
|
|
|
|
|
if (timestamp == -2) {
|
2021-05-29 14:43:26 +02:00
|
|
|
// Regex for timestamp was not found
|
2020-04-09 14:45:33 +02:00
|
|
|
return 0;
|
|
|
|
} else {
|
|
|
|
return timestamp;
|
|
|
|
}
|
2017-08-11 03:23:09 +02:00
|
|
|
}
|
|
|
|
|
2017-03-01 18:47:52 +01:00
|
|
|
@Override
|
|
|
|
public long getViewCount() throws ParsingException {
|
2021-06-02 18:46:35 +02:00
|
|
|
String views = null;
|
|
|
|
|
|
|
|
try {
|
|
|
|
views = getTextFromObject(getVideoPrimaryInfoRenderer().getObject("viewCount")
|
|
|
|
.getObject("videoViewCountRenderer").getObject("viewCount"));
|
|
|
|
} catch (final ParsingException ignored) {
|
|
|
|
// Age-restricted videos cause a ParsingException here
|
|
|
|
}
|
|
|
|
|
|
|
|
if (isNullOrEmpty(views)) {
|
|
|
|
views = playerResponse.getObject("videoDetails").getString("viewCount");
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
if (isNullOrEmpty(views)) {
|
|
|
|
throw new ParsingException("Could not get view count");
|
|
|
|
}
|
2021-06-02 18:46:35 +02:00
|
|
|
}
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
if (views.toLowerCase().contains("no views")) {
|
|
|
|
return 0;
|
|
|
|
}
|
2020-03-17 15:00:07 +01:00
|
|
|
|
2020-02-29 17:18:50 +01:00
|
|
|
return Long.parseLong(Utils.removeNonDigitCharacters(views));
|
2020-02-24 16:04:01 +01:00
|
|
|
}
|
|
|
|
|
2017-03-01 18:47:52 +01:00
|
|
|
@Override
|
2017-08-11 03:23:09 +02:00
|
|
|
public long getLikeCount() throws ParsingException {
|
2017-11-30 10:49:27 +01:00
|
|
|
assertPageFetched();
|
2021-11-30 19:52:51 +01:00
|
|
|
String likesString = "";
|
2017-03-01 18:47:52 +01:00
|
|
|
try {
|
2021-11-30 19:52:51 +01:00
|
|
|
likesString = getVideoPrimaryInfoRenderer()
|
|
|
|
.getObject("videoActions")
|
|
|
|
.getObject("menuRenderer")
|
|
|
|
.getArray("topLevelButtons")
|
|
|
|
.getObject(0)
|
|
|
|
.getObject("toggleButtonRenderer")
|
|
|
|
.getObject("defaultText")
|
|
|
|
.getObject("accessibility")
|
|
|
|
.getObject("accessibilityData")
|
|
|
|
.getString("label");
|
2021-11-19 21:36:03 +01:00
|
|
|
|
|
|
|
if (likesString == null) {
|
2021-05-29 14:43:26 +02:00
|
|
|
// If this kicks in our button has no content and therefore ratings must be disabled
|
2020-01-06 20:45:57 +01:00
|
|
|
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
|
2022-03-18 15:09:06 +01:00
|
|
|
throw new ParsingException(
|
|
|
|
"Ratings are enabled even though the like button is missing");
|
2020-01-06 20:45:57 +01:00
|
|
|
}
|
2017-08-11 03:23:09 +02:00
|
|
|
return -1;
|
|
|
|
}
|
2021-11-19 21:36:03 +01:00
|
|
|
|
2021-12-27 16:50:08 +01:00
|
|
|
if (likesString.toLowerCase().contains("no likes")) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-08-11 03:23:09 +02:00
|
|
|
return Integer.parseInt(Utils.removeNonDigitCharacters(likesString));
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final NumberFormatException nfe) {
|
2021-04-15 18:58:59 +02:00
|
|
|
throw new ParsingException("Could not parse \"" + likesString + "\" as an Integer",
|
|
|
|
nfe);
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final Exception e) {
|
2020-10-29 18:44:05 +01:00
|
|
|
if (getAgeLimit() == NO_AGE_LIMIT) {
|
|
|
|
throw new ParsingException("Could not get like count", e);
|
|
|
|
}
|
|
|
|
return -1;
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-25 01:10:04 +01:00
|
|
|
@Nonnull
|
2017-08-11 03:23:09 +02:00
|
|
|
@Override
|
|
|
|
public String getUploaderUrl() throws ParsingException {
|
2017-11-30 10:49:27 +01:00
|
|
|
assertPageFetched();
|
2020-04-16 16:08:14 +02:00
|
|
|
|
2021-05-29 14:43:26 +02:00
|
|
|
// Don't use the id in the videoSecondaryRenderer object to get real id of the uploader
|
|
|
|
// The difference between the real id of the channel and the displayed id is especially
|
|
|
|
// visible for music channels and autogenerated channels.
|
2021-02-24 17:06:38 +01:00
|
|
|
final String uploaderId = playerResponse.getObject("videoDetails").getString("channelId");
|
2020-07-18 09:50:22 +02:00
|
|
|
if (!isNullOrEmpty(uploaderId)) {
|
2020-04-16 16:08:14 +02:00
|
|
|
return YoutubeChannelLinkHandlerFactory.getInstance().getUrl("channel/" + uploaderId);
|
2020-07-18 09:50:22 +02:00
|
|
|
}
|
2020-04-16 16:08:14 +02:00
|
|
|
|
2020-02-25 10:05:53 +01:00
|
|
|
throw new ParsingException("Could not get uploader url");
|
2017-11-25 01:10:04 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Nonnull
|
2017-08-11 03:23:09 +02:00
|
|
|
@Override
|
|
|
|
public String getUploaderName() throws ParsingException {
|
2017-11-30 10:49:27 +01:00
|
|
|
assertPageFetched();
|
2020-07-18 09:50:22 +02:00
|
|
|
|
2021-04-15 18:58:59 +02:00
|
|
|
// Don't use the name in the videoSecondaryRenderer object to get real name of the uploader
|
|
|
|
// The difference between the real name of the channel and the displayed name is especially
|
|
|
|
// visible for music channels and autogenerated channels.
|
|
|
|
final String uploaderName = playerResponse.getObject("videoDetails").getString("author");
|
2022-03-18 15:09:06 +01:00
|
|
|
if (isNullOrEmpty(uploaderName)) {
|
|
|
|
throw new ParsingException("Could not get uploader name");
|
|
|
|
}
|
2020-02-29 17:18:50 +01:00
|
|
|
|
|
|
|
return uploaderName;
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
2021-01-22 01:44:58 +01:00
|
|
|
@Override
|
|
|
|
public boolean isUploaderVerified() throws ParsingException {
|
|
|
|
final JsonArray badges = getVideoSecondaryInfoRenderer().getObject("owner")
|
|
|
|
.getObject("videoOwnerRenderer").getArray("badges");
|
|
|
|
|
|
|
|
return YoutubeParsingHelper.isVerified(badges);
|
|
|
|
}
|
|
|
|
|
2017-11-25 01:10:04 +01:00
|
|
|
@Nonnull
|
2017-03-01 18:47:52 +01:00
|
|
|
@Override
|
2017-08-08 23:36:11 +02:00
|
|
|
public String getUploaderAvatarUrl() throws ParsingException {
|
2017-11-30 10:49:27 +01:00
|
|
|
assertPageFetched();
|
2020-07-18 09:50:22 +02:00
|
|
|
|
|
|
|
String url = null;
|
|
|
|
|
2020-02-18 13:05:11 +01:00
|
|
|
try {
|
2021-04-15 18:58:59 +02:00
|
|
|
url = getVideoSecondaryInfoRenderer().getObject("owner")
|
|
|
|
.getObject("videoOwnerRenderer").getObject("thumbnail")
|
|
|
|
.getArray("thumbnails").getObject(0).getString("url");
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final ParsingException ignored) {
|
2021-05-29 14:43:26 +02:00
|
|
|
// Age-restricted videos cause a ParsingException here
|
2020-07-26 14:15:13 +02:00
|
|
|
}
|
2020-02-27 17:39:23 +01:00
|
|
|
|
2020-07-18 09:50:22 +02:00
|
|
|
if (isNullOrEmpty(url)) {
|
2020-10-29 18:44:05 +01:00
|
|
|
if (ageLimit == NO_AGE_LIMIT) {
|
|
|
|
throw new ParsingException("Could not get uploader avatar URL");
|
|
|
|
}
|
|
|
|
return "";
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
2020-07-18 09:50:22 +02:00
|
|
|
|
|
|
|
return fixThumbnailUrl(url);
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
2022-02-11 05:08:22 +01:00
|
|
|
@Override
|
|
|
|
public long getUploaderSubscriberCount() throws ParsingException {
|
2022-03-18 15:09:06 +01:00
|
|
|
final JsonObject videoOwnerRenderer = JsonUtils.getObject(videoSecondaryInfoRenderer,
|
|
|
|
"owner.videoOwnerRenderer");
|
2022-02-26 18:12:51 +01:00
|
|
|
if (!videoOwnerRenderer.has("subscriberCountText")) {
|
2022-02-12 19:00:54 +01:00
|
|
|
return UNKNOWN_SUBSCRIBER_COUNT;
|
2022-02-11 05:08:22 +01:00
|
|
|
}
|
2022-02-26 18:12:51 +01:00
|
|
|
try {
|
2022-03-18 15:09:06 +01:00
|
|
|
return Utils.mixedNumberWordToLong(getTextFromObject(videoOwnerRenderer
|
|
|
|
.getObject("subscriberCountText")));
|
2022-02-26 18:12:51 +01:00
|
|
|
} catch (final NumberFormatException e) {
|
|
|
|
throw new ParsingException("Could not get uploader subscriber count", e);
|
|
|
|
}
|
2022-02-11 05:08:22 +01:00
|
|
|
}
|
|
|
|
|
2018-02-25 22:03:32 +01:00
|
|
|
@Nonnull
|
2017-03-01 18:47:52 +01:00
|
|
|
@Override
|
|
|
|
public String getDashMpdUrl() throws ParsingException {
|
2017-11-30 10:49:27 +01:00
|
|
|
assertPageFetched();
|
2021-06-24 18:39:16 +02:00
|
|
|
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
if (desktopStreamingData != null) {
|
|
|
|
return desktopStreamingData.getString("dashManifestUrl");
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
} else if (androidStreamingData != null) {
|
|
|
|
return androidStreamingData.getString("dashManifestUrl");
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
} else {
|
|
|
|
return EMPTY_STRING;
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-25 22:03:32 +01:00
|
|
|
@Nonnull
|
|
|
|
@Override
|
|
|
|
public String getHlsUrl() throws ParsingException {
|
|
|
|
assertPageFetched();
|
2019-01-19 13:50:02 +01:00
|
|
|
|
2022-01-15 17:25:00 +01:00
|
|
|
// Return HLS manifest of the iOS client first because on livestreams, the HLS manifest
|
|
|
|
// returned has separated audio and video streams
|
|
|
|
// Also, on videos, non-iOS clients don't have an HLS manifest URL in their player response
|
|
|
|
if (iosStreamingData != null) {
|
|
|
|
return iosStreamingData.getString("hlsManifestUrl", EMPTY_STRING);
|
|
|
|
} else if (desktopStreamingData != null) {
|
|
|
|
return desktopStreamingData.getString("hlsManifestUrl", EMPTY_STRING);
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
} else if (androidStreamingData != null) {
|
2022-01-15 17:25:00 +01:00
|
|
|
return androidStreamingData.getString("hlsManifestUrl", EMPTY_STRING);
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
} else {
|
|
|
|
return EMPTY_STRING;
|
2018-02-25 22:03:32 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-03-01 18:47:52 +01:00
|
|
|
@Override
|
2019-09-11 20:04:28 +02:00
|
|
|
public List<AudioStream> getAudioStreams() throws ExtractionException {
|
2017-11-30 10:49:27 +01:00
|
|
|
assertPageFetched();
|
2021-02-24 17:06:38 +01:00
|
|
|
final List<AudioStream> audioStreams = new ArrayList<>();
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2021-02-24 17:06:38 +01:00
|
|
|
try {
|
2021-04-15 18:58:59 +02:00
|
|
|
for (final Map.Entry<String, ItagItem> entry : getItags(ADAPTIVE_FORMATS,
|
|
|
|
ItagItem.ItagType.AUDIO).entrySet()) {
|
2021-02-24 17:06:38 +01:00
|
|
|
final ItagItem itag = entry.getValue();
|
2022-02-01 19:52:41 +01:00
|
|
|
final String url = tryDecryption(entry.getKey(), getId());
|
2021-07-27 17:28:32 +02:00
|
|
|
|
|
|
|
final AudioStream audioStream = new AudioStream(url, itag);
|
2017-08-10 19:50:59 +02:00
|
|
|
if (!Stream.containSimilarStream(audioStream, audioStreams)) {
|
|
|
|
audioStreams.add(audioStream);
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
}
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final Exception e) {
|
2017-08-10 19:50:59 +02:00
|
|
|
throw new ParsingException("Could not get audio streams", e);
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
2017-08-10 19:50:59 +02:00
|
|
|
|
2017-03-01 18:47:52 +01:00
|
|
|
return audioStreams;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2019-09-11 20:04:28 +02:00
|
|
|
public List<VideoStream> getVideoStreams() throws ExtractionException {
|
2017-11-30 10:49:27 +01:00
|
|
|
assertPageFetched();
|
2021-02-24 17:06:38 +01:00
|
|
|
final List<VideoStream> videoStreams = new ArrayList<>();
|
2017-08-10 19:50:59 +02:00
|
|
|
|
2021-02-24 17:06:38 +01:00
|
|
|
try {
|
2021-04-15 18:58:59 +02:00
|
|
|
for (final Map.Entry<String, ItagItem> entry : getItags(FORMATS,
|
|
|
|
ItagItem.ItagType.VIDEO).entrySet()) {
|
2021-02-24 17:06:38 +01:00
|
|
|
final ItagItem itag = entry.getValue();
|
2022-02-01 19:52:41 +01:00
|
|
|
final String url = tryDecryption(entry.getKey(), getId());
|
2021-07-12 20:06:19 +02:00
|
|
|
|
2021-07-17 19:10:09 +02:00
|
|
|
final VideoStream videoStream = new VideoStream(url, false, itag);
|
2017-08-10 19:50:59 +02:00
|
|
|
if (!Stream.containSimilarStream(videoStream, videoStreams)) {
|
|
|
|
videoStreams.add(videoStream);
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
}
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final Exception e) {
|
2017-08-10 19:50:59 +02:00
|
|
|
throw new ParsingException("Could not get video streams", e);
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return videoStreams;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2018-08-21 17:23:56 +02:00
|
|
|
public List<VideoStream> getVideoOnlyStreams() throws ExtractionException {
|
2017-11-30 10:49:27 +01:00
|
|
|
assertPageFetched();
|
2021-02-24 17:06:38 +01:00
|
|
|
final List<VideoStream> videoOnlyStreams = new ArrayList<>();
|
2021-07-27 17:28:32 +02:00
|
|
|
|
2017-04-12 02:55:53 +02:00
|
|
|
try {
|
2021-04-15 18:58:59 +02:00
|
|
|
for (final Map.Entry<String, ItagItem> entry : getItags(ADAPTIVE_FORMATS,
|
|
|
|
ItagItem.ItagType.VIDEO_ONLY).entrySet()) {
|
2021-02-24 17:06:38 +01:00
|
|
|
final ItagItem itag = entry.getValue();
|
2022-02-01 19:52:41 +01:00
|
|
|
final String url = tryDecryption(entry.getKey(), getId());
|
2017-04-12 02:55:53 +02:00
|
|
|
|
2021-07-27 17:28:32 +02:00
|
|
|
final VideoStream videoStream = new VideoStream(url, true, itag);
|
2017-08-10 19:50:59 +02:00
|
|
|
if (!Stream.containSimilarStream(videoStream, videoOnlyStreams)) {
|
|
|
|
videoOnlyStreams.add(videoStream);
|
2017-04-12 02:55:53 +02:00
|
|
|
}
|
|
|
|
}
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final Exception e) {
|
2017-08-10 19:50:59 +02:00
|
|
|
throw new ParsingException("Could not get video only streams", e);
|
2017-04-12 02:55:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return videoOnlyStreams;
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
2022-02-01 19:52:41 +01:00
|
|
|
/**
|
|
|
|
* Try to decrypt url and fallback to given url, because decryption is not
|
|
|
|
* always needed.
|
|
|
|
* This way a breaking change from YouTube does not result in a broken extractor.
|
|
|
|
*/
|
|
|
|
private String tryDecryption(final String url, final String videoId) {
|
|
|
|
try {
|
|
|
|
return YoutubeThrottlingDecrypter.apply(url, videoId);
|
2022-03-15 17:10:05 +01:00
|
|
|
} catch (final ParsingException e) {
|
2022-02-01 19:52:41 +01:00
|
|
|
return url;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-22 18:39:38 +01:00
|
|
|
@Override
|
2018-02-01 22:27:14 +01:00
|
|
|
@Nonnull
|
2020-10-29 18:44:05 +01:00
|
|
|
public List<SubtitlesStream> getSubtitlesDefault() throws ParsingException {
|
2018-09-24 21:04:22 +02:00
|
|
|
return getSubtitles(MediaFormat.TTML);
|
2017-11-23 16:33:03 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2018-02-01 22:27:14 +01:00
|
|
|
@Nonnull
|
2020-10-29 18:44:05 +01:00
|
|
|
public List<SubtitlesStream> getSubtitles(final MediaFormat format) throws ParsingException {
|
2017-11-30 10:49:27 +01:00
|
|
|
assertPageFetched();
|
2021-06-02 21:24:55 +02:00
|
|
|
// If the video is age-restricted getSubtitles will fail
|
2020-10-29 18:44:05 +01:00
|
|
|
if (getAgeLimit() != NO_AGE_LIMIT) {
|
|
|
|
return Collections.emptyList();
|
|
|
|
}
|
|
|
|
if (subtitles != null) {
|
2021-05-29 14:43:26 +02:00
|
|
|
// Already calculated
|
2020-10-29 18:44:05 +01:00
|
|
|
return subtitles;
|
2017-11-23 11:47:05 +01:00
|
|
|
}
|
2020-10-29 18:44:05 +01:00
|
|
|
|
|
|
|
final JsonObject renderer = playerResponse.getObject("captions")
|
|
|
|
.getObject("playerCaptionsTracklistRenderer");
|
|
|
|
final JsonArray captionsArray = renderer.getArray("captionTracks");
|
|
|
|
// TODO: use this to apply auto translation to different language from a source language
|
|
|
|
// final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages");
|
|
|
|
|
|
|
|
subtitles = new ArrayList<>();
|
|
|
|
for (int i = 0; i < captionsArray.size(); i++) {
|
|
|
|
final String languageCode = captionsArray.getObject(i).getString("languageCode");
|
|
|
|
final String baseUrl = captionsArray.getObject(i).getString("baseUrl");
|
|
|
|
final String vssId = captionsArray.getObject(i).getString("vssId");
|
|
|
|
|
|
|
|
if (languageCode != null && baseUrl != null && vssId != null) {
|
|
|
|
final boolean isAutoGenerated = vssId.startsWith("a.");
|
|
|
|
final String cleanUrl = baseUrl
|
|
|
|
.replaceAll("&fmt=[^&]*", "") // Remove preexisting format if exists
|
|
|
|
.replaceAll("&tlang=[^&]*", ""); // Remove translation language
|
|
|
|
|
|
|
|
subtitles.add(new SubtitlesStream(format, languageCode,
|
|
|
|
cleanUrl + "&fmt=" + format.getSuffix(), isAutoGenerated));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-01 22:27:14 +01:00
|
|
|
return subtitles;
|
2017-11-22 18:39:38 +01:00
|
|
|
}
|
|
|
|
|
2017-03-01 18:47:52 +01:00
|
|
|
@Override
|
2020-11-04 14:50:35 +01:00
|
|
|
public StreamType getStreamType() {
|
2018-02-25 23:31:42 +01:00
|
|
|
assertPageFetched();
|
2021-06-24 18:39:16 +02:00
|
|
|
|
2022-01-15 17:25:00 +01:00
|
|
|
return streamType;
|
|
|
|
}
|
|
|
|
|
|
|
|
private void setStreamType() {
|
2021-06-26 20:04:55 +02:00
|
|
|
if (playerResponse.getObject("playabilityStatus").has("liveStreamability")
|
|
|
|
|| playerResponse.getObject("videoDetails").getBoolean("isPostLiveDvr", false)) {
|
2022-01-15 17:25:00 +01:00
|
|
|
streamType = StreamType.LIVE_STREAM;
|
|
|
|
} else {
|
|
|
|
streamType = StreamType.VIDEO_STREAM;
|
2021-06-24 18:39:16 +02:00
|
|
|
}
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
2020-10-24 21:17:58 +02:00
|
|
|
@Nullable
|
2017-03-01 18:47:52 +01:00
|
|
|
@Override
|
2022-02-02 20:23:11 +01:00
|
|
|
public MultiInfoItemsCollector getRelatedItems() throws ExtractionException {
|
2017-11-30 10:49:27 +01:00
|
|
|
assertPageFetched();
|
2020-02-28 16:40:50 +01:00
|
|
|
|
2020-10-24 21:17:58 +02:00
|
|
|
if (getAgeLimit() != NO_AGE_LIMIT) {
|
|
|
|
return null;
|
|
|
|
}
|
2020-02-28 16:40:50 +01:00
|
|
|
|
2017-03-01 18:47:52 +01:00
|
|
|
try {
|
2022-02-02 20:23:11 +01:00
|
|
|
final MultiInfoItemsCollector collector = new MultiInfoItemsCollector(getServiceId());
|
2020-05-30 10:25:43 +02:00
|
|
|
|
2021-05-23 17:55:19 +02:00
|
|
|
final JsonArray results = nextResponse.getObject("contents")
|
2021-04-15 18:58:59 +02:00
|
|
|
.getObject("twoColumnWatchNextResults").getObject("secondaryResults")
|
|
|
|
.getObject("secondaryResults").getArray("results");
|
2020-02-18 17:04:22 +01:00
|
|
|
|
2019-04-28 22:03:16 +02:00
|
|
|
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
|
|
|
|
2022-02-02 20:23:11 +01:00
|
|
|
for (final Object resultObject : results) {
|
|
|
|
final JsonObject result = (JsonObject) resultObject;
|
|
|
|
if (result.has("compactVideoRenderer")) {
|
|
|
|
collector.commit(new YoutubeStreamInfoItemExtractor(
|
|
|
|
result.getObject("compactVideoRenderer"), timeAgoParser));
|
|
|
|
} else if (result.has("compactRadioRenderer")) {
|
2022-02-28 19:42:34 +01:00
|
|
|
collector.commit(new YoutubeMixOrPlaylistInfoItemExtractor(
|
2022-02-02 20:23:11 +01:00
|
|
|
result.getObject("compactRadioRenderer")));
|
2022-02-28 19:42:34 +01:00
|
|
|
} else if (result.has("compactPlaylistRenderer")) {
|
|
|
|
collector.commit(new YoutubeMixOrPlaylistInfoItemExtractor(
|
|
|
|
result.getObject("compactPlaylistRenderer")));
|
2020-04-16 16:08:14 +02:00
|
|
|
}
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
return collector;
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final Exception e) {
|
2017-03-01 18:47:52 +01:00
|
|
|
throw new ParsingException("Could not get related videos", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-29 20:12:55 +02:00
|
|
|
/**
|
2017-07-11 05:08:03 +02:00
|
|
|
* {@inheritDoc}
|
2017-06-29 20:12:55 +02:00
|
|
|
*/
|
2017-07-11 05:08:03 +02:00
|
|
|
@Override
|
|
|
|
public String getErrorMessage() {
|
2020-02-29 22:57:25 +01:00
|
|
|
try {
|
2021-04-15 18:58:59 +02:00
|
|
|
return getTextFromObject(playerResponse.getObject("playabilityStatus")
|
|
|
|
.getObject("errorScreen").getObject("playerErrorMessageRenderer")
|
|
|
|
.getObject("reason"));
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final ParsingException | NullPointerException e) {
|
2021-05-29 14:43:26 +02:00
|
|
|
return null; // No error message
|
2020-02-29 22:57:25 +01:00
|
|
|
}
|
2017-07-11 05:08:03 +02:00
|
|
|
}
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2017-07-11 05:08:03 +02:00
|
|
|
/*//////////////////////////////////////////////////////////////////////////
|
2017-08-10 19:50:59 +02:00
|
|
|
// Fetch page
|
2017-07-11 05:08:03 +02:00
|
|
|
//////////////////////////////////////////////////////////////////////////*/
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2019-09-11 20:04:28 +02:00
|
|
|
private static final String FORMATS = "formats";
|
|
|
|
private static final String ADAPTIVE_FORMATS = "adaptiveFormats";
|
2020-10-26 21:22:21 +01:00
|
|
|
private static final String DEOBFUSCATION_FUNC_NAME = "deobfuscate";
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
private static final String STREAMING_DATA = "streamingData";
|
|
|
|
private static final String PLAYER = "player";
|
|
|
|
private static final String NEXT = "next";
|
2017-07-11 05:08:03 +02:00
|
|
|
|
2021-05-29 14:43:26 +02:00
|
|
|
private static final String[] REGEXES = {
|
2022-03-18 15:09:06 +01:00
|
|
|
"(?:\\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)"
|
|
|
|
+ "\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*\"\"\\s*\\)",
|
2021-08-12 18:26:43 +02:00
|
|
|
"\\bm=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(h\\.s\\)\\)",
|
|
|
|
"\\bc&&\\(c=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(c\\)\\)",
|
2020-07-28 00:44:38 +02:00
|
|
|
"([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;",
|
2021-08-12 18:26:43 +02:00
|
|
|
"\\b([\\w$]{2,})\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;",
|
2020-07-28 00:44:38 +02:00
|
|
|
"\\bc\\s*&&\\s*d\\.set\\([^,]+\\s*,\\s*(:encodeURIComponent\\s*\\()([a-zA-Z0-9$]+)\\("
|
|
|
|
};
|
2021-05-29 14:43:26 +02:00
|
|
|
// Captures the run of digits that follows "signatureTimestamp=" or "signatureTimestamp:".
// NOTE(review): presumably matched against the player JavaScript to obtain sts — confirm.
private static final String STS_REGEX = "signatureTimestamp[=:](\\d+)";
|
2018-09-08 07:25:07 +02:00
|
|
|
|
2017-08-06 22:20:15 +02:00
|
|
|
@Override
|
2020-10-29 18:44:05 +01:00
|
|
|
public void onFetchPage(@Nonnull final Downloader downloader)
|
|
|
|
throws IOException, ExtractionException {
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
if (sts == null) {
|
|
|
|
getStsFromPlayerJs();
|
|
|
|
}
|
|
|
|
|
2021-05-29 14:43:26 +02:00
|
|
|
final String videoId = getId();
|
2021-05-23 17:55:19 +02:00
|
|
|
final Localization localization = getExtractorLocalization();
|
|
|
|
final ContentCountry contentCountry = getExtractorContentCountry();
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
desktopCpn = generateContentPlaybackNonce();
|
2020-02-28 15:17:47 +01:00
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
playerResponse = getJsonPostResponse(PLAYER,
|
|
|
|
createDesktopPlayerBody(localization, contentCountry, videoId, sts, false,
|
|
|
|
desktopCpn),
|
|
|
|
localization);
|
2021-05-29 14:43:26 +02:00
|
|
|
|
|
|
|
// Save the playerResponse from the player endpoint of the desktop internal API because
|
|
|
|
// there can be restrictions on the embedded player.
|
|
|
|
// E.g. if a video is age-restricted, the embedded player's playabilityStatus says that
|
|
|
|
// the video cannot be played outside of YouTube, but does not show the original message.
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here are the requests now made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API, but some streams may not be available on this API, so the streaming data of the Android mobile API will be used first to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
final JsonObject youtubePlayerResponse = playerResponse;
|
2021-02-24 12:06:19 +01:00
|
|
|
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
if (playerResponse == null) {
|
2021-02-24 14:13:12 +01:00
|
|
|
throw new ExtractionException("Could not get playerResponse");
|
|
|
|
}
|
|
|
|
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
final JsonObject playabilityStatus = playerResponse.getObject("playabilityStatus");
|
2021-05-29 14:43:26 +02:00
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
final boolean ageRestricted = playabilityStatus.getString("reason", EMPTY_STRING)
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
.contains("age");
|
2021-05-29 14:43:26 +02:00
|
|
|
|
2022-01-15 17:25:00 +01:00
|
|
|
setStreamType();
|
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
if (!playerResponse.has(STREAMING_DATA)) {
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
try {
|
|
|
|
fetchDesktopEmbedJsonPlayer(contentCountry, localization, videoId);
|
|
|
|
} catch (final Exception ignored) {
|
|
|
|
}
|
2022-01-15 17:25:00 +01:00
|
|
|
|
|
|
|
// Refresh the stream type because the stream type maybe not properly known for
|
|
|
|
// age-restricted videos
|
|
|
|
setStreamType();
|
|
|
|
|
|
|
|
if (streamType == StreamType.VIDEO_STREAM || isAndroidClientFetchForced) {
|
|
|
|
try {
|
|
|
|
fetchAndroidEmbedJsonPlayer(contentCountry, localization, videoId);
|
|
|
|
} catch (final Exception ignored) {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (streamType == StreamType.LIVE_STREAM || isIosClientFetchForced) {
|
|
|
|
try {
|
|
|
|
fetchIosEmbedJsonPlayer(contentCountry, localization, videoId);
|
|
|
|
} catch (final Exception ignored) {
|
|
|
|
}
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
}
|
|
|
|
}
|
2021-05-29 14:43:26 +02:00
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
if (desktopStreamingData == null && playerResponse.has(STREAMING_DATA)) {
|
|
|
|
desktopStreamingData = playerResponse.getObject(STREAMING_DATA);
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (desktopStreamingData == null) {
|
|
|
|
checkPlayabilityStatus(youtubePlayerResponse, playabilityStatus);
|
|
|
|
}
|
2021-07-05 19:21:54 +02:00
|
|
|
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
if (ageRestricted) {
|
|
|
|
final byte[] ageRestrictedBody = JsonWriter.string(prepareDesktopEmbedVideoJsonBuilder(
|
2022-03-26 19:46:10 +01:00
|
|
|
localization, contentCountry, videoId)
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
.value(VIDEO_ID, videoId)
|
2022-03-26 19:46:10 +01:00
|
|
|
.done())
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
.getBytes(UTF_8);
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
nextResponse = getJsonPostResponse(NEXT, ageRestrictedBody, localization);
|
2021-07-05 19:21:54 +02:00
|
|
|
} else {
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
|
|
|
|
contentCountry)
|
|
|
|
.value(VIDEO_ID, videoId)
|
|
|
|
.done())
|
|
|
|
.getBytes(UTF_8);
|
|
|
|
nextResponse = getJsonPostResponse(NEXT, body, localization);
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
}
|
|
|
|
|
2022-01-15 17:25:00 +01:00
|
|
|
if ((!ageRestricted && streamType == StreamType.VIDEO_STREAM)
|
|
|
|
|| isAndroidClientFetchForced) {
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
try {
|
|
|
|
fetchAndroidMobileJsonPlayer(contentCountry, localization, videoId);
|
|
|
|
} catch (final Exception ignored) {
|
|
|
|
}
|
|
|
|
}
|
2022-01-15 17:25:00 +01:00
|
|
|
|
|
|
|
if ((!ageRestricted && streamType == StreamType.LIVE_STREAM)
|
|
|
|
|| isIosClientFetchForced) {
|
|
|
|
try {
|
|
|
|
fetchIosMobileJsonPlayer(contentCountry, localization, videoId);
|
|
|
|
} catch (final Exception ignored) {
|
|
|
|
}
|
|
|
|
}
|
2021-05-29 14:43:26 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
private void checkPlayabilityStatus(final JsonObject youtubePlayerResponse,
|
2022-03-18 15:09:06 +01:00
|
|
|
@Nonnull final JsonObject playabilityStatus)
|
2021-06-06 18:32:07 +02:00
|
|
|
throws ParsingException {
|
2021-02-24 12:06:19 +01:00
|
|
|
String status = playabilityStatus.getString("status");
|
2021-02-14 15:22:45 +01:00
|
|
|
// If status exist, and is not "OK", throw the specific exception based on error message
|
|
|
|
// or a ContentNotAvailableException with the reason text if it's an unknown reason.
|
2021-02-24 14:13:12 +01:00
|
|
|
if (status != null && !status.equalsIgnoreCase("ok")) {
|
2022-03-18 15:09:06 +01:00
|
|
|
final JsonObject newPlayabilityStatus
|
|
|
|
= youtubePlayerResponse.getObject("playabilityStatus");
|
|
|
|
status = newPlayabilityStatus.getString("status");
|
|
|
|
final String reason = newPlayabilityStatus.getString("reason");
|
2021-02-14 15:22:45 +01:00
|
|
|
|
2021-02-24 14:13:12 +01:00
|
|
|
if (status.equalsIgnoreCase("login_required")) {
|
2021-02-14 15:22:45 +01:00
|
|
|
if (reason == null) {
|
2022-03-18 15:09:06 +01:00
|
|
|
final String message = newPlayabilityStatus.getArray("messages").getString(0);
|
2021-06-06 15:40:19 +02:00
|
|
|
if (message != null && message.contains("private")) {
|
2021-02-14 15:22:45 +01:00
|
|
|
throw new PrivateContentException("This video is private.");
|
|
|
|
}
|
2021-06-06 15:40:19 +02:00
|
|
|
} else if (reason.contains("age")) {
|
2021-05-29 14:43:26 +02:00
|
|
|
// No streams can be fetched, therefore throw an AgeRestrictedContentException
|
2021-04-15 18:58:59 +02:00
|
|
|
// explicitly.
|
|
|
|
throw new AgeRestrictedContentException(
|
|
|
|
"This age-restricted video cannot be watched.");
|
2021-02-14 15:22:45 +01:00
|
|
|
}
|
|
|
|
}
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
|
|
|
if (status.equalsIgnoreCase("unplayable") && reason != null) {
|
|
|
|
if (reason.contains("Music Premium")) {
|
|
|
|
throw new YoutubeMusicPremiumContentException();
|
|
|
|
}
|
|
|
|
if (reason.contains("payment")) {
|
|
|
|
throw new PaidContentException("This video is a paid video");
|
|
|
|
}
|
|
|
|
if (reason.contains("members-only")) {
|
|
|
|
throw new PaidContentException("This video is only available"
|
|
|
|
+ " for members of the channel of this video");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (reason.contains("unavailable")) {
|
|
|
|
final String detailedErrorMessage = getTextFromObject(newPlayabilityStatus
|
|
|
|
.getObject("errorScreen").getObject("playerErrorMessageRenderer")
|
|
|
|
.getObject("subreason"));
|
|
|
|
if (detailedErrorMessage != null && detailedErrorMessage.contains("country")) {
|
|
|
|
throw new GeographicRestrictionException(
|
|
|
|
"This video is not available in client's country.");
|
2021-02-14 15:22:45 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-02-21 17:43:15 +01:00
|
|
|
|
|
|
|
throw new ContentNotAvailableException("Got error: \"" + reason + "\"");
|
2020-03-01 02:00:33 +01:00
|
|
|
}
|
2021-05-29 14:43:26 +02:00
|
|
|
}
|
|
|
|
|
2021-06-02 21:44:51 +02:00
|
|
|
/**
|
2022-01-15 17:25:00 +01:00
|
|
|
* Fetch the Android Mobile API and assign the streaming data to the androidStreamingData JSON
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
* object.
|
2021-06-02 21:44:51 +02:00
|
|
|
*/
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
private void fetchAndroidMobileJsonPlayer(@Nonnull final ContentCountry contentCountry,
|
|
|
|
@Nonnull final Localization localization,
|
|
|
|
@Nonnull final String videoId)
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
throws IOException, ExtractionException {
|
|
|
|
final byte[] mobileBody = JsonWriter.string(prepareAndroidMobileJsonBuilder(
|
2022-03-26 19:46:10 +01:00
|
|
|
localization, contentCountry)
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
.value(VIDEO_ID, videoId)
|
|
|
|
.value(CPN, androidCpn)
|
2022-03-26 19:46:10 +01:00
|
|
|
.done())
|
2021-05-29 14:43:26 +02:00
|
|
|
.getBytes(UTF_8);
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
final JsonObject androidPlayerResponse = getJsonAndroidPostResponse(PLAYER,
|
2022-01-15 17:25:00 +01:00
|
|
|
mobileBody, localization, "&t=" + generateTParameter()
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
+ "&id=" + videoId);
|
|
|
|
|
|
|
|
final JsonObject streamingData = androidPlayerResponse.getObject(STREAMING_DATA);
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
if (!isNullOrEmpty(streamingData)) {
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
androidStreamingData = streamingData;
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
if (desktopStreamingData == null) {
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
playerResponse = androidPlayerResponse;
|
2021-05-29 14:43:26 +02:00
|
|
|
}
|
|
|
|
}
|
2017-07-11 05:08:03 +02:00
|
|
|
}
|
|
|
|
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
/**
|
2022-01-15 17:25:00 +01:00
|
|
|
* Fetch the iOS Mobile API and assign the streaming data to the iosStreamingData JSON
|
|
|
|
* object.
|
|
|
|
*/
|
|
|
|
private void fetchIosMobileJsonPlayer(@Nonnull final ContentCountry contentCountry,
|
|
|
|
@Nonnull final Localization localization,
|
|
|
|
@Nonnull final String videoId)
|
|
|
|
throws IOException, ExtractionException {
|
|
|
|
iosCpn = generateContentPlaybackNonce();
|
|
|
|
final byte[] mobileBody = JsonWriter.string(prepareIosMobileJsonBuilder(
|
|
|
|
localization, contentCountry)
|
|
|
|
.value(VIDEO_ID, videoId)
|
|
|
|
.value(CPN, iosCpn)
|
|
|
|
.done())
|
|
|
|
.getBytes(UTF_8);
|
|
|
|
|
|
|
|
final JsonObject iosPlayerResponse = getJsonIosPostResponse(PLAYER,
|
|
|
|
mobileBody, localization, "&t=" + generateTParameter()
|
|
|
|
+ "&id=" + videoId);
|
|
|
|
|
|
|
|
final JsonObject streamingData = iosPlayerResponse.getObject(STREAMING_DATA);
|
|
|
|
if (!isNullOrEmpty(streamingData)) {
|
|
|
|
iosStreamingData = streamingData;
|
|
|
|
if (desktopStreamingData == null) {
|
|
|
|
playerResponse = iosPlayerResponse;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Download the web desktop JSON player as an embed client to bypass some age-restrictions and
|
|
|
|
* assign the streaming data to the desktopStreamingData JSON object.
|
|
|
|
*
|
|
|
|
* @param contentCountry the content country to use
|
|
|
|
* @param localization the localization to use
|
|
|
|
* @param videoId the video id
|
|
|
|
* @throws IOException if something goes wrong when fetching the web desktop embed
|
|
|
|
* player endpoint
|
|
|
|
* @throws ExtractionException if something goes wrong when fetching the web desktop embed
|
|
|
|
* player endpoint
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
*/
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
private void fetchDesktopEmbedJsonPlayer(@Nonnull final ContentCountry contentCountry,
|
|
|
|
@Nonnull final Localization localization,
|
|
|
|
@Nonnull final String videoId)
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
throws IOException, ExtractionException {
|
|
|
|
if (sts == null) {
|
|
|
|
getStsFromPlayerJs();
|
|
|
|
}
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
|
|
|
// Because a cpn is unique to each request, we need to generate it again
|
|
|
|
desktopCpn = generateContentPlaybackNonce();
|
|
|
|
|
|
|
|
final JsonObject desktopWebEmbedPlayerResponse = getJsonPostResponse(PLAYER,
|
|
|
|
createDesktopPlayerBody(localization, contentCountry, videoId, sts, true,
|
|
|
|
desktopCpn),
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
localization);
|
|
|
|
final JsonObject streamingData = desktopWebEmbedPlayerResponse.getObject(
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
STREAMING_DATA);
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
if (!isNullOrEmpty(streamingData)) {
|
|
|
|
playerResponse = desktopWebEmbedPlayerResponse;
|
|
|
|
desktopStreamingData = streamingData;
|
2019-09-11 19:05:41 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
/**
|
2022-01-15 17:25:00 +01:00
|
|
|
* Download the Android mobile JSON player as an embed client to bypass some age-restrictions
|
|
|
|
* and assign the streaming data to the androidStreamingData JSON object.
|
|
|
|
*
|
|
|
|
* @param contentCountry the content country to use
|
|
|
|
* @param localization the localization to use
|
|
|
|
* @param videoId the video id
|
|
|
|
* @throws IOException if something goes wrong when fetching the Android embed player
|
|
|
|
* endpoint
|
|
|
|
* @throws ExtractionException if something goes wrong when fetching the Android embed player
|
|
|
|
* endpoint
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
*/
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
private void fetchAndroidEmbedJsonPlayer(@Nonnull final ContentCountry contentCountry,
|
|
|
|
@Nonnull final Localization localization,
|
|
|
|
@Nonnull final String videoId)
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
throws IOException, ExtractionException {
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
// Because a cpn is unique to each request, we need to generate it again
|
|
|
|
androidCpn = generateContentPlaybackNonce();
|
|
|
|
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
final byte[] androidMobileEmbedBody = JsonWriter.string(
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
prepareAndroidMobileEmbedVideoJsonBuilder(localization, contentCountry, videoId,
|
|
|
|
androidCpn)
|
|
|
|
.done())
|
2021-05-29 14:43:26 +02:00
|
|
|
.getBytes(UTF_8);
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
final JsonObject androidMobileEmbedPlayerResponse = getJsonAndroidPostResponse(PLAYER,
|
2022-01-15 17:25:00 +01:00
|
|
|
androidMobileEmbedBody, localization, "&t=" + generateTParameter()
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
+ "&id=" + videoId);
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
final JsonObject streamingData = androidMobileEmbedPlayerResponse.getObject(
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
STREAMING_DATA);
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
if (!isNullOrEmpty(streamingData)) {
|
|
|
|
if (desktopStreamingData == null) {
|
|
|
|
playerResponse = androidMobileEmbedPlayerResponse;
|
|
|
|
}
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
androidStreamingData = androidMobileEmbedPlayerResponse.getObject(STREAMING_DATA);
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
}
|
2021-05-29 14:43:26 +02:00
|
|
|
}
|
|
|
|
|
2022-01-15 17:25:00 +01:00
|
|
|
/**
|
|
|
|
* Download the iOS mobile JSON player as an embed client to bypass some age-restrictions and
|
|
|
|
* assign the streaming data to the iosStreamingData JSON object.
|
|
|
|
*
|
|
|
|
* @param contentCountry the content country to use
|
|
|
|
* @param localization the localization to use
|
|
|
|
* @param videoId the video id
|
|
|
|
* @throws IOException if something goes wrong when fetching the iOS embed player
|
|
|
|
* endpoint
|
|
|
|
* @throws ExtractionException if something goes wrong when fetching the iOS embed player
|
|
|
|
* endpoint
|
|
|
|
*/
|
|
|
|
private void fetchIosEmbedJsonPlayer(@Nonnull final ContentCountry contentCountry,
|
|
|
|
@Nonnull final Localization localization,
|
|
|
|
@Nonnull final String videoId)
|
|
|
|
throws IOException, ExtractionException {
|
|
|
|
// Because a cpn is unique to each request, we need to generate it again
|
|
|
|
iosCpn = generateContentPlaybackNonce();
|
|
|
|
|
|
|
|
final byte[] androidMobileEmbedBody = JsonWriter.string(
|
|
|
|
prepareIosMobileEmbedVideoJsonBuilder(localization, contentCountry, videoId,
|
|
|
|
iosCpn)
|
|
|
|
.done())
|
|
|
|
.getBytes(UTF_8);
|
|
|
|
final JsonObject iosMobileEmbedPlayerResponse = getJsonIosPostResponse(PLAYER,
|
|
|
|
androidMobileEmbedBody, localization, "&t=" + generateTParameter()
|
|
|
|
+ "&id=" + videoId);
|
|
|
|
final JsonObject streamingData = iosMobileEmbedPlayerResponse.getObject(
|
|
|
|
STREAMING_DATA);
|
|
|
|
if (!isNullOrEmpty(streamingData)) {
|
|
|
|
if (desktopStreamingData == null) {
|
|
|
|
playerResponse = iosMobileEmbedPlayerResponse;
|
|
|
|
}
|
|
|
|
iosStreamingData = iosMobileEmbedPlayerResponse.getObject(STREAMING_DATA);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
private static void storePlayerJs() throws ParsingException {
|
2021-04-15 18:58:59 +02:00
|
|
|
try {
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
playerCode = YoutubeJavaScriptExtractor.extractJavaScriptCode();
|
2021-05-29 14:43:26 +02:00
|
|
|
} catch (final Exception e) {
|
|
|
|
throw new ParsingException("Could not store JavaScript player", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
private static String getDeobfuscationFuncName(final String thePlayerCode) throws DeobfuscateException {
|
2020-10-29 18:44:05 +01:00
|
|
|
Parser.RegexException exception = null;
|
|
|
|
for (final String regex : REGEXES) {
|
2019-07-30 20:53:23 +02:00
|
|
|
try {
|
2022-03-18 15:09:06 +01:00
|
|
|
return Parser.matchGroup1(regex, thePlayerCode);
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final Parser.RegexException re) {
|
2020-10-29 18:44:05 +01:00
|
|
|
if (exception == null) {
|
|
|
|
exception = re;
|
|
|
|
}
|
2019-07-30 20:53:23 +02:00
|
|
|
}
|
2017-07-11 05:08:03 +02:00
|
|
|
}
|
2021-04-15 18:58:59 +02:00
|
|
|
throw new DeobfuscateException(
|
|
|
|
"Could not find deobfuscate function with any of the given patterns.", exception);
|
2017-07-11 05:08:03 +02:00
|
|
|
}
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2021-06-06 18:32:07 +02:00
|
|
|
@Nonnull
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
private static String loadDeobfuscationCode() throws DeobfuscateException {
|
2017-03-01 18:47:52 +01:00
|
|
|
try {
|
2020-10-26 16:32:39 +01:00
|
|
|
final String deobfuscationFunctionName = getDeobfuscationFuncName(playerCode);
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2018-09-08 07:25:07 +02:00
|
|
|
final String functionPattern = "("
|
2020-10-26 16:32:39 +01:00
|
|
|
+ deobfuscationFunctionName.replace("$", "\\$")
|
2017-03-01 18:47:52 +01:00
|
|
|
+ "=function\\([a-zA-Z0-9_]+\\)\\{.+?\\})";
|
2021-04-15 18:58:59 +02:00
|
|
|
final String deobfuscateFunction = "var " + Parser.matchGroup1(functionPattern,
|
|
|
|
playerCode) + ";";
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2018-09-08 07:25:07 +02:00
|
|
|
final String helperObjectName =
|
2021-04-15 18:58:59 +02:00
|
|
|
Parser.matchGroup1(";([A-Za-z0-9_\\$]{2})\\...\\(",
|
|
|
|
deobfuscateFunction);
|
2018-09-08 07:25:07 +02:00
|
|
|
final String helperPattern =
|
2021-04-15 18:58:59 +02:00
|
|
|
"(var " + helperObjectName.replace("$", "\\$")
|
|
|
|
+ "=\\{.+?\\}\\};)";
|
2018-09-08 07:25:07 +02:00
|
|
|
final String helperObject =
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
Parser.matchGroup1(helperPattern, Objects.requireNonNull(playerCode).replace(
|
|
|
|
"\n", ""));
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2018-09-08 07:25:07 +02:00
|
|
|
final String callerFunction =
|
2021-04-15 18:58:59 +02:00
|
|
|
"function " + DEOBFUSCATION_FUNC_NAME + "(a){return "
|
|
|
|
+ deobfuscationFunctionName + "(a);}";
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2020-10-26 16:32:39 +01:00
|
|
|
return helperObject + deobfuscateFunction + callerFunction;
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final Exception e) {
|
2020-10-26 16:32:39 +01:00
|
|
|
throw new DeobfuscateException("Could not parse deobfuscate function ", e);
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-29 18:44:05 +01:00
|
|
|
@Nonnull
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
private static String getDeobfuscationCode() throws ParsingException {
|
2020-10-29 18:44:05 +01:00
|
|
|
if (cachedDeobfuscationCode == null) {
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
if (isNullOrEmpty(playerCode)) {
|
|
|
|
throw new ParsingException("playerCode is null");
|
|
|
|
}
|
2021-05-29 14:43:26 +02:00
|
|
|
|
2021-07-17 19:14:57 +02:00
|
|
|
cachedDeobfuscationCode = loadDeobfuscationCode();
|
2020-10-29 18:44:05 +01:00
|
|
|
}
|
|
|
|
return cachedDeobfuscationCode;
|
|
|
|
}
|
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
private static void getStsFromPlayerJs() throws ParsingException {
|
2022-03-18 15:09:06 +01:00
|
|
|
if (!isNullOrEmpty(sts)) {
|
|
|
|
return;
|
|
|
|
}
|
2021-05-29 14:43:26 +02:00
|
|
|
if (playerCode == null) {
|
|
|
|
storePlayerJs();
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
if (playerCode == null) {
|
|
|
|
throw new ParsingException("playerCode is null");
|
|
|
|
}
|
2021-05-29 14:43:26 +02:00
|
|
|
}
|
|
|
|
sts = Parser.matchGroup1(STS_REGEX, playerCode);
|
|
|
|
}
|
|
|
|
|
2020-10-29 18:44:05 +01:00
|
|
|
private String deobfuscateSignature(final String obfuscatedSig) throws ParsingException {
|
|
|
|
final String deobfuscationCode = getDeobfuscationCode();
|
|
|
|
|
2020-08-15 17:08:07 +02:00
|
|
|
final Context context = Context.enter();
|
2017-03-01 18:47:52 +01:00
|
|
|
context.setOptimizationLevel(-1);
|
2020-08-15 17:08:07 +02:00
|
|
|
final Object result;
|
2017-03-01 18:47:52 +01:00
|
|
|
try {
|
2020-08-15 17:08:07 +02:00
|
|
|
final ScriptableObject scope = context.initSafeStandardObjects();
|
2020-10-26 21:22:21 +01:00
|
|
|
context.evaluateString(scope, deobfuscationCode, "deobfuscationCode", 1, null);
|
|
|
|
final Function deobfuscateFunc = (Function) scope.get(DEOBFUSCATION_FUNC_NAME, scope);
|
2020-10-26 16:32:39 +01:00
|
|
|
result = deobfuscateFunc.call(context, scope, scope, new Object[]{obfuscatedSig});
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final Exception e) {
|
2020-10-26 16:32:39 +01:00
|
|
|
throw new DeobfuscateException("Could not get deobfuscate signature", e);
|
2017-03-01 18:47:52 +01:00
|
|
|
} finally {
|
|
|
|
Context.exit();
|
|
|
|
}
|
2020-10-19 13:47:41 +02:00
|
|
|
return Objects.toString(result, "");
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
|
|
|
|
2017-08-10 19:50:59 +02:00
|
|
|
/*//////////////////////////////////////////////////////////////////////////
|
|
|
|
// Utils
|
|
|
|
//////////////////////////////////////////////////////////////////////////*/
|
|
|
|
|
2020-02-25 09:50:22 +01:00
|
|
|
private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException {
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
if (videoPrimaryInfoRenderer != null) {
|
|
|
|
return videoPrimaryInfoRenderer;
|
2022-03-18 15:09:06 +01:00
|
|
|
}
|
2020-02-28 17:03:21 +01:00
|
|
|
|
2021-05-23 17:55:19 +02:00
|
|
|
final JsonArray contents = nextResponse.getObject("contents")
|
2021-04-15 18:58:59 +02:00
|
|
|
.getObject("twoColumnWatchNextResults").getObject("results").getObject("results")
|
|
|
|
.getArray("contents");
|
2022-03-18 15:09:06 +01:00
|
|
|
JsonObject theVideoPrimaryInfoRenderer = null;
|
2020-02-25 09:50:22 +01:00
|
|
|
|
2021-02-24 17:06:38 +01:00
|
|
|
for (final Object content : contents) {
|
2020-04-16 16:08:14 +02:00
|
|
|
if (((JsonObject) content).has("videoPrimaryInfoRenderer")) {
|
2022-03-18 15:09:06 +01:00
|
|
|
theVideoPrimaryInfoRenderer = ((JsonObject) content)
|
2021-04-15 18:58:59 +02:00
|
|
|
.getObject("videoPrimaryInfoRenderer");
|
2020-02-25 09:50:22 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
if (isNullOrEmpty(theVideoPrimaryInfoRenderer)) {
|
2020-02-25 09:50:22 +01:00
|
|
|
throw new ParsingException("Could not find videoPrimaryInfoRenderer");
|
|
|
|
}
|
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
videoPrimaryInfoRenderer = theVideoPrimaryInfoRenderer;
|
2022-03-18 15:09:06 +01:00
|
|
|
return theVideoPrimaryInfoRenderer;
|
2020-02-25 09:50:22 +01:00
|
|
|
}
|
|
|
|
|
2020-02-25 10:05:53 +01:00
|
|
|
private JsonObject getVideoSecondaryInfoRenderer() throws ParsingException {
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
if (videoSecondaryInfoRenderer != null) {
|
|
|
|
return videoSecondaryInfoRenderer;
|
2022-03-18 15:09:06 +01:00
|
|
|
}
|
2020-02-28 17:03:21 +01:00
|
|
|
|
2021-05-23 17:55:19 +02:00
|
|
|
final JsonArray contents = nextResponse.getObject("contents")
|
2021-04-15 18:58:59 +02:00
|
|
|
.getObject("twoColumnWatchNextResults").getObject("results").getObject("results")
|
|
|
|
.getArray("contents");
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
JsonObject theVideoSecondaryInfoRenderer = null;
|
2020-02-25 10:05:53 +01:00
|
|
|
|
2021-02-24 17:06:38 +01:00
|
|
|
for (final Object content : contents) {
|
2020-04-16 16:08:14 +02:00
|
|
|
if (((JsonObject) content).has("videoSecondaryInfoRenderer")) {
|
2022-03-18 15:09:06 +01:00
|
|
|
theVideoSecondaryInfoRenderer = ((JsonObject) content)
|
2021-04-15 18:58:59 +02:00
|
|
|
.getObject("videoSecondaryInfoRenderer");
|
2020-02-25 10:05:53 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
if (isNullOrEmpty(theVideoSecondaryInfoRenderer)) {
|
2020-02-25 10:05:53 +01:00
|
|
|
throw new ParsingException("Could not find videoSecondaryInfoRenderer");
|
|
|
|
}
|
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
videoSecondaryInfoRenderer = theVideoSecondaryInfoRenderer;
|
2022-03-18 15:09:06 +01:00
|
|
|
return theVideoSecondaryInfoRenderer;
|
2020-02-25 10:05:53 +01:00
|
|
|
}
|
|
|
|
|
2021-06-06 18:32:07 +02:00
|
|
|
@Nonnull
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
private Map<String, ItagItem> getItags(@Nonnull final String streamingDataKey,
|
|
|
|
@Nonnull final ItagItem.ItagType itagTypeWanted) {
|
2020-10-29 18:44:05 +01:00
|
|
|
final Map<String, ItagItem> urlAndItags = new LinkedHashMap<>();
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
if (desktopStreamingData == null && androidStreamingData == null) {
|
2019-09-12 15:08:17 +02:00
|
|
|
return urlAndItags;
|
2017-08-10 19:50:59 +02:00
|
|
|
}
|
|
|
|
|
2022-01-15 17:25:00 +01:00
|
|
|
// Use the androidStreamingData object first because there is no n param and no
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
// signatureCiphers in streaming URLs of the Android client
|
|
|
|
urlAndItags.putAll(getStreamsFromStreamingDataKey(
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
androidStreamingData, streamingDataKey, itagTypeWanted, androidCpn));
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
urlAndItags.putAll(getStreamsFromStreamingDataKey(
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
desktopStreamingData, streamingDataKey, itagTypeWanted, desktopCpn));
|
2022-01-15 17:25:00 +01:00
|
|
|
// Use the iosStreamingData object in the last position because most of the available
|
|
|
|
// streams can be extracted with the Android and web clients and also because the iOS
|
|
|
|
// client is only enabled by default on livestreams
|
|
|
|
urlAndItags.putAll(getStreamsFromStreamingDataKey(
|
|
|
|
iosStreamingData, streamingDataKey, itagTypeWanted, androidCpn));
|
2020-09-29 10:48:02 +02:00
|
|
|
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
return urlAndItags;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Nonnull
|
|
|
|
private Map<String, ItagItem> getStreamsFromStreamingDataKey(
|
|
|
|
final JsonObject streamingData,
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
@Nonnull final String streamingDataKey,
|
|
|
|
@Nonnull final ItagItem.ItagType itagTypeWanted,
|
|
|
|
@Nonnull final String contentPlaybackNonce) {
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
|
|
|
|
final Map<String, ItagItem> urlAndItagsFromStreamingDataObject = new LinkedHashMap<>();
|
|
|
|
if (streamingData != null && streamingData.has(streamingDataKey)) {
|
|
|
|
final JsonArray formats = streamingData.getArray(streamingDataKey);
|
|
|
|
for (int i = 0; i != formats.size(); ++i) {
|
2022-03-18 15:09:06 +01:00
|
|
|
final JsonObject formatData = formats.getObject(i);
|
|
|
|
final int itag = formatData.getInt("itag");
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
|
|
|
|
if (ItagItem.isSupported(itag)) {
|
|
|
|
try {
|
|
|
|
final ItagItem itagItem = ItagItem.getItag(itag);
|
|
|
|
if (itagItem.itagType == itagTypeWanted) {
|
|
|
|
// Ignore streams that are delivered using YouTube's OTF format,
|
|
|
|
// as those only work with DASH and not with progressive HTTP.
|
|
|
|
if (formatData.getString("type", EMPTY_STRING)
|
|
|
|
.equalsIgnoreCase("FORMAT_STREAM_TYPE_OTF")) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
final String streamUrl;
|
|
|
|
if (formatData.has("url")) {
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
streamUrl = formatData.getString("url") + "&cpn="
|
|
|
|
+ contentPlaybackNonce;
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
} else {
|
|
|
|
// This url has an obfuscated signature
|
|
|
|
final String cipherString = formatData.has("cipher")
|
|
|
|
? formatData.getString("cipher")
|
|
|
|
: formatData.getString("signatureCipher");
|
|
|
|
final Map<String, String> cipher = Parser.compatParseMap(
|
|
|
|
cipherString);
|
|
|
|
streamUrl = cipher.get("url") + "&" + cipher.get("sp") + "="
|
|
|
|
+ deobfuscateSignature(cipher.get("s"));
|
|
|
|
}
|
2019-09-11 19:56:16 +02:00
|
|
|
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
final JsonObject initRange = formatData.getObject("initRange");
|
|
|
|
final JsonObject indexRange = formatData.getObject("indexRange");
|
|
|
|
final String mimeType = formatData.getString("mimeType", EMPTY_STRING);
|
|
|
|
final String codec = mimeType.contains("codecs")
|
|
|
|
? mimeType.split("\"")[1] : EMPTY_STRING;
|
|
|
|
|
|
|
|
itagItem.setBitrate(formatData.getInt("bitrate"));
|
|
|
|
itagItem.setWidth(formatData.getInt("width"));
|
|
|
|
itagItem.setHeight(formatData.getInt("height"));
|
|
|
|
itagItem.setInitStart(Integer.parseInt(initRange.getString("start",
|
|
|
|
"-1")));
|
|
|
|
itagItem.setInitEnd(Integer.parseInt(initRange.getString("end",
|
|
|
|
"-1")));
|
|
|
|
itagItem.setIndexStart(Integer.parseInt(indexRange.getString("start",
|
|
|
|
"-1")));
|
|
|
|
itagItem.setIndexEnd(Integer.parseInt(indexRange.getString("end",
|
|
|
|
"-1")));
|
|
|
|
itagItem.fps = formatData.getInt("fps");
|
|
|
|
itagItem.setQuality(formatData.getString("quality"));
|
|
|
|
itagItem.setCodec(codec);
|
|
|
|
|
|
|
|
urlAndItagsFromStreamingDataObject.put(streamUrl, itagItem);
|
|
|
|
}
|
|
|
|
} catch (final UnsupportedEncodingException | ParsingException ignored) {
|
2017-08-10 19:50:59 +02:00
|
|
|
}
|
2020-10-26 16:32:39 +01:00
|
|
|
}
|
2017-08-10 19:50:59 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
return urlAndItagsFromStreamingDataObject;
|
2017-08-10 19:50:59 +02:00
|
|
|
}
|
|
|
|
|
2020-02-08 23:58:46 +01:00
|
|
|
@Nonnull
|
|
|
|
@Override
|
|
|
|
public List<Frameset> getFrames() throws ExtractionException {
|
|
|
|
try {
|
2020-04-09 15:52:42 +02:00
|
|
|
final JsonObject storyboards = playerResponse.getObject("storyboards");
|
|
|
|
final JsonObject storyboardsRenderer;
|
|
|
|
if (storyboards.has("playerLiveStoryboardSpecRenderer")) {
|
|
|
|
storyboardsRenderer = storyboards.getObject("playerLiveStoryboardSpecRenderer");
|
|
|
|
} else {
|
|
|
|
storyboardsRenderer = storyboards.getObject("playerStoryboardSpecRenderer");
|
|
|
|
}
|
|
|
|
|
2021-06-08 20:30:13 +02:00
|
|
|
if (storyboardsRenderer == null) {
|
2021-06-15 21:58:00 +02:00
|
|
|
return Collections.emptyList();
|
2021-06-08 20:30:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
final String storyboardsRendererSpec = storyboardsRenderer.getString("spec");
|
|
|
|
if (storyboardsRendererSpec == null) {
|
2021-06-15 21:58:00 +02:00
|
|
|
return Collections.emptyList();
|
2021-06-08 20:30:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
final String[] spec = storyboardsRendererSpec.split("\\|");
|
2020-02-08 23:58:46 +01:00
|
|
|
final String url = spec[0];
|
|
|
|
final ArrayList<Frameset> result = new ArrayList<>(spec.length - 1);
|
2020-04-09 15:52:42 +02:00
|
|
|
|
2020-02-08 23:58:46 +01:00
|
|
|
for (int i = 1; i < spec.length; ++i) {
|
|
|
|
final String[] parts = spec[i].split("#");
|
2021-01-14 20:01:06 +01:00
|
|
|
if (parts.length != 8 || Integer.parseInt(parts[5]) == 0) {
|
2020-02-08 23:58:46 +01:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
final int frameWidth = Integer.parseInt(parts[0]);
|
|
|
|
final int frameHeight = Integer.parseInt(parts[1]);
|
|
|
|
final int totalCount = Integer.parseInt(parts[2]);
|
|
|
|
final int framesPerPageX = Integer.parseInt(parts[3]);
|
|
|
|
final int framesPerPageY = Integer.parseInt(parts[4]);
|
2021-04-15 18:58:59 +02:00
|
|
|
final String baseUrl = url.replace("$L", String.valueOf(i - 1))
|
|
|
|
.replace("$N", parts[6]) + "&sigh=" + parts[7];
|
2020-02-08 23:58:46 +01:00
|
|
|
final List<String> urls;
|
|
|
|
if (baseUrl.contains("$M")) {
|
2021-04-15 18:58:59 +02:00
|
|
|
final int totalPages = (int) Math.ceil(totalCount / (double)
|
|
|
|
(framesPerPageX * framesPerPageY));
|
2020-02-08 23:58:46 +01:00
|
|
|
urls = new ArrayList<>(totalPages);
|
|
|
|
for (int j = 0; j < totalPages; j++) {
|
|
|
|
urls.add(baseUrl.replace("$M", String.valueOf(j)));
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
urls = Collections.singletonList(baseUrl);
|
|
|
|
}
|
|
|
|
result.add(new Frameset(
|
|
|
|
urls,
|
|
|
|
frameWidth,
|
|
|
|
frameHeight,
|
|
|
|
totalCount,
|
2021-01-14 20:01:06 +01:00
|
|
|
Integer.parseInt(parts[5]),
|
2020-02-08 23:58:46 +01:00
|
|
|
framesPerPageX,
|
|
|
|
framesPerPageY
|
|
|
|
));
|
|
|
|
}
|
|
|
|
result.trimToSize();
|
|
|
|
return result;
|
2021-02-24 17:06:38 +01:00
|
|
|
} catch (final Exception e) {
|
2021-05-29 14:43:26 +02:00
|
|
|
throw new ExtractionException("Could not get frames", e);
|
2020-02-08 23:58:46 +01:00
|
|
|
}
|
|
|
|
}
|
added metadata, fix descriptions, fix thumbnail, update tests
thumbnail: quality before: https://peertube.cpy.re/static/thumbnails/d2a5ec78-5f85-4090-8ec5-dc1102e022ea.jpg
quality after: https://peertube.cpy.re/static/previews/d2a5ec78-5f85-4090-8ec5-dc1102e022ea.jpg
description: we were getting about the first 260 characters, we now get full description (with fallback to first 260 chars if the get request for full description fails)
test: updated tests to match description, also changed some tests: it was assertEquals(extracted, expected), but the proper way to do it is assertEquals(expected, extracted)
metadata: got host, privacy (public, private, unlisted), licence, language, tags
2020-01-19 12:45:52 +01:00
|
|
|
|
2020-02-25 09:07:22 +01:00
|
|
|
@Nonnull
|
added metadata, fix descriptions, fix thumbnail, update tests
thumbnail: quality before: https://peertube.cpy.re/static/thumbnails/d2a5ec78-5f85-4090-8ec5-dc1102e022ea.jpg
quality after: https://peertube.cpy.re/static/previews/d2a5ec78-5f85-4090-8ec5-dc1102e022ea.jpg
description: we were getting about the first 260 characters, we now get full description (with fallback to first 260 chars if the get request for full description fails)
test: updated tests to match description, also changed some test: it was assertEquals(extracted, expected), but the proper way to do it is assertEquals(expected, extracted)
metadata: got host, privacy (public, private, unlisted), licence, language, tags
2020-01-19 12:45:52 +01:00
|
|
|
@Override
|
2020-02-09 11:59:23 +01:00
|
|
|
public Privacy getPrivacy() {
|
2021-04-15 18:58:59 +02:00
|
|
|
final boolean isUnlisted = playerResponse.getObject("microformat")
|
|
|
|
.getObject("playerMicroformatRenderer").getBoolean("isUnlisted");
|
2020-02-09 11:59:23 +01:00
|
|
|
return isUnlisted ? Privacy.UNLISTED : Privacy.PUBLIC;
|
added metadata, fix descriptions, fix thumbnail, update tests
thumbnail: quality before: https://peertube.cpy.re/static/thumbnails/d2a5ec78-5f85-4090-8ec5-dc1102e022ea.jpg
quality after: https://peertube.cpy.re/static/previews/d2a5ec78-5f85-4090-8ec5-dc1102e022ea.jpg
description: we were getting about the first 260 characters, we now get full description (with fallback to first 260 chars if the get request for full description fails)
test: updated tests to match description, also changed some test: it was assertEquals(extracted, expected), but the proper way to do it is assertEquals(expected, extracted)
metadata: got host, privacy (public, private, unlisted), licence, language, tags
2020-01-19 12:45:52 +01:00
|
|
|
}
|
|
|
|
|
2020-02-25 09:07:22 +01:00
|
|
|
@Nonnull
|
added metadata, fix descriptions, fix thumbnail, update tests
thumbnail: quality before: https://peertube.cpy.re/static/thumbnails/d2a5ec78-5f85-4090-8ec5-dc1102e022ea.jpg
quality after: https://peertube.cpy.re/static/previews/d2a5ec78-5f85-4090-8ec5-dc1102e022ea.jpg
description: we were getting about the first 260 characters, we now get full description (with fallback to first 260 chars if the get request for full description fails)
test: updated tests to match description, also changed some test: it was assertEquals(extracted, expected), but the proper way to do it is assertEquals(expected, extracted)
metadata: got host, privacy (public, private, unlisted), licence, language, tags
2020-01-19 12:45:52 +01:00
|
|
|
@Override
|
2020-02-25 09:07:22 +01:00
|
|
|
public String getCategory() {
|
2021-04-15 18:58:59 +02:00
|
|
|
return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer")
|
2021-06-24 18:39:16 +02:00
|
|
|
.getString("category", EMPTY_STRING);
|
added metadata, fix descriptions, fix thumbnail, update tests
thumbnail: quality before: https://peertube.cpy.re/static/thumbnails/d2a5ec78-5f85-4090-8ec5-dc1102e022ea.jpg
quality after: https://peertube.cpy.re/static/previews/d2a5ec78-5f85-4090-8ec5-dc1102e022ea.jpg
description: we were getting about the first 260 characters, we now get full description (with fallback to first 260 chars if the get request for full description fails)
test: updated tests to match description, also changed some test: it was assertEquals(extracted, expected), but the proper way to do it is assertEquals(expected, extracted)
metadata: got host, privacy (public, private, unlisted), licence, language, tags
2020-01-19 12:45:52 +01:00
|
|
|
}
|
|
|
|
|
2020-02-25 09:07:22 +01:00
|
|
|
@Nonnull
|
added metadata, fix descriptions, fix thumbnail, update tests
thumbnail: quality before: https://peertube.cpy.re/static/thumbnails/d2a5ec78-5f85-4090-8ec5-dc1102e022ea.jpg
quality after: https://peertube.cpy.re/static/previews/d2a5ec78-5f85-4090-8ec5-dc1102e022ea.jpg
description: we were getting about the first 260 characters, we now get full description (with fallback to first 260 chars if the get request for full description fails)
test: updated tests to match description, also changed some test: it was assertEquals(extracted, expected), but the proper way to do it is assertEquals(expected, extracted)
metadata: got host, privacy (public, private, unlisted), licence, language, tags
2020-01-19 12:45:52 +01:00
|
|
|
@Override
|
2020-02-09 11:59:23 +01:00
|
|
|
public String getLicence() throws ParsingException {
|
|
|
|
final JsonObject metadataRowRenderer = getVideoSecondaryInfoRenderer()
|
2021-04-15 18:58:59 +02:00
|
|
|
.getObject("metadataRowContainer").getObject("metadataRowContainerRenderer")
|
|
|
|
.getArray("rows")
|
2020-02-09 11:59:23 +01:00
|
|
|
.getObject(0).getObject("metadataRowRenderer");
|
|
|
|
|
|
|
|
final JsonArray contents = metadataRowRenderer.getArray("contents");
|
|
|
|
final String license = getTextFromObject(contents.getObject(0));
|
2021-05-29 14:43:26 +02:00
|
|
|
return license != null && "Licence".equals(getTextFromObject(metadataRowRenderer
|
|
|
|
.getObject("title"))) ? license : "YouTube licence";
|
added metadata, fix descriptions, fix thumbnail, update tests
thumbnail: quality before: https://peertube.cpy.re/static/thumbnails/d2a5ec78-5f85-4090-8ec5-dc1102e022ea.jpg
quality after: https://peertube.cpy.re/static/previews/d2a5ec78-5f85-4090-8ec5-dc1102e022ea.jpg
description: we were getting about the first 260 characters, we now get full description (with fallback to first 260 chars if the get request for full description fails)
test: updated tests to match description, also changed some test: it was assertEquals(extracted, expected), but the proper way to do it is assertEquals(expected, extracted)
metadata: got host, privacy (public, private, unlisted), licence, language, tags
2020-01-19 12:45:52 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2020-02-25 09:07:22 +01:00
|
|
|
public Locale getLanguageInfo() {
|
2020-01-25 13:16:42 +01:00
|
|
|
return null;
|
added metadata, fix descriptions, fix thumbnail, update tests
thumbnail: quality before: https://peertube.cpy.re/static/thumbnails/d2a5ec78-5f85-4090-8ec5-dc1102e022ea.jpg
quality after: https://peertube.cpy.re/static/previews/d2a5ec78-5f85-4090-8ec5-dc1102e022ea.jpg
description: we were getting about the first 260 characters, we now get full description (with fallback to first 260 chars if the get request for full description fails)
test: updated tests to match description, also changed some test: it was assertEquals(extracted, expected), but the proper way to do it is assertEquals(expected, extracted)
metadata: got host, privacy (public, private, unlisted), licence, language, tags
2020-01-19 12:45:52 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Nonnull
|
|
|
|
@Override
|
2020-02-25 09:07:22 +01:00
|
|
|
public List<String> getTags() {
|
2021-04-15 18:58:59 +02:00
|
|
|
return JsonUtils.getStringListFromJsonArray(playerResponse.getObject("videoDetails")
|
|
|
|
.getArray("keywords"));
|
2020-01-23 14:19:22 +01:00
|
|
|
}
|
2020-12-12 10:24:29 +01:00
|
|
|
|
|
|
|
@Nonnull
|
|
|
|
@Override
|
|
|
|
public List<StreamSegment> getStreamSegments() throws ParsingException {
|
|
|
|
final ArrayList<StreamSegment> segments = new ArrayList<>();
|
2021-05-23 17:55:19 +02:00
|
|
|
if (nextResponse.has("engagementPanels")) {
|
|
|
|
final JsonArray panels = nextResponse.getArray("engagementPanels");
|
2020-12-12 10:24:29 +01:00
|
|
|
JsonArray segmentsArray = null;
|
|
|
|
|
|
|
|
// Search for correct panel containing the data
|
|
|
|
for (int i = 0; i < panels.size(); i++) {
|
2021-04-15 18:58:59 +02:00
|
|
|
final String panelIdentifier = panels.getObject(i)
|
|
|
|
.getObject("engagementPanelSectionListRenderer")
|
2021-02-03 22:07:34 +01:00
|
|
|
.getString("panelIdentifier");
|
2022-02-18 20:22:28 +01:00
|
|
|
// panelIdentifier might be null if the panel has something to do with ads
|
|
|
|
// See https://github.com/TeamNewPipe/NewPipe/issues/7792#issuecomment-1030900188
|
2022-02-16 23:39:26 +01:00
|
|
|
if ("engagement-panel-macro-markers-description-chapters".equals(panelIdentifier)) {
|
2021-04-15 18:58:59 +02:00
|
|
|
segmentsArray = panels.getObject(i)
|
|
|
|
.getObject("engagementPanelSectionListRenderer").getObject("content")
|
|
|
|
.getObject("macroMarkersListRenderer").getArray("contents");
|
2020-12-12 10:24:29 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (segmentsArray != null) {
|
|
|
|
final long duration = getLength();
|
|
|
|
for (final Object object : segmentsArray) {
|
2021-04-15 18:58:59 +02:00
|
|
|
final JsonObject segmentJson = ((JsonObject) object)
|
|
|
|
.getObject("macroMarkersListItemRenderer");
|
2020-12-12 10:24:29 +01:00
|
|
|
|
2021-04-15 18:58:59 +02:00
|
|
|
final int startTimeSeconds = segmentJson.getObject("onTap")
|
2021-05-29 14:43:26 +02:00
|
|
|
.getObject("watchEndpoint").getInt("startTimeSeconds", -1);
|
2020-12-12 10:24:29 +01:00
|
|
|
|
|
|
|
if (startTimeSeconds == -1) {
|
|
|
|
throw new ParsingException("Could not get stream segment start time.");
|
|
|
|
}
|
|
|
|
if (startTimeSeconds > duration) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
final String title = getTextFromObject(segmentJson.getObject("title"));
|
|
|
|
if (isNullOrEmpty(title)) {
|
|
|
|
throw new ParsingException("Could not get stream segment title.");
|
|
|
|
}
|
|
|
|
|
|
|
|
final StreamSegment segment = new StreamSegment(title, startTimeSeconds);
|
|
|
|
segment.setUrl(getUrl() + "?t=" + startTimeSeconds);
|
|
|
|
if (segmentJson.has("thumbnail")) {
|
2021-04-15 18:58:59 +02:00
|
|
|
final JsonArray previewsArray = segmentJson.getObject("thumbnail")
|
|
|
|
.getArray("thumbnails");
|
2020-12-12 10:24:29 +01:00
|
|
|
if (!previewsArray.isEmpty()) {
|
2021-04-15 18:58:59 +02:00
|
|
|
// Assume that the thumbnail with the highest resolution is at the
|
|
|
|
// last position
|
|
|
|
final String url = previewsArray
|
|
|
|
.getObject(previewsArray.size() - 1).getString("url");
|
2020-12-12 15:00:45 +01:00
|
|
|
segment.setPreviewUrl(fixThumbnailUrl(url));
|
2020-12-12 10:24:29 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
segments.add(segment);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return segments;
|
|
|
|
}
|
2020-12-20 19:54:12 +01:00
|
|
|
|
|
|
|
@Nonnull
|
|
|
|
@Override
|
|
|
|
public List<MetaInfo> getMetaInfo() throws ParsingException {
|
|
|
|
return YoutubeParsingHelper.getMetaInfo(
|
2021-05-23 17:55:19 +02:00
|
|
|
nextResponse.getObject("contents").getObject("twoColumnWatchNextResults")
|
2021-02-07 22:42:21 +01:00
|
|
|
.getObject("results").getObject("results").getArray("contents"));
|
2020-12-20 19:54:12 +01:00
|
|
|
}
|
2021-06-06 18:32:07 +02:00
|
|
|
|
|
|
|
/**
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
* Reset YouTube's deobfuscation code.
|
2021-06-06 18:32:07 +02:00
|
|
|
* <p>
|
|
|
|
* This is needed for mocks in YouTube stream tests, because when they are ran, the
|
|
|
|
* {@code signatureTimestamp} is known (the {@code sts} string) so a different body than the
|
|
|
|
* body present in the mocks is send by the extractor instance. As a result, running all
|
|
|
|
* YouTube stream tests with the MockDownloader (like the CI does) will fail if this method is
|
|
|
|
* not called before fetching the page of a test.
|
|
|
|
* </p>
|
|
|
|
*/
|
|
|
|
public static void resetDeobfuscationCode() {
|
|
|
|
cachedDeobfuscationCode = null;
|
|
|
|
playerCode = null;
|
|
|
|
sts = null;
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
YoutubeJavaScriptExtractor.resetJavaScriptCode();
|
2021-06-06 18:32:07 +02:00
|
|
|
}
|
2022-01-15 17:25:00 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Enable or disable the fetch of the Android client for all stream types.
|
|
|
|
*
|
|
|
|
* <p>
|
|
|
|
* By default, the fetch of the Android client will be made only on videos, in order to reduce
|
|
|
|
* data usage, because available streams of the Android client will be almost equal to the ones
|
|
|
|
* available on the web client.
|
|
|
|
* </p>
|
|
|
|
*
|
|
|
|
* <p>
|
|
|
|
* Enabling this option will allow you to get a 48kbps audio
|
|
|
|
* stream on livestreams without fetching the DASH manifest returned in YouTube's player
|
|
|
|
* response.
|
|
|
|
* </p>
|
|
|
|
* @param forceFetchOfAndroidClientValue whether to always fetch the Android client and not
|
|
|
|
* only for videos
|
|
|
|
*/
|
|
|
|
public static void forceFetchOfAndroidClient(final boolean forceFetchOfAndroidClientValue) {
|
|
|
|
isAndroidClientFetchForced = forceFetchOfAndroidClientValue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Enable or disable the fetch of the iOS client for all stream types.
|
|
|
|
*
|
|
|
|
* <p>
|
|
|
|
* By default, the fetch of the iOS client will be made only on livestreams, in order to get an
|
|
|
|
* HLS manifest with separated audio and video.
|
|
|
|
* </p>
|
|
|
|
* <p>
|
|
|
|
* Enabling this option will allow you to get an
|
|
|
|
* HLS manifest also for videos.
|
|
|
|
* </p>
|
|
|
|
* @param forceFetchOfIosClientValue whether to always fetch the iOS client and not only for
|
|
|
|
* livestreams
|
|
|
|
*/
|
|
|
|
public static void forceFetchOfIosClient(final boolean forceFetchOfIosClientValue) {
|
|
|
|
isIosClientFetchForced = forceFetchOfIosClientValue;
|
|
|
|
}
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|