2020-04-10 10:51:05 +02:00
|
|
|
package org.schabi.newpipe.extractor.services.youtube;
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
import static org.schabi.newpipe.extractor.NewPipe.getDownloader;
|
|
|
|
import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING;
|
|
|
|
import static org.schabi.newpipe.extractor.utils.Utils.HTTP;
|
|
|
|
import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
|
|
|
|
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
|
|
|
|
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
|
|
|
|
2021-03-03 19:49:26 +01:00
|
|
|
import com.grack.nanojson.JsonArray;
|
2021-04-02 21:34:47 +02:00
|
|
|
import com.grack.nanojson.JsonBuilder;
|
2021-03-03 19:49:26 +01:00
|
|
|
import com.grack.nanojson.JsonObject;
|
|
|
|
import com.grack.nanojson.JsonParser;
|
|
|
|
import com.grack.nanojson.JsonParserException;
|
|
|
|
import com.grack.nanojson.JsonWriter;
|
|
|
|
|
2020-12-15 17:21:21 +01:00
|
|
|
import org.schabi.newpipe.extractor.MetaInfo;
|
2019-04-28 22:03:16 +02:00
|
|
|
import org.schabi.newpipe.extractor.downloader.Response;
|
2022-03-18 15:09:06 +01:00
|
|
|
import org.schabi.newpipe.extractor.exceptions.AccountTerminatedException;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
2021-04-30 19:06:56 +02:00
|
|
|
import org.schabi.newpipe.extractor.localization.ContentCountry;
|
2020-02-29 16:55:07 +01:00
|
|
|
import org.schabi.newpipe.extractor.localization.Localization;
|
2022-02-17 17:19:54 +01:00
|
|
|
import org.schabi.newpipe.extractor.playlist.PlaylistInfo;
|
2020-12-15 17:21:21 +01:00
|
|
|
import org.schabi.newpipe.extractor.stream.Description;
|
2021-03-04 18:58:51 +01:00
|
|
|
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
2020-02-22 23:51:02 +01:00
|
|
|
import org.schabi.newpipe.extractor.utils.Parser;
|
2020-02-28 09:36:33 +01:00
|
|
|
import org.schabi.newpipe.extractor.utils.Utils;
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2020-02-29 16:42:04 +01:00
|
|
|
import java.io.IOException;
|
2020-02-27 17:39:23 +01:00
|
|
|
import java.io.UnsupportedEncodingException;
|
2020-04-01 16:01:21 +02:00
|
|
|
import java.net.MalformedURLException;
|
2019-01-13 12:52:07 +01:00
|
|
|
import java.net.URL;
|
2020-02-27 17:39:23 +01:00
|
|
|
import java.net.URLDecoder;
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
import java.nio.charset.StandardCharsets;
|
|
|
|
import java.security.SecureRandom;
|
2020-11-03 11:54:46 +01:00
|
|
|
import java.time.LocalDate;
|
2020-10-18 05:48:14 +02:00
|
|
|
import java.time.OffsetDateTime;
|
2020-11-03 11:54:46 +01:00
|
|
|
import java.time.ZoneOffset;
|
2020-10-18 05:48:14 +02:00
|
|
|
import java.time.format.DateTimeParseException;
|
2022-03-18 15:09:06 +01:00
|
|
|
import java.util.ArrayList;
|
2021-12-11 16:52:17 +01:00
|
|
|
import java.util.Arrays;
|
2022-03-18 15:09:06 +01:00
|
|
|
import java.util.Collections;
|
|
|
|
import java.util.HashMap;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.Map;
|
|
|
|
import java.util.Objects;
|
|
|
|
import java.util.Optional;
|
|
|
|
import java.util.Random;
|
2021-01-17 18:48:16 +01:00
|
|
|
|
|
|
|
import javax.annotation.Nonnull;
|
|
|
|
import javax.annotation.Nullable;
|
2020-02-26 15:22:59 +01:00
|
|
|
|
2017-06-29 20:12:55 +02:00
|
|
|
/*
|
2017-03-01 18:47:52 +01:00
|
|
|
* Created by Christian Schabesberger on 02.03.16.
|
|
|
|
*
|
|
|
|
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
|
|
|
* YoutubeParsingHelper.java is part of NewPipe.
|
|
|
|
*
|
|
|
|
* NewPipe is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* NewPipe is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
public final class YoutubeParsingHelper {
|
2017-03-01 18:47:52 +01:00
|
|
|
|
|
|
|
// This class only exposes static members; the private constructor keeps it from being
// instantiated from outside.
private YoutubeParsingHelper() {
}
|
|
|
|
|
2021-05-30 17:23:51 +02:00
|
|
|
public static final String YOUTUBEI_V1_URL = "https://www.youtube.com/youtubei/v1/";
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
public static final String CPN = "cpn";
|
|
|
|
public static final String VIDEO_ID = "videoId";
|
2021-05-30 17:23:51 +02:00
|
|
|
|
2021-12-11 16:52:17 +01:00
|
|
|
private static final String HARDCODED_CLIENT_VERSION = "2.20220107.00.00";
|
2021-04-12 18:24:32 +02:00
|
|
|
private static final String HARDCODED_KEY = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8";
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
|
|
|
private static final String ANDROID_YOUTUBE_KEY = "AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w";
|
2021-12-11 16:52:17 +01:00
|
|
|
private static final String MOBILE_YOUTUBE_CLIENT_VERSION = "16.49.37";
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
2020-02-26 15:22:59 +01:00
|
|
|
private static String clientVersion;
|
2020-07-26 12:00:56 +02:00
|
|
|
private static String key;
|
|
|
|
|
2021-06-05 13:51:56 +02:00
|
|
|
private static final String[] HARDCODED_YOUTUBE_MUSIC_KEY =
|
2021-12-11 16:52:17 +01:00
|
|
|
{"AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30", "67", "1.20220103.00.00"};
|
2021-06-06 15:39:45 +02:00
|
|
|
private static String[] youtubeMusicKey;
|
2020-03-17 11:33:39 +01:00
|
|
|
|
2021-04-12 18:24:32 +02:00
|
|
|
private static boolean keyAndVersionExtracted = false;
|
2021-07-09 18:23:46 +02:00
|
|
|
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
|
|
|
|
private static Optional<Boolean> hardcodedClientVersionAndKeyValid = Optional.empty();
|
2021-04-12 18:24:32 +02:00
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
private static final String CONTENT_PLAYBACK_NONCE_ALPHABET =
|
|
|
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
|
|
|
|
|
2021-04-08 16:36:55 +02:00
|
|
|
private static Random numberGenerator = new Random();
|
|
|
|
|
2021-04-07 12:25:59 +02:00
|
|
|
/**
|
|
|
|
* <code>PENDING+</code> means that the user did not yet submit their choices.
|
|
|
|
* Therefore, YouTube & Google should not track the user, because they did not give consent.
|
|
|
|
* The three digits at the end can be random, but are required.
|
|
|
|
*/
|
2021-04-09 11:51:54 +02:00
|
|
|
private static final String CONSENT_COOKIE_VALUE = "PENDING+";
|
2021-04-07 12:25:59 +02:00
|
|
|
/**
|
|
|
|
* Youtube <code>CONSENT</code> cookie. Should prevent redirect to consent.youtube.com
|
|
|
|
*/
|
2021-04-09 11:51:54 +02:00
|
|
|
private static final String CONSENT_COOKIE = "CONSENT=" + CONSENT_COOKIE_VALUE;
|
2021-04-07 12:25:59 +02:00
|
|
|
|
2021-04-25 18:54:26 +02:00
|
|
|
private static final String FEED_BASE_CHANNEL_ID =
|
|
|
|
"https://www.youtube.com/feeds/videos.xml?channel_id=";
|
2019-12-16 08:35:44 +01:00
|
|
|
private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user=";
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
private static boolean isGoogleURL(final String url) {
|
|
|
|
final String cachedUrl = extractCachedUrlIfNeeded(url);
|
2020-12-20 19:54:12 +01:00
|
|
|
try {
|
2022-03-18 15:09:06 +01:00
|
|
|
final URL u = new URL(cachedUrl);
|
2020-12-20 19:54:12 +01:00
|
|
|
final String host = u.getHost();
|
2021-06-24 18:39:16 +02:00
|
|
|
return host.startsWith("google.")
|
|
|
|
|| host.startsWith("m.google.")
|
2021-04-26 11:49:47 +02:00
|
|
|
|| host.startsWith("www.google.");
|
2021-04-08 16:17:59 +02:00
|
|
|
} catch (final MalformedURLException e) {
|
2020-12-20 19:54:12 +01:00
|
|
|
return false;
|
2019-10-29 06:00:29 +01:00
|
|
|
}
|
2020-12-20 19:54:12 +01:00
|
|
|
}
|
2019-10-29 06:00:29 +01:00
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
public static boolean isYoutubeURL(@Nonnull final URL url) {
|
2020-12-15 17:21:21 +01:00
|
|
|
final String host = url.getHost();
|
2021-06-24 18:39:16 +02:00
|
|
|
return host.equalsIgnoreCase("youtube.com")
|
|
|
|
|| host.equalsIgnoreCase("www.youtube.com")
|
2021-04-25 18:54:26 +02:00
|
|
|
|| host.equalsIgnoreCase("m.youtube.com")
|
|
|
|
|| host.equalsIgnoreCase("music.youtube.com");
|
2019-01-13 12:52:07 +01:00
|
|
|
}
|
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
public static boolean isYoutubeServiceURL(@Nonnull final URL url) {
|
2020-12-15 17:21:21 +01:00
|
|
|
final String host = url.getHost();
|
2021-04-25 18:54:26 +02:00
|
|
|
return host.equalsIgnoreCase("www.youtube-nocookie.com")
|
|
|
|
|| host.equalsIgnoreCase("youtu.be");
|
2019-01-27 01:28:51 +01:00
|
|
|
}
|
2019-01-13 12:52:07 +01:00
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
public static boolean isHooktubeURL(@Nonnull final URL url) {
|
2020-12-15 17:21:21 +01:00
|
|
|
final String host = url.getHost();
|
2019-01-27 01:28:51 +01:00
|
|
|
return host.equalsIgnoreCase("hooktube.com");
|
|
|
|
}
|
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
public static boolean isInvidioURL(@Nonnull final URL url) {
|
2020-12-15 17:21:21 +01:00
|
|
|
final String host = url.getHost();
|
2020-07-02 21:31:05 +02:00
|
|
|
return host.equalsIgnoreCase("invidio.us")
|
|
|
|
|| host.equalsIgnoreCase("dev.invidio.us")
|
|
|
|
|| host.equalsIgnoreCase("www.invidio.us")
|
2021-01-22 19:20:22 +01:00
|
|
|
|| host.equalsIgnoreCase("redirect.invidious.io")
|
2020-07-02 21:31:05 +02:00
|
|
|
|| host.equalsIgnoreCase("invidious.snopyta.org")
|
|
|
|
|| host.equalsIgnoreCase("yewtu.be")
|
2020-11-11 16:12:31 +01:00
|
|
|
|| host.equalsIgnoreCase("tube.connect.cafe")
|
2021-08-12 10:06:41 +02:00
|
|
|
|| host.equalsIgnoreCase("tubus.eduvid.org")
|
2020-11-11 16:12:31 +01:00
|
|
|
|| host.equalsIgnoreCase("invidious.kavin.rocks")
|
2021-06-23 14:12:03 +02:00
|
|
|
|| host.equalsIgnoreCase("invidious-us.kavin.rocks")
|
|
|
|
|| host.equalsIgnoreCase("piped.kavin.rocks")
|
2020-11-11 16:12:31 +01:00
|
|
|
|| host.equalsIgnoreCase("invidious.site")
|
|
|
|
|| host.equalsIgnoreCase("vid.mint.lgbt")
|
|
|
|
|| host.equalsIgnoreCase("invidiou.site")
|
2021-01-22 19:20:22 +01:00
|
|
|
|| host.equalsIgnoreCase("invidious.fdn.fr")
|
|
|
|
|| host.equalsIgnoreCase("invidious.048596.xyz")
|
|
|
|
|| host.equalsIgnoreCase("invidious.zee.li")
|
|
|
|
|| host.equalsIgnoreCase("vid.puffyan.us")
|
2021-06-23 14:12:03 +02:00
|
|
|
|| host.equalsIgnoreCase("ytprivate.com")
|
|
|
|
|| host.equalsIgnoreCase("invidious.namazso.eu")
|
|
|
|
|| host.equalsIgnoreCase("invidious.silkky.cloud")
|
|
|
|
|| host.equalsIgnoreCase("invidious.exonip.de")
|
|
|
|
|| host.equalsIgnoreCase("inv.riverside.rocks")
|
|
|
|
|| host.equalsIgnoreCase("invidious.blamefran.net")
|
|
|
|
|| host.equalsIgnoreCase("invidious.moomoo.me")
|
|
|
|
|| host.equalsIgnoreCase("ytb.trom.tf")
|
|
|
|
|| host.equalsIgnoreCase("yt.cyberhost.uk")
|
|
|
|
|| host.equalsIgnoreCase("y.com.cm");
|
2019-01-13 12:52:07 +01:00
|
|
|
}
|
|
|
|
|
2021-10-22 21:48:18 +02:00
|
|
|
public static boolean isY2ubeURL(@Nonnull final URL url) {
|
|
|
|
return url.getHost().equalsIgnoreCase("y2u.be");
|
|
|
|
}
|
|
|
|
|
2020-06-15 11:27:44 +02:00
|
|
|
/**
|
2020-07-02 21:31:05 +02:00
|
|
|
* Parses the duration string of the video expecting ":" or "." as separators
|
2021-02-07 22:12:22 +01:00
|
|
|
*
|
2020-06-15 11:27:44 +02:00
|
|
|
* @return the duration in seconds
|
2020-07-02 21:31:05 +02:00
|
|
|
* @throws ParsingException when more than 3 separators are found
|
2020-06-15 11:27:44 +02:00
|
|
|
*/
|
2021-06-11 13:34:23 +02:00
|
|
|
public static int parseDurationString(@Nonnull final String input)
|
2017-03-01 18:47:52 +01:00
|
|
|
throws ParsingException, NumberFormatException {
|
2018-09-09 11:53:10 +02:00
|
|
|
// If time separator : is not detected, try . instead
|
2018-09-09 14:01:39 +02:00
|
|
|
final String[] splitInput = input.contains(":")
|
|
|
|
? input.split(":")
|
|
|
|
: input.split("\\.");
|
|
|
|
|
2017-03-01 18:47:52 +01:00
|
|
|
String days = "0";
|
|
|
|
String hours = "0";
|
|
|
|
String minutes = "0";
|
2018-09-09 14:01:39 +02:00
|
|
|
final String seconds;
|
2017-03-01 18:47:52 +01:00
|
|
|
|
2017-06-29 20:12:55 +02:00
|
|
|
switch (splitInput.length) {
|
2017-03-01 18:47:52 +01:00
|
|
|
case 4:
|
|
|
|
days = splitInput[0];
|
|
|
|
hours = splitInput[1];
|
|
|
|
minutes = splitInput[2];
|
|
|
|
seconds = splitInput[3];
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
hours = splitInput[0];
|
|
|
|
minutes = splitInput[1];
|
|
|
|
seconds = splitInput[2];
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
minutes = splitInput[0];
|
|
|
|
seconds = splitInput[1];
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
seconds = splitInput[0];
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
throw new ParsingException("Error duration string with unknown format: " + input);
|
|
|
|
}
|
2020-05-30 17:20:54 +02:00
|
|
|
|
2022-03-17 14:50:12 +01:00
|
|
|
return ((convertDurationToInt(days) * 24
|
|
|
|
+ convertDurationToInt(hours)) * 60
|
|
|
|
+ convertDurationToInt(minutes)) * 60
|
|
|
|
+ convertDurationToInt(seconds);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Tries to convert a duration string to an integer without throwing an exception.
|
|
|
|
* <br/>
|
|
|
|
* Helper method for {@link #parseDurationString(String)}.
|
|
|
|
* <br/>
|
|
|
|
* Note: This method is also used as a workaround for NewPipe#8034 (YT shorts no longer
|
|
|
|
* display any duration in channels).
|
|
|
|
*
|
|
|
|
* @param input The string to process
|
|
|
|
* @return The converted integer or 0 if the conversion failed.
|
|
|
|
*/
|
|
|
|
private static int convertDurationToInt(final String input) {
|
|
|
|
if (input == null || input.isEmpty()) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
final String clearedInput = Utils.removeNonDigitCharacters(input);
|
|
|
|
try {
|
|
|
|
return Integer.parseInt(clearedInput);
|
|
|
|
} catch (final NumberFormatException ex) {
|
|
|
|
return 0;
|
|
|
|
}
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|
2019-04-28 22:03:16 +02:00
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
@Nonnull
|
|
|
|
public static String getFeedUrlFrom(@Nonnull final String channelIdOrUser) {
|
2019-12-16 08:35:44 +01:00
|
|
|
if (channelIdOrUser.startsWith("user/")) {
|
|
|
|
return FEED_BASE_USER + channelIdOrUser.replace("user/", "");
|
|
|
|
} else if (channelIdOrUser.startsWith("channel/")) {
|
|
|
|
return FEED_BASE_CHANNEL_ID + channelIdOrUser.replace("channel/", "");
|
|
|
|
} else {
|
|
|
|
return FEED_BASE_CHANNEL_ID + channelIdOrUser;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-25 18:54:26 +02:00
|
|
|
public static OffsetDateTime parseDateFrom(final String textualUploadDate)
|
|
|
|
throws ParsingException {
|
2019-04-28 22:03:16 +02:00
|
|
|
try {
|
2020-10-18 05:48:14 +02:00
|
|
|
return OffsetDateTime.parse(textualUploadDate);
|
2021-04-08 16:17:59 +02:00
|
|
|
} catch (final DateTimeParseException e) {
|
2020-11-03 11:54:46 +01:00
|
|
|
try {
|
|
|
|
return LocalDate.parse(textualUploadDate).atStartOfDay().atOffset(ZoneOffset.UTC);
|
2021-04-08 16:17:59 +02:00
|
|
|
} catch (final DateTimeParseException e1) {
|
2021-04-25 18:54:26 +02:00
|
|
|
throw new ParsingException("Could not parse date: \"" + textualUploadDate + "\"",
|
|
|
|
e1);
|
2020-11-03 11:54:46 +01:00
|
|
|
}
|
2019-04-28 22:03:16 +02:00
|
|
|
}
|
|
|
|
}
|
2020-02-22 23:51:02 +01:00
|
|
|
|
2020-02-02 18:15:47 +01:00
|
|
|
/**
|
2020-04-16 19:28:27 +02:00
|
|
|
* Checks if the given playlist id is a YouTube Mix (auto-generated playlist)
|
|
|
|
* Ids from a YouTube Mix start with "RD"
|
2021-02-07 22:12:22 +01:00
|
|
|
*
|
2021-06-24 18:39:16 +02:00
|
|
|
* @param playlistId the playlist id
|
2020-04-16 19:28:27 +02:00
|
|
|
* @return Whether given id belongs to a YouTube Mix
|
2020-02-02 18:15:47 +01:00
|
|
|
*/
|
2021-06-11 13:34:23 +02:00
|
|
|
public static boolean isYoutubeMixId(@Nonnull final String playlistId) {
|
2022-02-17 17:19:54 +01:00
|
|
|
return playlistId.startsWith("RD")
|
|
|
|
&& !isYoutubeMusicMixId(playlistId);
|
2020-03-21 18:48:12 +01:00
|
|
|
}
|
|
|
|
|
2022-02-02 20:23:11 +01:00
|
|
|
/**
|
|
|
|
* Checks if the given playlist id is a YouTube My Mix (auto-generated playlist)
|
|
|
|
* Ids from a YouTube My Mix start with "RDMM"
|
|
|
|
*
|
|
|
|
* @param playlistId the playlist id
|
|
|
|
* @return Whether given id belongs to a YouTube My Mix
|
|
|
|
*/
|
|
|
|
public static boolean isYoutubeMyMixId(@Nonnull final String playlistId) {
|
|
|
|
return playlistId.startsWith("RDMM");
|
|
|
|
}
|
|
|
|
|
2020-03-21 18:48:12 +01:00
|
|
|
/**
|
2020-04-16 19:28:27 +02:00
|
|
|
* Checks if the given playlist id is a YouTube Music Mix (auto-generated playlist)
|
2020-12-23 21:07:30 +01:00
|
|
|
* Ids from a YouTube Music Mix start with "RDAMVM" or "RDCLAK"
|
2021-02-07 22:12:22 +01:00
|
|
|
*
|
2021-04-12 18:24:32 +02:00
|
|
|
* @param playlistId the playlist id
|
2020-04-16 19:28:27 +02:00
|
|
|
* @return Whether given id belongs to a YouTube Music Mix
|
2020-03-21 18:48:12 +01:00
|
|
|
*/
|
2021-06-11 13:34:23 +02:00
|
|
|
public static boolean isYoutubeMusicMixId(@Nonnull final String playlistId) {
|
2020-12-23 21:07:30 +01:00
|
|
|
return playlistId.startsWith("RDAMVM") || playlistId.startsWith("RDCLAK");
|
2020-02-02 14:19:48 +01:00
|
|
|
}
|
2021-02-07 22:12:22 +01:00
|
|
|
|
2020-09-26 11:22:24 +02:00
|
|
|
/**
|
|
|
|
* Checks if the given playlist id is a YouTube Channel Mix (auto-generated playlist)
|
|
|
|
* Ids from a YouTube channel Mix start with "RDCM"
|
2021-02-07 22:12:22 +01:00
|
|
|
*
|
2020-09-26 11:22:24 +02:00
|
|
|
* @return Whether given id belongs to a YouTube Channel Mix
|
|
|
|
*/
|
2021-06-11 13:34:23 +02:00
|
|
|
public static boolean isYoutubeChannelMixId(@Nonnull final String playlistId) {
|
2020-09-26 11:22:24 +02:00
|
|
|
return playlistId.startsWith("RDCM");
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2022-02-17 17:19:54 +01:00
|
|
|
* Checks if the given playlist id is a YouTube Genre Mix (auto-generated playlist)
|
|
|
|
* Ids from a YouTube Genre Mix start with "RDGMEM"
|
|
|
|
*
|
|
|
|
* @return Whether given id belongs to a YouTube Genre Mix
|
|
|
|
*/
|
|
|
|
public static boolean isYoutubeGenreMixId(@Nonnull final String playlistId) {
|
|
|
|
return playlistId.startsWith("RDGMEM");
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @param playlistId the playlist id to parse
|
|
|
|
* @return the {@link PlaylistInfo.PlaylistType} extracted from the playlistId (mix playlist
|
|
|
|
* types included)
|
|
|
|
* @throws ParsingException if the playlistId is null or empty, if the playlistId is not a mix,
|
|
|
|
* if it is a mix but it's not based on a specific stream (this is the
|
|
|
|
* case for channel or genre mixes)
|
2020-09-26 11:22:24 +02:00
|
|
|
*/
|
2021-06-11 13:34:23 +02:00
|
|
|
@Nonnull
|
2022-02-17 17:19:54 +01:00
|
|
|
public static String extractVideoIdFromMixId(final String playlistId)
|
2021-06-11 13:34:23 +02:00
|
|
|
throws ParsingException {
|
2022-02-17 17:19:54 +01:00
|
|
|
if (isNullOrEmpty(playlistId)) {
|
|
|
|
throw new ParsingException("Video id could not be determined from empty playlist id");
|
|
|
|
|
|
|
|
} else if (isYoutubeMyMixId(playlistId)) {
|
2020-09-26 11:22:24 +02:00
|
|
|
return playlistId.substring(4);
|
|
|
|
|
2022-02-17 15:25:06 +01:00
|
|
|
} else if (isYoutubeMusicMixId(playlistId)) {
|
2020-09-26 11:22:24 +02:00
|
|
|
return playlistId.substring(6);
|
|
|
|
|
2022-02-17 15:25:06 +01:00
|
|
|
} else if (isYoutubeChannelMixId(playlistId)) {
|
2022-02-17 17:19:54 +01:00
|
|
|
// Channel mixes are of the form RMCM{channelId}, so videoId can't be determined
|
|
|
|
throw new ParsingException("Video id could not be determined from channel mix id: "
|
|
|
|
+ playlistId);
|
|
|
|
|
|
|
|
} else if (isYoutubeGenreMixId(playlistId)) {
|
|
|
|
// Genre mixes are of the form RDGMEM{garbage}, so videoId can't be determined
|
|
|
|
throw new ParsingException("Video id could not be determined from genre mix id: "
|
2021-04-25 18:54:26 +02:00
|
|
|
+ playlistId);
|
2020-09-26 11:22:24 +02:00
|
|
|
|
2022-02-17 15:25:06 +01:00
|
|
|
} else if (isYoutubeMixId(playlistId)) { // normal mix
|
2022-02-17 17:19:54 +01:00
|
|
|
if (playlistId.length() != 13) {
|
|
|
|
// Stream YouTube mixes are of the form RD{videoId}, but if videoId is not exactly
|
|
|
|
// 11 characters then it can't be a video id, hence we are dealing with a different
|
|
|
|
// type of mix (e.g. genre mixes handled above, of the form RDGMEM{garbage})
|
|
|
|
throw new ParsingException("Video id could not be determined from mix id: "
|
|
|
|
+ playlistId);
|
|
|
|
}
|
2020-09-26 11:22:24 +02:00
|
|
|
return playlistId.substring(2);
|
|
|
|
|
2020-12-25 15:00:31 +01:00
|
|
|
} else { // not a mix
|
2022-02-17 17:19:54 +01:00
|
|
|
throw new ParsingException("Video id could not be determined from playlist id: "
|
2021-04-25 18:54:26 +02:00
|
|
|
+ playlistId);
|
2020-09-26 11:22:24 +02:00
|
|
|
}
|
|
|
|
}
|
2020-02-02 14:19:48 +01:00
|
|
|
|
2022-02-17 17:39:49 +01:00
|
|
|
/**
|
|
|
|
* @param playlistId the playlist id to parse
|
|
|
|
* @return the {@link PlaylistInfo.PlaylistType} extracted from the playlistId (mix playlist
|
|
|
|
* types included)
|
|
|
|
* @throws ParsingException if the playlistId is null or empty
|
|
|
|
*/
|
|
|
|
@Nonnull
|
|
|
|
public static PlaylistInfo.PlaylistType extractPlaylistTypeFromPlaylistId(
|
|
|
|
final String playlistId) throws ParsingException {
|
|
|
|
if (isNullOrEmpty(playlistId)) {
|
|
|
|
throw new ParsingException("Could not extract playlist type from empty playlist id");
|
|
|
|
} else if (isYoutubeMusicMixId(playlistId)) {
|
|
|
|
return PlaylistInfo.PlaylistType.MIX_MUSIC;
|
|
|
|
} else if (isYoutubeChannelMixId(playlistId)) {
|
|
|
|
return PlaylistInfo.PlaylistType.MIX_CHANNEL;
|
|
|
|
} else if (isYoutubeGenreMixId(playlistId)) {
|
|
|
|
return PlaylistInfo.PlaylistType.MIX_GENRE;
|
|
|
|
} else if (isYoutubeMixId(playlistId)) { // normal mix
|
|
|
|
// Either a normal mix based on a stream, or a "my mix" (still based on a stream).
|
|
|
|
// NOTE: if YouTube introduces even more types of mixes that still start with RD,
|
|
|
|
// they will default to this, even though they might not be based on a stream.
|
|
|
|
return PlaylistInfo.PlaylistType.MIX_STREAM;
|
|
|
|
} else {
|
|
|
|
// not a known type of mix: just consider it a normal playlist
|
|
|
|
return PlaylistInfo.PlaylistType.NORMAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @param playlistUrl the playlist url to parse
|
|
|
|
* @return the {@link PlaylistInfo.PlaylistType} extracted from the playlistUrl's list param
|
|
|
|
* (mix playlist types included)
|
|
|
|
* @throws ParsingException if the playlistUrl is malformed, if has no list param or if the list
|
|
|
|
* param is empty
|
|
|
|
*/
|
|
|
|
public static PlaylistInfo.PlaylistType extractPlaylistTypeFromPlaylistUrl(
|
|
|
|
final String playlistUrl) throws ParsingException {
|
|
|
|
try {
|
|
|
|
return extractPlaylistTypeFromPlaylistId(
|
|
|
|
Utils.getQueryValue(Utils.stringToURL(playlistUrl), "list"));
|
|
|
|
} catch (final MalformedURLException e) {
|
|
|
|
throw new ParsingException("Could not extract playlist type from malformed url", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-15 17:21:21 +01:00
|
|
|
public static JsonObject getInitialData(final String html) throws ParsingException {
|
2020-02-22 23:51:02 +01:00
|
|
|
try {
|
2020-10-16 20:27:40 +02:00
|
|
|
try {
|
2021-04-25 18:54:26 +02:00
|
|
|
final String initialData = Parser.matchGroup1(
|
|
|
|
"window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", html);
|
2020-10-16 20:27:40 +02:00
|
|
|
return JsonParser.object().from(initialData);
|
2021-04-08 16:17:59 +02:00
|
|
|
} catch (final Parser.RegexException e) {
|
2021-04-25 18:54:26 +02:00
|
|
|
final String initialData = Parser.matchGroup1(
|
|
|
|
"var\\s*ytInitialData\\s*=\\s*(\\{.*?\\});", html);
|
2020-10-16 20:27:40 +02:00
|
|
|
return JsonParser.object().from(initialData);
|
|
|
|
}
|
2021-04-08 16:17:59 +02:00
|
|
|
} catch (final JsonParserException | Parser.RegexException e) {
|
2020-02-22 23:51:02 +01:00
|
|
|
throw new ParsingException("Could not get ytInitialData", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
public static boolean areHardcodedClientVersionAndKeyValid()
|
2021-06-24 18:39:16 +02:00
|
|
|
throws IOException, ExtractionException {
|
2021-07-09 18:23:46 +02:00
|
|
|
if (hardcodedClientVersionAndKeyValid.isPresent()) {
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
return hardcodedClientVersionAndKeyValid.get();
|
2021-04-25 18:54:26 +02:00
|
|
|
}
|
2021-04-12 18:24:32 +02:00
|
|
|
// @formatter:off
|
|
|
|
final byte[] body = JsonWriter.string()
|
|
|
|
.object()
|
|
|
|
.object("context")
|
|
|
|
.object("client")
|
2021-06-24 18:39:16 +02:00
|
|
|
.value("hl", "en-GB")
|
2021-04-12 18:24:32 +02:00
|
|
|
.value("gl", "GB")
|
2021-06-24 18:39:16 +02:00
|
|
|
.value("clientName", "WEB")
|
2021-04-12 18:24:32 +02:00
|
|
|
.value("clientVersion", HARDCODED_CLIENT_VERSION)
|
|
|
|
.end()
|
2021-06-24 18:39:16 +02:00
|
|
|
.object("user")
|
|
|
|
.value("lockedSafetyMode", false)
|
|
|
|
.end()
|
|
|
|
.value("fetchLiveState", true)
|
2021-04-12 18:24:32 +02:00
|
|
|
.end()
|
|
|
|
.end().done().getBytes(UTF_8);
|
|
|
|
// @formatter:on
|
2020-02-29 22:42:43 +01:00
|
|
|
|
2020-12-15 17:21:21 +01:00
|
|
|
final Map<String, List<String>> headers = new HashMap<>();
|
2020-02-29 22:42:43 +01:00
|
|
|
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
|
2021-04-25 18:54:26 +02:00
|
|
|
headers.put("X-YouTube-Client-Version",
|
|
|
|
Collections.singletonList(HARDCODED_CLIENT_VERSION));
|
2020-02-26 15:22:59 +01:00
|
|
|
|
2021-04-12 18:24:32 +02:00
|
|
|
// This endpoint is fetched by the YouTube website to get the items of its main menu and is
|
|
|
|
// pretty lightweight (around 30kB)
|
2021-04-25 18:54:26 +02:00
|
|
|
final Response response = getDownloader().post(YOUTUBEI_V1_URL + "guide?key="
|
2021-04-12 18:24:32 +02:00
|
|
|
+ HARDCODED_KEY, headers, body);
|
|
|
|
final String responseBody = response.responseBody();
|
|
|
|
final int responseCode = response.responseCode();
|
|
|
|
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
hardcodedClientVersionAndKeyValid = Optional.of(responseBody.length() > 5000
|
2021-07-09 18:23:46 +02:00
|
|
|
&& responseCode == 200); // Ensure to have a valid response
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
return hardcodedClientVersionAndKeyValid.get();
|
2020-02-28 16:35:24 +01:00
|
|
|
}
|
|
|
|
|
2020-07-26 13:14:25 +02:00
|
|
|
private static void extractClientVersionAndKey() throws IOException, ExtractionException {
|
2021-07-09 18:23:46 +02:00
|
|
|
// Don't extract the client version and the InnerTube key if it has been already extracted
|
2022-03-18 15:09:06 +01:00
|
|
|
if (keyAndVersionExtracted) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2021-04-12 18:24:32 +02:00
|
|
|
// Don't provide a search term in order to have a smaller response
|
2021-06-26 20:04:55 +02:00
|
|
|
final String url = "https://www.youtube.com/results?search_query=&ucbcb=1";
|
2021-04-19 19:07:04 +02:00
|
|
|
final Map<String, List<String>> headers = new HashMap<>();
|
|
|
|
addCookieHeader(headers);
|
|
|
|
final String html = getDownloader().get(url, headers).responseBody();
|
2020-07-26 12:00:56 +02:00
|
|
|
final JsonObject initialData = getInitialData(html);
|
2021-04-12 18:24:32 +02:00
|
|
|
final JsonArray serviceTrackingParams = initialData.getObject("responseContext")
|
|
|
|
.getArray("serviceTrackingParams");
|
2020-02-29 22:42:43 +01:00
|
|
|
String shortClientVersion = null;
|
|
|
|
|
2021-04-12 18:24:32 +02:00
|
|
|
// Try to get version from initial data first
|
2020-07-26 12:00:56 +02:00
|
|
|
for (final Object service : serviceTrackingParams) {
|
|
|
|
final JsonObject s = (JsonObject) service;
|
2020-02-29 22:42:43 +01:00
|
|
|
if (s.getString("service").equals("CSI")) {
|
2020-07-26 12:00:56 +02:00
|
|
|
final JsonArray params = s.getArray("params");
|
|
|
|
for (final Object param : params) {
|
|
|
|
final JsonObject p = (JsonObject) param;
|
2022-03-18 15:09:06 +01:00
|
|
|
final String paramKey = p.getString("key");
|
|
|
|
if (paramKey != null && paramKey.equals("cver")) {
|
2020-07-26 12:00:56 +02:00
|
|
|
clientVersion = p.getString("value");
|
2020-02-24 19:03:54 +01:00
|
|
|
}
|
2020-02-29 22:42:43 +01:00
|
|
|
}
|
|
|
|
} else if (s.getString("service").equals("ECATCHER")) {
|
2021-04-25 18:54:26 +02:00
|
|
|
// Fallback to get a shortened client version which does not contain the last two
|
|
|
|
// digits
|
2020-07-26 12:00:56 +02:00
|
|
|
final JsonArray params = s.getArray("params");
|
|
|
|
for (final Object param : params) {
|
|
|
|
final JsonObject p = (JsonObject) param;
|
2022-03-18 15:09:06 +01:00
|
|
|
final String paramKey = p.getString("key");
|
|
|
|
if (paramKey != null && paramKey.equals("client.version")) {
|
2020-02-29 22:42:43 +01:00
|
|
|
shortClientVersion = p.getString("value");
|
2020-02-24 19:03:54 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-02-29 22:42:43 +01:00
|
|
|
}
|
2020-02-24 19:03:54 +01:00
|
|
|
|
2020-02-29 22:42:43 +01:00
|
|
|
String contextClientVersion;
|
2020-07-26 12:00:56 +02:00
|
|
|
final String[] patterns = {
|
2020-02-29 22:42:43 +01:00
|
|
|
"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
|
|
|
|
"innertube_context_client_version\":\"([0-9\\.]+?)\"",
|
|
|
|
"client.version=([0-9\\.]+)"
|
|
|
|
};
|
2020-07-26 12:00:56 +02:00
|
|
|
for (final String pattern : patterns) {
|
2020-02-29 22:42:43 +01:00
|
|
|
try {
|
|
|
|
contextClientVersion = Parser.matchGroup1(pattern, html);
|
2020-04-15 18:49:58 +02:00
|
|
|
if (!isNullOrEmpty(contextClientVersion)) {
|
2020-07-26 12:00:56 +02:00
|
|
|
clientVersion = contextClientVersion;
|
2020-07-26 13:14:25 +02:00
|
|
|
break;
|
2020-02-26 15:22:59 +01:00
|
|
|
}
|
2021-04-08 16:17:59 +02:00
|
|
|
} catch (final Parser.RegexException ignored) {
|
2021-02-07 22:12:22 +01:00
|
|
|
}
|
2020-02-29 22:42:43 +01:00
|
|
|
}
|
2020-02-24 19:03:54 +01:00
|
|
|
|
2020-07-26 13:14:25 +02:00
|
|
|
if (!isNullOrEmpty(clientVersion) && !isNullOrEmpty(shortClientVersion)) {
|
2020-07-26 12:00:56 +02:00
|
|
|
clientVersion = shortClientVersion;
|
|
|
|
}
|
|
|
|
|
|
|
|
try {
|
|
|
|
key = Parser.matchGroup1("INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"", html);
|
2021-06-05 18:17:26 +02:00
|
|
|
} catch (final Parser.RegexException e1) {
|
2020-07-26 12:00:56 +02:00
|
|
|
try {
|
|
|
|
key = Parser.matchGroup1("innertubeApiKey\":\"([0-9a-zA-Z_-]+?)\"", html);
|
2021-06-05 18:17:26 +02:00
|
|
|
} catch (final Parser.RegexException e2) {
|
|
|
|
throw new ParsingException("Could not extract client version and key");
|
2021-02-07 22:12:22 +01:00
|
|
|
}
|
2020-02-29 22:42:43 +01:00
|
|
|
}
|
2021-06-05 18:17:26 +02:00
|
|
|
keyAndVersionExtracted = true;
|
2020-07-26 12:00:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2021-12-11 16:52:17 +01:00
|
|
|
* Get the client version used by YouTube website on InnerTube requests.
|
2020-07-26 12:00:56 +02:00
|
|
|
*/
|
|
|
|
public static String getClientVersion() throws IOException, ExtractionException {
|
2022-03-18 15:09:06 +01:00
|
|
|
if (!isNullOrEmpty(clientVersion)) {
|
|
|
|
return clientVersion;
|
|
|
|
}
|
2020-07-26 12:00:56 +02:00
|
|
|
|
2021-06-06 15:39:45 +02:00
|
|
|
extractClientVersionAndKey();
|
2021-12-11 16:52:17 +01:00
|
|
|
|
|
|
|
if (keyAndVersionExtracted) {
|
|
|
|
return clientVersion;
|
|
|
|
} else {
|
|
|
|
if (areHardcodedClientVersionAndKeyValid()) {
|
|
|
|
clientVersion = HARDCODED_CLIENT_VERSION;
|
|
|
|
return clientVersion;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
throw new ExtractionException("Could not get YouTube WEB client version");
|
2020-07-26 12:00:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2021-12-11 16:52:17 +01:00
|
|
|
* Get the internal API key used by YouTube website on InnerTube requests.
|
2020-07-26 12:00:56 +02:00
|
|
|
*/
|
|
|
|
public static String getKey() throws IOException, ExtractionException {
|
2022-03-18 15:09:06 +01:00
|
|
|
if (!isNullOrEmpty(key)) {
|
|
|
|
return key;
|
|
|
|
}
|
2021-12-11 16:52:17 +01:00
|
|
|
|
|
|
|
extractClientVersionAndKey();
|
|
|
|
|
|
|
|
if (keyAndVersionExtracted) {
|
2022-03-18 15:09:06 +01:00
|
|
|
return key;
|
2021-12-11 16:52:17 +01:00
|
|
|
} else {
|
|
|
|
if (areHardcodedClientVersionAndKeyValid()) {
|
|
|
|
key = HARDCODED_KEY;
|
|
|
|
return key;
|
|
|
|
}
|
2021-06-06 15:39:45 +02:00
|
|
|
}
|
2020-02-24 19:03:54 +01:00
|
|
|
|
2021-12-11 16:52:17 +01:00
|
|
|
// The ANDROID API key is also valid with the WEB client so return it if we couldn't
|
|
|
|
// extract the WEB API key.
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
return ANDROID_YOUTUBE_KEY;
|
2020-02-24 19:03:54 +01:00
|
|
|
}
|
2020-02-27 17:39:23 +01:00
|
|
|
|
2021-01-14 20:01:52 +01:00
|
|
|
/**
|
2021-02-17 19:21:39 +01:00
|
|
|
* <p>
|
2021-12-11 16:52:17 +01:00
|
|
|
* <b>Only used in tests.</b>
|
2021-02-17 19:21:39 +01:00
|
|
|
* </p>
|
2021-01-14 20:01:52 +01:00
|
|
|
*
|
2021-02-17 19:21:39 +01:00
|
|
|
* <p>
|
2021-01-14 20:01:52 +01:00
|
|
|
* Quick-and-dirty solution to reset global state in between test classes.
|
2021-02-17 19:21:39 +01:00
|
|
|
* </p>
|
|
|
|
* <p>
|
|
|
|
* This is needed for the mocks because in order to reach that state a network request has to
|
|
|
|
* be made. If the global state is not reset and the RecordingDownloader is used,
|
|
|
|
* then only the first test class has that request recorded. Meaning running the other
|
|
|
|
* tests with mocks will fail, because the mock is missing.
|
|
|
|
* </p>
|
2021-01-14 20:01:52 +01:00
|
|
|
*/
|
2021-01-17 18:48:16 +01:00
|
|
|
public static void resetClientVersionAndKey() {
|
2021-01-10 20:24:50 +01:00
|
|
|
clientVersion = null;
|
|
|
|
key = null;
|
2021-12-11 16:52:17 +01:00
|
|
|
keyAndVersionExtracted = false;
|
2021-01-10 20:24:50 +01:00
|
|
|
}
|
|
|
|
|
2021-04-08 16:36:55 +02:00
|
|
|
/**
|
|
|
|
* <p>
|
2021-12-11 16:52:17 +01:00
|
|
|
* <b>Only used in tests.</b>
|
2021-04-08 16:36:55 +02:00
|
|
|
* </p>
|
|
|
|
*/
|
2021-06-26 20:04:55 +02:00
|
|
|
public static void setNumberGenerator(final Random random) {
|
2021-04-08 16:36:55 +02:00
|
|
|
numberGenerator = random;
|
|
|
|
}
|
|
|
|
|
2021-06-06 15:39:45 +02:00
|
|
|
public static boolean isHardcodedYoutubeMusicKeyValid() throws IOException,
|
2021-04-25 18:54:26 +02:00
|
|
|
ReCaptchaException {
|
2021-05-09 16:14:37 +02:00
|
|
|
final String url =
|
|
|
|
"https://music.youtube.com/youtubei/v1/music/get_search_suggestions?alt=json&key="
|
2021-06-05 13:51:56 +02:00
|
|
|
+ HARDCODED_YOUTUBE_MUSIC_KEY[0];
|
2020-03-20 11:05:19 +01:00
|
|
|
|
|
|
|
// @formatter:off
|
2022-03-18 15:09:06 +01:00
|
|
|
final byte[] json = JsonWriter.string()
|
2020-03-20 11:05:19 +01:00
|
|
|
.object()
|
|
|
|
.object("context")
|
|
|
|
.object("client")
|
|
|
|
.value("clientName", "WEB_REMIX")
|
2021-06-05 13:51:56 +02:00
|
|
|
.value("clientVersion", HARDCODED_YOUTUBE_MUSIC_KEY[2])
|
2021-05-09 16:14:37 +02:00
|
|
|
.value("hl", "en-GB")
|
2020-03-20 11:05:19 +01:00
|
|
|
.value("gl", "GB")
|
|
|
|
.array("experimentIds").end()
|
2021-06-24 18:39:16 +02:00
|
|
|
.value("experimentsToken", EMPTY_STRING)
|
2020-03-20 11:05:19 +01:00
|
|
|
.object("locationInfo").end()
|
|
|
|
.object("musicAppInfo").end()
|
|
|
|
.end()
|
|
|
|
.object("capabilities").end()
|
|
|
|
.object("request")
|
|
|
|
.array("internalExperimentFlags").end()
|
|
|
|
.object("sessionIndex").end()
|
|
|
|
.end()
|
|
|
|
.object("activePlayers").end()
|
|
|
|
.object("user")
|
|
|
|
.value("enableSafetyMode", false)
|
|
|
|
.end()
|
|
|
|
.end()
|
2021-04-11 17:41:40 +02:00
|
|
|
.value("input", "")
|
2021-02-07 22:12:22 +01:00
|
|
|
.end().done().getBytes(UTF_8);
|
2020-03-20 11:05:19 +01:00
|
|
|
// @formatter:on
|
|
|
|
|
2020-12-15 17:21:21 +01:00
|
|
|
final Map<String, List<String>> headers = new HashMap<>();
|
2021-04-25 18:54:26 +02:00
|
|
|
headers.put("X-YouTube-Client-Name", Collections.singletonList(
|
2021-06-05 13:51:56 +02:00
|
|
|
HARDCODED_YOUTUBE_MUSIC_KEY[1]));
|
2021-04-25 18:54:26 +02:00
|
|
|
headers.put("X-YouTube-Client-Version", Collections.singletonList(
|
2021-06-05 13:51:56 +02:00
|
|
|
HARDCODED_YOUTUBE_MUSIC_KEY[2]));
|
2020-03-20 11:05:19 +01:00
|
|
|
headers.put("Origin", Collections.singletonList("https://music.youtube.com"));
|
2020-03-20 14:14:02 +01:00
|
|
|
headers.put("Referer", Collections.singletonList("music.youtube.com"));
|
2020-03-20 11:05:19 +01:00
|
|
|
headers.put("Content-Type", Collections.singletonList("application/json"));
|
|
|
|
|
2021-04-12 18:24:32 +02:00
|
|
|
final Response response = getDownloader().post(url, headers, json);
|
2021-04-25 18:54:26 +02:00
|
|
|
// Ensure to have a valid response
|
2021-06-06 15:39:45 +02:00
|
|
|
return response.responseBody().length() > 500 && response.responseCode() == 200;
|
2020-03-20 11:05:19 +01:00
|
|
|
}
|
|
|
|
|
2021-06-06 15:39:45 +02:00
|
|
|
public static String[] getYoutubeMusicKey() throws IOException, ReCaptchaException,
|
2021-04-25 18:54:26 +02:00
|
|
|
Parser.RegexException {
|
2022-03-18 15:09:06 +01:00
|
|
|
if (youtubeMusicKey != null && youtubeMusicKey.length == 3) {
|
|
|
|
return youtubeMusicKey;
|
|
|
|
}
|
2021-06-06 15:39:45 +02:00
|
|
|
if (isHardcodedYoutubeMusicKeyValid()) {
|
2022-03-18 15:09:06 +01:00
|
|
|
youtubeMusicKey = HARDCODED_YOUTUBE_MUSIC_KEY;
|
|
|
|
return youtubeMusicKey;
|
2021-04-25 18:54:26 +02:00
|
|
|
}
|
2020-03-17 11:33:39 +01:00
|
|
|
|
|
|
|
final String url = "https://music.youtube.com/";
|
2021-04-19 19:07:04 +02:00
|
|
|
final Map<String, List<String>> headers = new HashMap<>();
|
|
|
|
addCookieHeader(headers);
|
|
|
|
final String html = getDownloader().get(url, headers).responseBody();
|
2020-03-17 11:33:39 +01:00
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
String innertubeApiKey;
|
2020-03-20 11:05:19 +01:00
|
|
|
try {
|
2022-03-18 15:09:06 +01:00
|
|
|
innertubeApiKey = Parser.matchGroup1("INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"", html);
|
2021-04-08 16:17:59 +02:00
|
|
|
} catch (final Parser.RegexException e) {
|
2022-03-18 15:09:06 +01:00
|
|
|
innertubeApiKey = Parser.matchGroup1("innertube_api_key\":\"([0-9a-zA-Z_-]+?)\"", html);
|
2020-03-20 11:05:19 +01:00
|
|
|
}
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
final String innertubeClientName
|
|
|
|
= Parser.matchGroup1("INNERTUBE_CONTEXT_CLIENT_NAME\":([0-9]+?),", html);
|
2020-03-20 11:05:19 +01:00
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
String innertubeClientVersion;
|
2020-03-20 11:05:19 +01:00
|
|
|
try {
|
2022-03-18 15:09:06 +01:00
|
|
|
innertubeClientVersion = Parser.matchGroup1(
|
2021-04-25 18:54:26 +02:00
|
|
|
"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"", html);
|
2021-04-08 16:17:59 +02:00
|
|
|
} catch (final Parser.RegexException e) {
|
2020-03-20 11:05:19 +01:00
|
|
|
try {
|
2022-03-18 15:09:06 +01:00
|
|
|
innertubeClientVersion = Parser.matchGroup1(
|
2021-04-25 18:54:26 +02:00
|
|
|
"INNERTUBE_CLIENT_VERSION\":\"([0-9\\.]+?)\"", html);
|
2021-04-08 16:17:59 +02:00
|
|
|
} catch (final Parser.RegexException ee) {
|
2022-03-18 15:09:06 +01:00
|
|
|
innertubeClientVersion = Parser.matchGroup1(
|
2021-04-25 18:54:26 +02:00
|
|
|
"innertube_context_client_version\":\"([0-9\\.]+?)\"", html);
|
2020-03-20 11:05:19 +01:00
|
|
|
}
|
|
|
|
}
|
2020-03-17 11:33:39 +01:00
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
youtubeMusicKey = new String[]{
|
|
|
|
innertubeApiKey,
|
|
|
|
innertubeClientName,
|
|
|
|
innertubeClientVersion
|
|
|
|
};
|
|
|
|
return youtubeMusicKey;
|
2020-03-17 11:33:39 +01:00
|
|
|
}
|
|
|
|
|
2020-12-15 17:21:21 +01:00
|
|
|
@Nullable
|
2021-06-11 13:34:23 +02:00
|
|
|
public static String getUrlFromNavigationEndpoint(@Nonnull final JsonObject navigationEndpoint)
|
2021-04-25 18:54:26 +02:00
|
|
|
throws ParsingException {
|
2020-04-16 16:08:14 +02:00
|
|
|
if (navigationEndpoint.has("urlEndpoint")) {
|
2020-02-27 17:39:23 +01:00
|
|
|
String internUrl = navigationEndpoint.getObject("urlEndpoint").getString("url");
|
2021-02-12 22:22:11 +01:00
|
|
|
if (internUrl.startsWith("https://www.youtube.com/redirect?")) {
|
2021-02-13 12:10:41 +01:00
|
|
|
// remove https://www.youtube.com part to fall in the next if block
|
2021-02-12 22:22:11 +01:00
|
|
|
internUrl = internUrl.substring(23);
|
|
|
|
}
|
|
|
|
|
2020-02-27 17:39:23 +01:00
|
|
|
if (internUrl.startsWith("/redirect?")) {
|
|
|
|
// q parameter can be the first parameter
|
|
|
|
internUrl = internUrl.substring(10);
|
2022-03-18 15:09:06 +01:00
|
|
|
final String[] params = internUrl.split("&");
|
|
|
|
for (final String param : params) {
|
2020-02-27 17:39:23 +01:00
|
|
|
if (param.split("=")[0].equals("q")) {
|
|
|
|
try {
|
2022-03-18 15:09:06 +01:00
|
|
|
return URLDecoder.decode(param.split("=")[1], UTF_8);
|
2021-04-08 16:17:59 +02:00
|
|
|
} catch (final UnsupportedEncodingException e) {
|
2020-02-27 17:39:23 +01:00
|
|
|
return null;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (internUrl.startsWith("http")) {
|
|
|
|
return internUrl;
|
2021-04-25 18:54:26 +02:00
|
|
|
} else if (internUrl.startsWith("/channel") || internUrl.startsWith("/user")
|
|
|
|
|| internUrl.startsWith("/watch")) {
|
2021-02-12 22:22:11 +01:00
|
|
|
return "https://www.youtube.com" + internUrl;
|
2020-02-27 17:39:23 +01:00
|
|
|
}
|
2020-04-16 16:08:14 +02:00
|
|
|
} else if (navigationEndpoint.has("browseEndpoint")) {
|
2020-02-29 22:57:25 +01:00
|
|
|
final JsonObject browseEndpoint = navigationEndpoint.getObject("browseEndpoint");
|
|
|
|
final String canonicalBaseUrl = browseEndpoint.getString("canonicalBaseUrl");
|
|
|
|
final String browseId = browseEndpoint.getString("browseId");
|
|
|
|
|
|
|
|
// All channel ids are prefixed with UC
|
|
|
|
if (browseId != null && browseId.startsWith("UC")) {
|
|
|
|
return "https://www.youtube.com/channel/" + browseId;
|
|
|
|
}
|
|
|
|
|
2020-04-15 18:49:58 +02:00
|
|
|
if (!isNullOrEmpty(canonicalBaseUrl)) {
|
2020-02-29 22:57:25 +01:00
|
|
|
return "https://www.youtube.com" + canonicalBaseUrl;
|
|
|
|
}
|
|
|
|
|
2021-04-25 18:54:26 +02:00
|
|
|
throw new ParsingException("canonicalBaseUrl is null and browseId is not a channel (\""
|
|
|
|
+ browseEndpoint + "\")");
|
2020-04-16 16:08:14 +02:00
|
|
|
} else if (navigationEndpoint.has("watchEndpoint")) {
|
2022-03-18 15:09:06 +01:00
|
|
|
final StringBuilder url = new StringBuilder();
|
2021-04-25 18:54:26 +02:00
|
|
|
url.append("https://www.youtube.com/watch?v=").append(navigationEndpoint
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
.getObject("watchEndpoint").getString(VIDEO_ID));
|
2020-04-16 19:28:27 +02:00
|
|
|
if (navigationEndpoint.getObject("watchEndpoint").has("playlistId")) {
|
2020-12-12 20:40:13 +01:00
|
|
|
url.append("&list=").append(navigationEndpoint.getObject("watchEndpoint")
|
2020-04-16 19:28:27 +02:00
|
|
|
.getString("playlistId"));
|
|
|
|
}
|
|
|
|
if (navigationEndpoint.getObject("watchEndpoint").has("startTimeSeconds")) {
|
|
|
|
url.append("&t=").append(navigationEndpoint.getObject("watchEndpoint")
|
|
|
|
.getInt("startTimeSeconds"));
|
|
|
|
}
|
2020-02-27 17:39:23 +01:00
|
|
|
return url.toString();
|
2020-04-16 16:08:14 +02:00
|
|
|
} else if (navigationEndpoint.has("watchPlaylistEndpoint")) {
|
2022-03-18 15:09:06 +01:00
|
|
|
return "https://www.youtube.com/playlist?list="
|
|
|
|
+ navigationEndpoint.getObject("watchPlaylistEndpoint").getString("playlistId");
|
2020-02-27 17:39:23 +01:00
|
|
|
}
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
2020-04-20 14:27:33 +02:00
|
|
|
/**
|
|
|
|
* Get the text from a JSON object that has either a simpleText or a runs array.
|
2021-02-07 22:12:22 +01:00
|
|
|
*
|
2020-04-20 14:27:33 +02:00
|
|
|
* @param textObject JSON object to get the text from
|
|
|
|
* @param html whether to return HTML, by parsing the navigationEndpoint
|
2020-05-01 13:55:15 +02:00
|
|
|
* @return text in the JSON object or {@code null}
|
2020-04-20 14:27:33 +02:00
|
|
|
*/
|
2020-12-15 17:21:21 +01:00
|
|
|
@Nullable
|
2021-04-25 18:54:26 +02:00
|
|
|
public static String getTextFromObject(final JsonObject textObject, final boolean html)
|
|
|
|
throws ParsingException {
|
2022-03-18 15:09:06 +01:00
|
|
|
if (isNullOrEmpty(textObject)) {
|
|
|
|
return null;
|
|
|
|
}
|
2020-05-01 13:55:15 +02:00
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
if (textObject.has("simpleText")) {
|
|
|
|
return textObject.getString("simpleText");
|
|
|
|
}
|
2020-02-27 17:39:23 +01:00
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
if (textObject.getArray("runs").isEmpty()) {
|
|
|
|
return null;
|
|
|
|
}
|
2020-05-01 13:55:15 +02:00
|
|
|
|
2020-12-15 17:21:21 +01:00
|
|
|
final StringBuilder textBuilder = new StringBuilder();
|
|
|
|
for (final Object textPart : textObject.getArray("runs")) {
|
2022-03-18 15:09:06 +01:00
|
|
|
final String text = ((JsonObject) textPart).getString("text");
|
2020-04-16 16:08:14 +02:00
|
|
|
if (html && ((JsonObject) textPart).has("navigationEndpoint")) {
|
2022-03-18 15:09:06 +01:00
|
|
|
final String url = getUrlFromNavigationEndpoint(((JsonObject) textPart)
|
2021-04-25 18:54:26 +02:00
|
|
|
.getObject("navigationEndpoint"));
|
2020-04-15 18:49:58 +02:00
|
|
|
if (!isNullOrEmpty(url)) {
|
2021-04-25 18:54:26 +02:00
|
|
|
textBuilder.append("<a href=\"").append(url).append("\">").append(text)
|
|
|
|
.append("</a>");
|
2020-02-27 17:39:23 +01:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
textBuilder.append(text);
|
|
|
|
}
|
|
|
|
|
|
|
|
String text = textBuilder.toString();
|
|
|
|
|
|
|
|
if (html) {
|
|
|
|
text = text.replaceAll("\\n", "<br>");
|
|
|
|
text = text.replaceAll(" ", " ");
|
|
|
|
}
|
|
|
|
|
|
|
|
return text;
|
|
|
|
}
|
|
|
|
|
2020-12-15 17:21:21 +01:00
|
|
|
@Nullable
|
2021-05-29 14:43:26 +02:00
|
|
|
public static String getTextFromObject(final JsonObject textObject) throws ParsingException {
|
2020-02-27 17:39:23 +01:00
|
|
|
return getTextFromObject(textObject, false);
|
|
|
|
}
|
2020-02-28 09:36:33 +01:00
|
|
|
|
2021-03-24 09:04:43 +01:00
|
|
|
@Nullable
|
2022-03-18 15:09:06 +01:00
|
|
|
public static String getTextAtKey(@Nonnull final JsonObject jsonObject, final String theKey)
|
2021-03-24 09:04:43 +01:00
|
|
|
throws ParsingException {
|
2022-03-18 15:09:06 +01:00
|
|
|
if (jsonObject.isString(theKey)) {
|
|
|
|
return jsonObject.getString(theKey);
|
2021-03-24 09:04:43 +01:00
|
|
|
} else {
|
2022-03-18 15:09:06 +01:00
|
|
|
return getTextFromObject(jsonObject.getObject(theKey));
|
2021-03-24 09:04:43 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
public static String fixThumbnailUrl(@Nonnull final String thumbnailUrl) {
|
|
|
|
String result = thumbnailUrl;
|
|
|
|
if (result.startsWith("//")) {
|
|
|
|
result = result.substring(2);
|
2020-02-28 09:36:33 +01:00
|
|
|
}
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
if (result.startsWith(HTTP)) {
|
|
|
|
result = Utils.replaceHttpWithHttps(result);
|
|
|
|
} else if (!result.startsWith(HTTPS)) {
|
|
|
|
result = "https://" + result;
|
2020-02-28 09:36:33 +01:00
|
|
|
}
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
return result;
|
2020-02-28 09:36:33 +01:00
|
|
|
}
|
2020-02-29 16:42:04 +01:00
|
|
|
|
2022-02-02 20:23:11 +01:00
|
|
|
public static String getThumbnailUrlFromInfoItem(final JsonObject infoItem)
|
|
|
|
throws ParsingException {
|
|
|
|
// TODO: Don't simply get the first item, but look at all thumbnails and their resolution
|
|
|
|
try {
|
|
|
|
return fixThumbnailUrl(infoItem.getObject("thumbnail").getArray("thumbnails")
|
|
|
|
.getObject(0).getString("url"));
|
|
|
|
} catch (final Exception e) {
|
|
|
|
throw new ParsingException("Could not get thumbnail url", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
@Nonnull
|
|
|
|
public static String getValidJsonResponseBody(@Nonnull final Response response)
|
2020-04-01 16:01:21 +02:00
|
|
|
throws ParsingException, MalformedURLException {
|
2020-03-01 01:50:31 +01:00
|
|
|
if (response.responseCode() == 404) {
|
2020-04-16 19:28:27 +02:00
|
|
|
throw new ContentNotAvailableException("Not found"
|
|
|
|
+ " (\"" + response.responseCode() + " " + response.responseMessage() + "\")");
|
2020-03-01 01:50:31 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
final String responseBody = response.responseBody();
|
2021-04-12 18:24:32 +02:00
|
|
|
if (responseBody.length() < 50) { // Ensure to have a valid response
|
2020-02-29 16:42:04 +01:00
|
|
|
throw new ParsingException("JSON response is too short");
|
|
|
|
}
|
|
|
|
|
2020-03-01 01:52:25 +01:00
|
|
|
// Check if the request was redirected to the error page.
|
|
|
|
final URL latestUrl = new URL(response.latestUrl());
|
|
|
|
if (latestUrl.getHost().equalsIgnoreCase("www.youtube.com")) {
|
|
|
|
final String path = latestUrl.getPath();
|
|
|
|
if (path.equalsIgnoreCase("/oops") || path.equalsIgnoreCase("/error")) {
|
|
|
|
throw new ContentNotAvailableException("Content unavailable");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
final String responseContentType = response.getHeader("Content-Type");
|
2020-04-01 16:01:21 +02:00
|
|
|
if (responseContentType != null
|
|
|
|
&& responseContentType.toLowerCase().contains("text/html")) {
|
2020-04-16 19:28:27 +02:00
|
|
|
throw new ParsingException("Got HTML document, expected JSON response"
|
|
|
|
+ " (latest url was: \"" + response.latestUrl() + "\")");
|
2020-03-01 01:52:25 +01:00
|
|
|
}
|
|
|
|
|
2020-04-01 16:01:21 +02:00
|
|
|
return responseBody;
|
|
|
|
}
|
|
|
|
|
2021-04-08 16:17:59 +02:00
|
|
|
public static JsonObject getJsonPostResponse(final String endpoint,
|
2021-04-11 17:01:43 +02:00
|
|
|
final byte[] body,
|
|
|
|
final Localization localization)
|
2021-04-08 16:17:59 +02:00
|
|
|
throws IOException, ExtractionException {
|
2021-04-12 18:24:32 +02:00
|
|
|
final Map<String, List<String>> headers = new HashMap<>();
|
2021-04-19 19:07:04 +02:00
|
|
|
addClientInfoHeaders(headers);
|
2021-06-26 20:04:55 +02:00
|
|
|
headers.put("Content-Type", Collections.singletonList("application/json"));
|
2021-04-08 16:17:59 +02:00
|
|
|
|
2021-04-25 18:54:26 +02:00
|
|
|
final Response response = getDownloader().post(YOUTUBEI_V1_URL + endpoint + "?key="
|
|
|
|
+ getKey(), headers, body, localization);
|
2021-04-08 16:17:59 +02:00
|
|
|
|
|
|
|
return JsonUtils.toJsonObject(getValidJsonResponseBody(response));
|
|
|
|
}
|
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
public static JsonObject getJsonAndroidPostResponse(
|
|
|
|
final String endpoint,
|
|
|
|
final byte[] body,
|
|
|
|
@Nonnull final ContentCountry contentCountry,
|
|
|
|
final Localization localization,
|
|
|
|
@Nullable final String endPartOfUrlRequest) throws IOException, ExtractionException {
|
2021-05-29 14:43:26 +02:00
|
|
|
final Map<String, List<String>> headers = new HashMap<>();
|
|
|
|
headers.put("Content-Type", Collections.singletonList("application/json"));
|
|
|
|
// Spoofing an Android 11 device with the hardcoded version of the Android app
|
|
|
|
headers.put("User-Agent", Collections.singletonList("com.google.android.youtube/"
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
+ MOBILE_YOUTUBE_CLIENT_VERSION + " (Linux; U; Android 11; "
|
2021-05-29 14:43:26 +02:00
|
|
|
+ contentCountry.getCountryCode() + ") gzip"));
|
|
|
|
headers.put("x-goog-api-format-version", Collections.singletonList("2"));
|
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
final String baseEndpointUrl = "https://youtubei.googleapis.com/youtubei/v1/" + endpoint
|
|
|
|
+ "?key=" + ANDROID_YOUTUBE_KEY;
|
2020-04-16 19:28:27 +02:00
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
final Response response = getDownloader().post(isNullOrEmpty(endPartOfUrlRequest)
|
|
|
|
? baseEndpointUrl : baseEndpointUrl + endPartOfUrlRequest,
|
|
|
|
headers, body, localization);
|
2020-04-16 19:28:27 +02:00
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
return JsonUtils.toJsonObject(getValidJsonResponseBody(response));
|
2021-03-03 19:49:26 +01:00
|
|
|
}
|
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
@Nonnull
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
public static JsonBuilder<JsonObject> prepareDesktopJsonBuilder(
|
|
|
|
@Nonnull final Localization localization,
|
|
|
|
@Nonnull final ContentCountry contentCountry)
|
2021-04-02 21:34:47 +02:00
|
|
|
throws IOException, ExtractionException {
|
|
|
|
// @formatter:off
|
|
|
|
return JsonObject.builder()
|
|
|
|
.object("context")
|
|
|
|
.object("client")
|
2021-04-30 19:06:56 +02:00
|
|
|
.value("hl", localization.getLocalizationCode())
|
|
|
|
.value("gl", contentCountry.getCountryCode())
|
2021-06-24 18:39:16 +02:00
|
|
|
.value("clientName", "WEB")
|
|
|
|
.value("clientVersion", getClientVersion())
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
.value("originalUrl", "https://www.youtube.com")
|
|
|
|
.value("platform", "DESKTOP")
|
|
|
|
.end()
|
|
|
|
.object("request")
|
|
|
|
.array("internalExperimentFlags")
|
|
|
|
.end()
|
|
|
|
.value("useSsl", true)
|
2021-06-24 18:39:16 +02:00
|
|
|
.end()
|
|
|
|
.object("user")
|
2021-06-26 20:04:55 +02:00
|
|
|
// TO DO: provide a way to enable restricted mode with:
|
|
|
|
// .value("enableSafetyMode", boolean)
|
2021-06-24 18:39:16 +02:00
|
|
|
.value("lockedSafetyMode", false)
|
2021-04-02 21:34:47 +02:00
|
|
|
.end()
|
|
|
|
.end();
|
2021-05-29 14:43:26 +02:00
|
|
|
// @formatter:on
|
|
|
|
}
|
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
@Nonnull
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
public static JsonBuilder<JsonObject> prepareAndroidMobileJsonBuilder(
|
|
|
|
@Nonnull final Localization localization,
|
|
|
|
@Nonnull final ContentCountry contentCountry) {
|
2021-05-29 14:43:26 +02:00
|
|
|
// @formatter:off
|
|
|
|
return JsonObject.builder()
|
|
|
|
.object("context")
|
|
|
|
.object("client")
|
|
|
|
.value("clientName", "ANDROID")
|
2021-06-05 13:51:56 +02:00
|
|
|
.value("clientVersion", MOBILE_YOUTUBE_CLIENT_VERSION)
|
2021-05-29 14:43:26 +02:00
|
|
|
.value("hl", localization.getLocalizationCode())
|
|
|
|
.value("gl", contentCountry.getCountryCode())
|
|
|
|
.end()
|
2021-06-24 18:39:16 +02:00
|
|
|
.object("user")
|
|
|
|
// TO DO: provide a way to enable restricted mode with:
|
|
|
|
// .value("enableSafetyMode", boolean)
|
|
|
|
.value("lockedSafetyMode", false)
|
|
|
|
.end()
|
2021-05-29 14:43:26 +02:00
|
|
|
.end();
|
2021-04-02 21:34:47 +02:00
|
|
|
// @formatter:on
|
|
|
|
}
|
|
|
|
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
@Nonnull
|
|
|
|
public static JsonBuilder<JsonObject> prepareDesktopEmbedVideoJsonBuilder(
|
|
|
|
@Nonnull final Localization localization,
|
|
|
|
@Nonnull final ContentCountry contentCountry,
|
|
|
|
@Nonnull final String videoId) throws IOException, ExtractionException {
|
|
|
|
// @formatter:off
|
|
|
|
return JsonObject.builder()
|
|
|
|
.object("context")
|
|
|
|
.object("client")
|
|
|
|
.value("hl", localization.getLocalizationCode())
|
|
|
|
.value("gl", contentCountry.getCountryCode())
|
|
|
|
.value("clientName", "WEB")
|
|
|
|
.value("clientVersion", getClientVersion())
|
|
|
|
.value("clientScreen", "EMBED")
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
.value("originalUrl", "https://www.youtube.com")
|
|
|
|
.value("platform", "DESKTOP")
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
.end()
|
|
|
|
.object("thirdParty")
|
|
|
|
.value("embedUrl", "https://www.youtube.com/watch?v=" + videoId)
|
|
|
|
.end()
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
.object("request")
|
|
|
|
.array("internalExperimentFlags")
|
|
|
|
.end()
|
|
|
|
.value("useSsl", true)
|
|
|
|
.end()
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
.object("user")
|
|
|
|
// TO DO: provide a way to enable restricted mode with:
|
|
|
|
// .value("enableSafetyMode", boolean)
|
|
|
|
.value("lockedSafetyMode", false)
|
|
|
|
.end()
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
.end();
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
// @formatter:on
|
|
|
|
}
|
|
|
|
|
|
|
|
@Nonnull
|
|
|
|
public static JsonBuilder<JsonObject> prepareAndroidMobileEmbedVideoJsonBuilder(
|
|
|
|
@Nonnull final Localization localization,
|
|
|
|
@Nonnull final ContentCountry contentCountry,
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
@Nonnull final String videoId,
|
|
|
|
@Nonnull final String contentPlaybackNonce) {
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
// @formatter:off
|
|
|
|
return JsonObject.builder()
|
|
|
|
.object("context")
|
|
|
|
.object("client")
|
|
|
|
.value("clientName", "ANDROID")
|
|
|
|
.value("clientVersion", MOBILE_YOUTUBE_CLIENT_VERSION)
|
|
|
|
.value("clientScreen", "EMBED")
|
|
|
|
.value("hl", localization.getLocalizationCode())
|
|
|
|
.value("gl", contentCountry.getCountryCode())
|
|
|
|
.end()
|
|
|
|
.object("thirdParty")
|
|
|
|
.value("embedUrl", "https://www.youtube.com/watch?v=" + videoId)
|
|
|
|
.end()
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
.object("request")
|
|
|
|
.array("internalExperimentFlags")
|
|
|
|
.end()
|
|
|
|
.value("useSsl", true)
|
|
|
|
.end()
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
.object("user")
|
|
|
|
// TO DO: provide a way to enable restricted mode with:
|
|
|
|
// .value("enableSafetyMode", boolean)
|
|
|
|
.value("lockedSafetyMode", false)
|
|
|
|
.end()
|
|
|
|
.end()
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
.value(CPN, contentPlaybackNonce)
|
|
|
|
.value(VIDEO_ID, videoId);
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
// @formatter:on
|
|
|
|
}
|
|
|
|
|
|
|
|
@Nonnull
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
public static byte[] createDesktopPlayerBody(
|
|
|
|
@Nonnull final Localization localization,
|
|
|
|
@Nonnull final ContentCountry contentCountry,
|
|
|
|
@Nonnull final String videoId,
|
|
|
|
@Nonnull final String sts,
|
|
|
|
final boolean isEmbedClientScreen,
|
|
|
|
@Nonnull final String contentPlaybackNonce) throws IOException, ExtractionException {
|
|
|
|
// @formatter:off
|
|
|
|
return JsonWriter.string((isEmbedClientScreen
|
|
|
|
? prepareDesktopEmbedVideoJsonBuilder(localization, contentCountry,
|
|
|
|
videoId)
|
|
|
|
: prepareDesktopJsonBuilder(localization, contentCountry))
|
|
|
|
.object("playbackContext")
|
|
|
|
.object("contentPlaybackContext")
|
|
|
|
.value("currentUrl", "/watch?v=" + videoId)
|
|
|
|
.value("vis", 0)
|
|
|
|
.value("splay", false)
|
|
|
|
.value("autoCaptionsDefaultOn", false)
|
|
|
|
.value("autonavState", "STATE_NONE")
|
|
|
|
.value("html5Preference", "HTML5_PREF_WANTS")
|
|
|
|
.value("signatureTimestamp", sts)
|
|
|
|
.value("referer", "https://www.youtube.com/watch?v=" + videoId)
|
|
|
|
.value("lactMilliseconds", "-1")
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
.end()
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
.end()
|
|
|
|
.value(CPN, contentPlaybackNonce)
|
|
|
|
.value(VIDEO_ID, videoId)
|
|
|
|
.done())
|
|
|
|
.getBytes(StandardCharsets.UTF_8);
|
|
|
|
// @formatter:on
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
}
|
|
|
|
|
2021-04-07 12:25:59 +02:00
|
|
|
/**
|
|
|
|
* Add required headers and cookies to an existing headers Map.
|
|
|
|
* @see #addClientInfoHeaders(Map)
|
|
|
|
* @see #addCookieHeader(Map)
|
|
|
|
*/
|
|
|
|
public static void addYouTubeHeaders(final Map<String, List<String>> headers)
|
|
|
|
throws IOException, ExtractionException {
|
|
|
|
addClientInfoHeaders(headers);
|
|
|
|
addCookieHeader(headers);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2021-04-19 19:07:04 +02:00
|
|
|
* Add the <code>X-YouTube-Client-Name</code>, <code>X-YouTube-Client-Version</code>,
|
|
|
|
* <code>Origin</code>, and <code>Referer</code> headers.
|
2021-04-07 12:25:59 +02:00
|
|
|
* @param headers The headers which should be completed
|
|
|
|
*/
|
2021-06-11 13:34:23 +02:00
|
|
|
public static void addClientInfoHeaders(@Nonnull final Map<String, List<String>> headers)
|
2021-04-07 12:25:59 +02:00
|
|
|
throws IOException, ExtractionException {
|
2021-06-24 18:39:16 +02:00
|
|
|
headers.computeIfAbsent("Origin", k -> Collections.singletonList(
|
|
|
|
"https://www.youtube.com"));
|
|
|
|
headers.computeIfAbsent("Referer", k -> Collections.singletonList(
|
|
|
|
"https://www.youtube.com"));
|
|
|
|
headers.computeIfAbsent("X-YouTube-Client-Name", k -> Collections.singletonList("1"));
|
2021-04-07 12:25:59 +02:00
|
|
|
if (headers.get("X-YouTube-Client-Version") == null) {
|
|
|
|
headers.put("X-YouTube-Client-Version", Collections.singletonList(getClientVersion()));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Add the <code>CONSENT</code> cookie to prevent redirect to <code>consent.youtube.com</code>
|
|
|
|
* @see #CONSENT_COOKIE
|
|
|
|
* @param headers the headers which should be completed
|
|
|
|
*/
|
2021-06-26 20:04:55 +02:00
|
|
|
public static void addCookieHeader(@Nonnull final Map<String, List<String>> headers) {
|
2021-04-07 12:25:59 +02:00
|
|
|
if (headers.get("Cookie") == null) {
|
2021-12-11 16:52:17 +01:00
|
|
|
headers.put("Cookie", Arrays.asList(generateConsentCookie()));
|
2021-04-07 12:25:59 +02:00
|
|
|
} else {
|
2021-04-08 16:36:55 +02:00
|
|
|
headers.get("Cookie").add(generateConsentCookie());
|
2021-04-07 12:25:59 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
@Nonnull
|
2021-04-08 16:36:55 +02:00
|
|
|
public static String generateConsentCookie() {
|
2021-07-03 13:14:22 +02:00
|
|
|
final int statusCode = 100 + numberGenerator.nextInt(900);
|
|
|
|
return CONSENT_COOKIE + statusCode;
|
2021-04-08 16:36:55 +02:00
|
|
|
}
|
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
public static String extractCookieValue(final String cookieName,
|
|
|
|
@Nonnull final Response response) {
|
2021-04-07 12:25:59 +02:00
|
|
|
final List<String> cookies = response.responseHeaders().get("set-cookie");
|
2022-03-01 23:02:56 +01:00
|
|
|
if (cookies == null) {
|
|
|
|
return EMPTY_STRING;
|
|
|
|
}
|
|
|
|
|
|
|
|
String result = EMPTY_STRING;
|
2021-04-07 12:25:59 +02:00
|
|
|
for (final String cookie : cookies) {
|
2022-03-01 23:02:56 +01:00
|
|
|
final int startIndex = cookie.indexOf(cookieName);
|
2021-04-07 12:25:59 +02:00
|
|
|
if (startIndex != -1) {
|
|
|
|
result = cookie.substring(startIndex + cookieName.length() + "=".length(),
|
|
|
|
cookie.indexOf(";", startIndex));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2020-03-01 01:52:25 +01:00
|
|
|
/**
|
|
|
|
* Shared alert detection function, multiple endpoints return the error similarly structured.
|
|
|
|
* <p>
|
|
|
|
* Will check if the object has an alert of the type "ERROR".
|
2020-04-01 16:01:21 +02:00
|
|
|
* </p>
|
2020-03-01 01:52:25 +01:00
|
|
|
*
|
|
|
|
* @param initialData the object which will be checked if an alert is present
|
|
|
|
* @throws ContentNotAvailableException if an alert is detected
|
|
|
|
*/
|
2021-06-11 13:34:23 +02:00
|
|
|
public static void defaultAlertsCheck(@Nonnull final JsonObject initialData)
|
|
|
|
throws ParsingException {
|
2020-03-01 01:52:25 +01:00
|
|
|
final JsonArray alerts = initialData.getArray("alerts");
|
2020-04-15 18:49:58 +02:00
|
|
|
if (!isNullOrEmpty(alerts)) {
|
2020-03-01 01:52:25 +01:00
|
|
|
final JsonObject alertRenderer = alerts.getObject(0).getObject("alertRenderer");
|
2020-05-03 10:28:45 +02:00
|
|
|
final String alertText = getTextFromObject(alertRenderer.getObject("text"));
|
|
|
|
final String alertType = alertRenderer.getString("type", EMPTY_STRING);
|
2020-03-01 01:52:25 +01:00
|
|
|
if (alertType.equalsIgnoreCase("ERROR")) {
|
2021-03-22 10:35:05 +01:00
|
|
|
if (alertText != null && alertText.contains("This account has been terminated")) {
|
2021-03-23 00:15:21 +01:00
|
|
|
if (alertText.contains("violation") || alertText.contains("violating")
|
|
|
|
|| alertText.contains("infringement")) {
|
2021-06-11 13:34:23 +02:00
|
|
|
// Possible error messages:
|
2022-03-18 15:09:06 +01:00
|
|
|
// "This account has been terminated for a violation of YouTube's Terms of
|
|
|
|
// Service."
|
|
|
|
// "This account has been terminated due to multiple or severe violations of
|
|
|
|
// YouTube's policy prohibiting hate speech."
|
|
|
|
// "This account has been terminated due to multiple or severe violations of
|
|
|
|
// YouTube's policy prohibiting content designed to harass, bully or
|
|
|
|
// threaten."
|
|
|
|
// "This account has been terminated due to multiple or severe violations
|
|
|
|
// of YouTube's policy against spam, deceptive practices and misleading
|
|
|
|
// content or other Terms of Service violations."
|
|
|
|
// "This account has been terminated due to multiple or severe violations of
|
|
|
|
// YouTube's policy on nudity or sexual content."
|
|
|
|
// "This account has been terminated for violating YouTube's Community
|
|
|
|
// Guidelines."
|
|
|
|
// "This account has been terminated because we received multiple
|
|
|
|
// third-party claims of copyright infringement regarding material that
|
|
|
|
// the user posted."
|
|
|
|
// "This account has been terminated because it is linked to an account that
|
|
|
|
// received multiple third-party claims of copyright infringement."
|
|
|
|
throw new AccountTerminatedException(alertText,
|
|
|
|
AccountTerminatedException.Reason.VIOLATION);
|
2021-03-22 10:35:05 +01:00
|
|
|
} else {
|
|
|
|
throw new AccountTerminatedException(alertText);
|
|
|
|
}
|
|
|
|
}
|
2020-03-01 01:52:25 +01:00
|
|
|
throw new ContentNotAvailableException("Got error: \"" + alertText + "\"");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-12-20 19:54:12 +01:00
|
|
|
|
|
|
|
@Nonnull
|
2021-06-11 13:34:23 +02:00
|
|
|
public static List<MetaInfo> getMetaInfo(@Nonnull final JsonArray contents)
|
|
|
|
throws ParsingException {
|
2020-12-20 19:54:12 +01:00
|
|
|
final List<MetaInfo> metaInfo = new ArrayList<>();
|
|
|
|
for (final Object content : contents) {
|
|
|
|
final JsonObject resultObject = (JsonObject) content;
|
|
|
|
if (resultObject.has("itemSectionRenderer")) {
|
2022-03-18 15:09:06 +01:00
|
|
|
for (final Object sectionContentObject
|
|
|
|
: resultObject.getObject("itemSectionRenderer").getArray("contents")) {
|
2020-12-20 19:54:12 +01:00
|
|
|
|
|
|
|
final JsonObject sectionContent = (JsonObject) sectionContentObject;
|
|
|
|
if (sectionContent.has("infoPanelContentRenderer")) {
|
2021-04-25 18:54:26 +02:00
|
|
|
metaInfo.add(getInfoPanelContent(sectionContent
|
|
|
|
.getObject("infoPanelContentRenderer")));
|
2020-12-20 19:54:12 +01:00
|
|
|
}
|
|
|
|
if (sectionContent.has("clarificationRenderer")) {
|
2021-04-25 18:54:26 +02:00
|
|
|
metaInfo.add(getClarificationRendererContent(sectionContent
|
|
|
|
.getObject("clarificationRenderer")
|
2020-12-20 19:54:12 +01:00
|
|
|
));
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return metaInfo;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Nonnull
|
2021-06-11 13:34:23 +02:00
|
|
|
private static MetaInfo getInfoPanelContent(@Nonnull final JsonObject infoPanelContentRenderer)
|
2020-12-20 19:54:12 +01:00
|
|
|
throws ParsingException {
|
|
|
|
final MetaInfo metaInfo = new MetaInfo();
|
|
|
|
final StringBuilder sb = new StringBuilder();
|
|
|
|
for (final Object paragraph : infoPanelContentRenderer.getArray("paragraphs")) {
|
|
|
|
if (sb.length() != 0) {
|
|
|
|
sb.append("<br>");
|
|
|
|
}
|
|
|
|
sb.append(YoutubeParsingHelper.getTextFromObject((JsonObject) paragraph));
|
|
|
|
}
|
|
|
|
metaInfo.setContent(new Description(sb.toString(), Description.HTML));
|
|
|
|
if (infoPanelContentRenderer.has("sourceEndpoint")) {
|
|
|
|
final String metaInfoLinkUrl = YoutubeParsingHelper.getUrlFromNavigationEndpoint(
|
|
|
|
infoPanelContentRenderer.getObject("sourceEndpoint"));
|
|
|
|
try {
|
2021-04-25 18:54:26 +02:00
|
|
|
metaInfo.addUrl(new URL(Objects.requireNonNull(extractCachedUrlIfNeeded(
|
|
|
|
metaInfoLinkUrl))));
|
2020-12-20 19:54:12 +01:00
|
|
|
} catch (final NullPointerException | MalformedURLException e) {
|
|
|
|
throw new ParsingException("Could not get metadata info URL", e);
|
|
|
|
}
|
|
|
|
|
|
|
|
final String metaInfoLinkText = YoutubeParsingHelper.getTextFromObject(
|
|
|
|
infoPanelContentRenderer.getObject("inlineSource"));
|
|
|
|
if (isNullOrEmpty(metaInfoLinkText)) {
|
|
|
|
throw new ParsingException("Could not get metadata info link text.");
|
|
|
|
}
|
|
|
|
metaInfo.addUrlText(metaInfoLinkText);
|
|
|
|
}
|
|
|
|
|
|
|
|
return metaInfo;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Nonnull
|
2022-03-18 15:09:06 +01:00
|
|
|
private static MetaInfo getClarificationRendererContent(
|
|
|
|
@Nonnull final JsonObject clarificationRenderer) throws ParsingException {
|
2020-12-20 19:54:12 +01:00
|
|
|
final MetaInfo metaInfo = new MetaInfo();
|
|
|
|
|
2021-04-25 18:54:26 +02:00
|
|
|
final String title = YoutubeParsingHelper.getTextFromObject(clarificationRenderer
|
|
|
|
.getObject("contentTitle"));
|
|
|
|
final String text = YoutubeParsingHelper.getTextFromObject(clarificationRenderer
|
|
|
|
.getObject("text"));
|
2021-02-07 22:12:22 +01:00
|
|
|
if (title == null || text == null) {
|
2020-12-20 19:54:12 +01:00
|
|
|
throw new ParsingException("Could not extract clarification renderer content");
|
|
|
|
}
|
|
|
|
metaInfo.setTitle(title);
|
|
|
|
metaInfo.setContent(new Description(text, Description.PLAIN_TEXT));
|
|
|
|
|
|
|
|
if (clarificationRenderer.has("actionButton")) {
|
|
|
|
final JsonObject actionButton = clarificationRenderer.getObject("actionButton")
|
|
|
|
.getObject("buttonRenderer");
|
|
|
|
try {
|
2021-04-25 18:54:26 +02:00
|
|
|
final String url = YoutubeParsingHelper.getUrlFromNavigationEndpoint(actionButton
|
|
|
|
.getObject("command"));
|
2020-12-20 19:54:12 +01:00
|
|
|
metaInfo.addUrl(new URL(Objects.requireNonNull(extractCachedUrlIfNeeded(url))));
|
|
|
|
} catch (final NullPointerException | MalformedURLException e) {
|
|
|
|
throw new ParsingException("Could not get metadata info URL", e);
|
|
|
|
}
|
|
|
|
|
|
|
|
final String metaInfoLinkText = YoutubeParsingHelper.getTextFromObject(
|
|
|
|
actionButton.getObject("text"));
|
|
|
|
if (isNullOrEmpty(metaInfoLinkText)) {
|
|
|
|
throw new ParsingException("Could not get metadata info link text.");
|
|
|
|
}
|
|
|
|
metaInfo.addUrlText(metaInfoLinkText);
|
|
|
|
}
|
|
|
|
|
2021-04-25 18:54:26 +02:00
|
|
|
if (clarificationRenderer.has("secondaryEndpoint") && clarificationRenderer
|
|
|
|
.has("secondarySource")) {
|
|
|
|
final String url = getUrlFromNavigationEndpoint(clarificationRenderer
|
|
|
|
.getObject("secondaryEndpoint"));
|
2021-06-11 13:34:23 +02:00
|
|
|
// Ignore Google URLs, because those point to a Google search about "Covid-19"
|
2020-12-20 19:54:12 +01:00
|
|
|
if (url != null && !isGoogleURL(url)) {
|
|
|
|
try {
|
|
|
|
metaInfo.addUrl(new URL(url));
|
2021-04-25 18:54:26 +02:00
|
|
|
final String description = getTextFromObject(clarificationRenderer
|
|
|
|
.getObject("secondarySource"));
|
2020-12-20 19:54:12 +01:00
|
|
|
metaInfo.addUrlText(description == null ? url : description);
|
2021-04-08 16:17:59 +02:00
|
|
|
} catch (final MalformedURLException e) {
|
2020-12-20 19:54:12 +01:00
|
|
|
throw new ParsingException("Could not get metadata info secondary URL", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return metaInfo;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Sometimes, YouTube provides URLs which use Google's cache. They look like
|
|
|
|
* {@code https://webcache.googleusercontent.com/search?q=cache:CACHED_URL}
|
2021-02-07 22:12:22 +01:00
|
|
|
*
|
2020-12-20 19:54:12 +01:00
|
|
|
* @param url the URL which might refer to the Google's webcache
|
|
|
|
* @return the URL which is referring to the original site
|
|
|
|
*/
|
|
|
|
public static String extractCachedUrlIfNeeded(final String url) {
|
|
|
|
if (url == null) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
if (url.contains("webcache.googleusercontent.com")) {
|
|
|
|
return url.split("cache:")[1];
|
|
|
|
}
|
|
|
|
return url;
|
|
|
|
}
|
2021-01-22 01:44:58 +01:00
|
|
|
|
|
|
|
public static boolean isVerified(final JsonArray badges) {
|
|
|
|
if (Utils.isNullOrEmpty(badges)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
for (final Object badge : badges) {
|
2021-01-22 01:44:58 +01:00
|
|
|
final String style = ((JsonObject) badge).getObject("metadataBadgeRenderer")
|
|
|
|
.getString("style");
|
|
|
|
if (style != null && (style.equals("BADGE_STYLE_TYPE_VERIFIED")
|
|
|
|
|| style.equals("BADGE_STYLE_TYPE_VERIFIED_ARTIST"))) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
2021-03-05 13:33:25 +01:00
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
@Nonnull
|
|
|
|
public static String unescapeDocument(@Nonnull final String doc) {
|
2021-03-05 13:33:25 +01:00
|
|
|
return doc
|
|
|
|
.replaceAll("\\\\x22", "\"")
|
|
|
|
.replaceAll("\\\\x7b", "{")
|
|
|
|
.replaceAll("\\\\x7d", "}")
|
|
|
|
.replaceAll("\\\\x5b", "[")
|
|
|
|
.replaceAll("\\\\x5d", "]");
|
|
|
|
}
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting of the WEB client based on the client version used is no longer possible, because the latest client version is extracted on the first YouTube request of a session, which requires the extractor to fetch the website again (unfortunately, this may bring back the reCaptcha issues, but there seems to be no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12-character string, in the t query parameter, in order to better spoof this client. Research needs to be done on this parameter, unique to each request, and on how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Generate a content playback nonce (also called {@code cpn}), sent by YouTube clients in
|
|
|
|
* playback requests (and also for some clients, in the player request body).
|
|
|
|
*
|
|
|
|
* @return a content playback nonce string
|
|
|
|
*/
|
|
|
|
@Nonnull
|
|
|
|
public static String generateContentPlaybackNonce() {
|
|
|
|
final SecureRandom random = new SecureRandom();
|
|
|
|
final StringBuilder stringBuilder = new StringBuilder();
|
|
|
|
|
|
|
|
for (int i = 0; i < 16; i++) {
|
|
|
|
stringBuilder.append(CONTENT_PLAYBACK_NONCE_ALPHABET.charAt(
|
|
|
|
(random.nextInt(128) + 1) & 63));
|
|
|
|
}
|
|
|
|
|
|
|
|
return stringBuilder.toString();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Try to generate a {@code t} parameter, sent by mobile clients as a query of the player
|
|
|
|
* request.
|
|
|
|
*
|
|
|
|
* <p>
|
|
|
|
* Some researches needs to be done to know how this parameter, unique at each request, is
|
|
|
|
* generated.
|
|
|
|
* </p>
|
|
|
|
*
|
|
|
|
* @return a 12 characters string to try to reproduce the {@code} parameter
|
|
|
|
*/
|
|
|
|
@Nonnull
|
|
|
|
public static String generateTParameter() {
|
|
|
|
final SecureRandom random = new SecureRandom();
|
|
|
|
final StringBuilder stringBuilder = new StringBuilder();
|
|
|
|
|
|
|
|
for (int i = 0; i < 12; i++) {
|
|
|
|
stringBuilder.append(CONTENT_PLAYBACK_NONCE_ALPHABET.charAt(
|
|
|
|
(random.nextInt(128) + 1) & 63));
|
|
|
|
}
|
|
|
|
|
|
|
|
return stringBuilder.toString();
|
|
|
|
}
|
2017-03-01 18:47:52 +01:00
|
|
|
}
|