NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java

1752 lines
73 KiB
Java
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Created by Christian Schabesberger on 02.03.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* YoutubeParsingHelper.java is part of NewPipe Extractor.
*
* NewPipe Extractor is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe Extractor is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe Extractor. If not, see <https://www.gnu.org/licenses/>.
*/
package org.schabi.newpipe.extractor.services.youtube;
import static org.schabi.newpipe.extractor.NewPipe.getDownloader;
import static org.schabi.newpipe.extractor.utils.Utils.HTTP;
import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
import static org.schabi.newpipe.extractor.utils.Utils.getStringResultFromRegexArray;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import static java.util.Collections.singletonList;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonBuilder;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import com.grack.nanojson.JsonWriter;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.exceptions.AccountTerminatedException;
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.localization.ContentCountry;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.playlist.PlaylistInfo;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.RandomStringFromAlphabetGenerator;
import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.security.SecureRandom;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Random;
import java.util.Set;
import java.util.regex.Pattern;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
public final class YoutubeParsingHelper {
private YoutubeParsingHelper() {
}
/**
* The base URL of requests of the {@code WEB} clients to the InnerTube internal API.
*/
public static final String YOUTUBEI_V1_URL = "https://www.youtube.com/youtubei/v1/";
/**
* The base URL of requests of non-web clients to the InnerTube internal API.
*/
public static final String YOUTUBEI_V1_GAPIS_URL =
"https://youtubei.googleapis.com/youtubei/v1/";
/**
* A parameter to disable pretty-printed response of InnerTube requests, to reduce response
* sizes.
*
* <p>
* Sent in query parameters of the requests, <b>after</b> the API key.
* </p>
**/
public static final String DISABLE_PRETTY_PRINT_PARAMETER = "&prettyPrint=false";
/**
* A parameter sent by official clients named {@code contentPlaybackNonce}.
*
* <p>
* It is sent by official clients on videoplayback requests, and by all clients (except the
* {@code WEB} one to the player requests.
* </p>
*
* <p>
* It is composed of 16 characters which are generated from
* {@link #CONTENT_PLAYBACK_NONCE_ALPHABET this alphabet}, with the use of strong random
* values.
* </p>
*
* @see #generateContentPlaybackNonce()
*/
public static final String CPN = "cpn";
public static final String VIDEO_ID = "videoId";
/**
* A parameter sent by official clients named {@code contentCheckOk}.
*
* <p>
* Setting it to {@code true} allows us to get streaming data on videos with a warning about
* what the sensible content they contain.
* </p>
*/
public static final String CONTENT_CHECK_OK = "contentCheckOk";
/**
* A parameter which may be sent by official clients named {@code racyCheckOk}.
*
* <p>
* What this parameter does is not really known, but it seems to be linked to sensitive
* contents such as age-restricted content.
* </p>
*/
public static final String RACY_CHECK_OK = "racyCheckOk";
/**
* The client version for InnerTube requests with the {@code WEB} client, used as the last
* fallback if the extraction of the real one failed.
*/
private static final String HARDCODED_CLIENT_VERSION = "2.20220809.02.00";
/**
* The InnerTube API key which should be used by YouTube's desktop website, used as a fallback
* if the extraction of the real one failed.
*/
private static final String HARDCODED_KEY = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8";
/**
* The hardcoded client version of the Android app used for InnerTube requests with this
* client.
*
* <p>
* It can be extracted by getting the latest release version of the app in an APK repository
* such as <a href="https://www.apkmirror.com/apk/google-inc/youtube/">APKMirror</a>.
* </p>
*/
private static final String ANDROID_YOUTUBE_CLIENT_VERSION = "17.31.35";
/**
* The InnerTube API key used by the {@code ANDROID} client. Found with the help of
* reverse-engineering app network requests.
*/
private static final String ANDROID_YOUTUBE_KEY = "AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w";
/**
* The hardcoded client version of the iOS app used for InnerTube requests with this
* client.
*
* <p>
* It can be extracted by getting the latest release version of the app on
* <a href="https://apps.apple.com/us/app/youtube-watch-listen-stream/id544007664/">the App
* Store page of the YouTube app</a>, in the {@code Whats New} section.
* </p>
*/
private static final String IOS_YOUTUBE_CLIENT_VERSION = "17.31.4";
/**
* The InnerTube API key used by the {@code iOS} client. Found with the help of
* reverse-engineering app network requests.
*/
private static final String IOS_YOUTUBE_KEY = "AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc";
/**
* The hardcoded client version used for InnerTube requests with the TV HTML5 embed client.
*/
private static final String TVHTML5_SIMPLY_EMBED_CLIENT_VERSION = "2.0";
private static String clientVersion;
private static String key;
private static final String[] HARDCODED_YOUTUBE_MUSIC_KEY =
{"AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30", "67", "1.20220808.01.00"};
private static String[] youtubeMusicKey;
private static boolean keyAndVersionExtracted = false;
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private static Optional<Boolean> hardcodedClientVersionAndKeyValid = Optional.empty();
private static final String[] INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES =
{"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
"innertube_context_client_version\":\"([0-9\\.]+?)\"",
"client.version=([0-9\\.]+)"};
private static final String[] INNERTUBE_API_KEY_REGEXES =
{"INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"",
"innertubeApiKey\":\"([0-9a-zA-Z_-]+?)\""};
private static final String[] INITIAL_DATA_REGEXES =
{"window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});",
"var\\s*ytInitialData\\s*=\\s*(\\{.*?\\});"};
private static final String INNERTUBE_CLIENT_NAME_REGEX =
"INNERTUBE_CONTEXT_CLIENT_NAME\":([0-9]+?),";
private static final String CONTENT_PLAYBACK_NONCE_ALPHABET =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
/**
* The device machine id for the iPhone 13, used to get 60fps with the {@code iOS} client.
*
* <p>
* See <a href="https://gist.github.com/adamawolf/3048717">this GitHub Gist</a> for more
* information.
* </p>
*/
private static final String IOS_DEVICE_MODEL = "iPhone14,5";
private static Random numberGenerator = new SecureRandom();
private static final String FEED_BASE_CHANNEL_ID =
"https://www.youtube.com/feeds/videos.xml?channel_id=";
private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user=";
private static final Pattern C_WEB_PATTERN = Pattern.compile("&c=WEB");
private static final Pattern C_TVHTML5_SIMPLY_EMBEDDED_PLAYER_PATTERN =
Pattern.compile("&c=TVHTML5_SIMPLY_EMBEDDED_PLAYER");
private static final Pattern C_ANDROID_PATTERN = Pattern.compile("&c=ANDROID");
private static final Pattern C_IOS_PATTERN = Pattern.compile("&c=IOS");
private static final Set<String> GOOGLE_URLS = Set.of("google.", "m.google.", "www.google.");
private static final Set<String> INVIDIOUS_URLS = Set.of("invidio.us", "dev.invidio.us",
"www.invidio.us", "redirect.invidious.io", "invidious.snopyta.org", "yewtu.be",
"tube.connect.cafe", "tubus.eduvid.org", "invidious.kavin.rocks", "invidious.site",
"invidious-us.kavin.rocks", "piped.kavin.rocks", "vid.mint.lgbt", "invidiou.site",
"invidious.fdn.fr", "invidious.048596.xyz", "invidious.zee.li", "vid.puffyan.us",
"ytprivate.com", "invidious.namazso.eu", "invidious.silkky.cloud", "ytb.trom.tf",
"invidious.exonip.de", "inv.riverside.rocks", "invidious.blamefran.net", "y.com.cm",
"invidious.moomoo.me", "yt.cyberhost.uk");
private static final Set<String> YOUTUBE_URLS = Set.of("youtube.com", "www.youtube.com",
"m.youtube.com", "music.youtube.com");
/**
* Determines how the consent cookie (that is required for YouTube) will be generated.
*
* <p>
* {@code false} (default) will use {@code PENDING+}.
* {@code true} will use {@code YES+}.
* </p>
*
* <p>
* Setting this value to <code>true</code> is currently needed if you want to watch
* Mix Playlists in some countries (EU).
* </p>
*
* @see #generateConsentCookie()
*/
private static boolean consentAccepted = false;
private static boolean isGoogleURL(final String url) {
final String cachedUrl = extractCachedUrlIfNeeded(url);
try {
final URL u = new URL(cachedUrl);
return GOOGLE_URLS.stream().anyMatch(item -> u.getHost().startsWith(item));
} catch (final MalformedURLException e) {
return false;
}
}
public static boolean isYoutubeURL(@Nonnull final URL url) {
return YOUTUBE_URLS.contains(url.getHost().toLowerCase(Locale.ROOT));
}
public static boolean isYoutubeServiceURL(@Nonnull final URL url) {
final String host = url.getHost();
return host.equalsIgnoreCase("www.youtube-nocookie.com")
|| host.equalsIgnoreCase("youtu.be");
}
public static boolean isHooktubeURL(@Nonnull final URL url) {
final String host = url.getHost();
return host.equalsIgnoreCase("hooktube.com");
}
public static boolean isInvidiousURL(@Nonnull final URL url) {
return INVIDIOUS_URLS.contains(url.getHost().toLowerCase(Locale.ROOT));
}
public static boolean isY2ubeURL(@Nonnull final URL url) {
return url.getHost().equalsIgnoreCase("y2u.be");
}
/**
* Parses the duration string of the video expecting ":" or "." as separators
*
* @return the duration in seconds
* @throws ParsingException when more than 3 separators are found
*/
public static int parseDurationString(@Nonnull final String input)
throws ParsingException, NumberFormatException {
// If time separator : is not detected, try . instead
final String[] splitInput = input.contains(":")
? input.split(":")
: input.split("\\.");
final int[] units = {24, 60, 60, 1};
final int offset = units.length - splitInput.length;
if (offset < 0) {
throw new ParsingException("Error duration string with unknown format: " + input);
}
int duration = 0;
for (int i = 0; i < splitInput.length; i++) {
duration = units[i + offset] * (duration + convertDurationToInt(splitInput[i]));
}
return duration;
}
/**
* Tries to convert a duration string to an integer without throwing an exception.
* <br/>
* Helper method for {@link #parseDurationString(String)}.
* <br/>
* Note: This method is also used as a workaround for NewPipe#8034 (YT shorts no longer
* display any duration in channels).
*
* @param input The string to process
* @return The converted integer or 0 if the conversion failed.
*/
private static int convertDurationToInt(final String input) {
if (input == null || input.isEmpty()) {
return 0;
}
final String clearedInput = Utils.removeNonDigitCharacters(input);
try {
return Integer.parseInt(clearedInput);
} catch (final NumberFormatException ex) {
return 0;
}
}
@Nonnull
public static String getFeedUrlFrom(@Nonnull final String channelIdOrUser) {
if (channelIdOrUser.startsWith("user/")) {
return FEED_BASE_USER + channelIdOrUser.replace("user/", "");
} else if (channelIdOrUser.startsWith("channel/")) {
return FEED_BASE_CHANNEL_ID + channelIdOrUser.replace("channel/", "");
} else {
return FEED_BASE_CHANNEL_ID + channelIdOrUser;
}
}
public static OffsetDateTime parseDateFrom(final String textualUploadDate)
throws ParsingException {
try {
return OffsetDateTime.parse(textualUploadDate);
} catch (final DateTimeParseException e) {
try {
return LocalDate.parse(textualUploadDate).atStartOfDay().atOffset(ZoneOffset.UTC);
} catch (final DateTimeParseException e1) {
throw new ParsingException("Could not parse date: \"" + textualUploadDate + "\"",
e1);
}
}
}
/**
* Checks if the given playlist id is a YouTube Mix (auto-generated playlist)
* Ids from a YouTube Mix start with "RD"
*
* @param playlistId the playlist id
* @return Whether given id belongs to a YouTube Mix
*/
public static boolean isYoutubeMixId(@Nonnull final String playlistId) {
return playlistId.startsWith("RD")
&& !isYoutubeMusicMixId(playlistId);
}
/**
* Checks if the given playlist id is a YouTube My Mix (auto-generated playlist)
* Ids from a YouTube My Mix start with "RDMM"
*
* @param playlistId the playlist id
* @return Whether given id belongs to a YouTube My Mix
*/
public static boolean isYoutubeMyMixId(@Nonnull final String playlistId) {
return playlistId.startsWith("RDMM");
}
/**
* Checks if the given playlist id is a YouTube Music Mix (auto-generated playlist)
* Ids from a YouTube Music Mix start with "RDAMVM" or "RDCLAK"
*
* @param playlistId the playlist id
* @return Whether given id belongs to a YouTube Music Mix
*/
public static boolean isYoutubeMusicMixId(@Nonnull final String playlistId) {
return playlistId.startsWith("RDAMVM") || playlistId.startsWith("RDCLAK");
}
/**
* Checks if the given playlist id is a YouTube Channel Mix (auto-generated playlist)
* Ids from a YouTube channel Mix start with "RDCM"
*
* @return Whether given id belongs to a YouTube Channel Mix
*/
public static boolean isYoutubeChannelMixId(@Nonnull final String playlistId) {
return playlistId.startsWith("RDCM");
}
/**
* Checks if the given playlist id is a YouTube Genre Mix (auto-generated playlist)
* Ids from a YouTube Genre Mix start with "RDGMEM"
*
* @return Whether given id belongs to a YouTube Genre Mix
*/
public static boolean isYoutubeGenreMixId(@Nonnull final String playlistId) {
return playlistId.startsWith("RDGMEM");
}
/**
* @param playlistId the playlist id to parse
* @return the {@link PlaylistInfo.PlaylistType} extracted from the playlistId (mix playlist
* types included)
* @throws ParsingException if the playlistId is null or empty, if the playlistId is not a mix,
* if it is a mix but it's not based on a specific stream (this is the
* case for channel or genre mixes)
*/
@Nonnull
public static String extractVideoIdFromMixId(final String playlistId)
throws ParsingException {
if (isNullOrEmpty(playlistId)) {
throw new ParsingException("Video id could not be determined from empty playlist id");
} else if (isYoutubeMyMixId(playlistId)) {
return playlistId.substring(4);
} else if (isYoutubeMusicMixId(playlistId)) {
return playlistId.substring(6);
} else if (isYoutubeChannelMixId(playlistId)) {
// Channel mixes are of the form RMCM{channelId}, so videoId can't be determined
throw new ParsingException("Video id could not be determined from channel mix id: "
+ playlistId);
} else if (isYoutubeGenreMixId(playlistId)) {
// Genre mixes are of the form RDGMEM{garbage}, so videoId can't be determined
throw new ParsingException("Video id could not be determined from genre mix id: "
+ playlistId);
} else if (isYoutubeMixId(playlistId)) { // normal mix
if (playlistId.length() != 13) {
// Stream YouTube mixes are of the form RD{videoId}, but if videoId is not exactly
// 11 characters then it can't be a video id, hence we are dealing with a different
// type of mix (e.g. genre mixes handled above, of the form RDGMEM{garbage})
throw new ParsingException("Video id could not be determined from mix id: "
+ playlistId);
}
return playlistId.substring(2);
} else { // not a mix
throw new ParsingException("Video id could not be determined from playlist id: "
+ playlistId);
}
}
/**
* @param playlistId the playlist id to parse
* @return the {@link PlaylistInfo.PlaylistType} extracted from the playlistId (mix playlist
* types included)
* @throws ParsingException if the playlistId is null or empty
*/
@Nonnull
public static PlaylistInfo.PlaylistType extractPlaylistTypeFromPlaylistId(
final String playlistId) throws ParsingException {
if (isNullOrEmpty(playlistId)) {
throw new ParsingException("Could not extract playlist type from empty playlist id");
} else if (isYoutubeMusicMixId(playlistId)) {
return PlaylistInfo.PlaylistType.MIX_MUSIC;
} else if (isYoutubeChannelMixId(playlistId)) {
return PlaylistInfo.PlaylistType.MIX_CHANNEL;
} else if (isYoutubeGenreMixId(playlistId)) {
return PlaylistInfo.PlaylistType.MIX_GENRE;
} else if (isYoutubeMixId(playlistId)) { // normal mix
// Either a normal mix based on a stream, or a "my mix" (still based on a stream).
// NOTE: if YouTube introduces even more types of mixes that still start with RD,
// they will default to this, even though they might not be based on a stream.
return PlaylistInfo.PlaylistType.MIX_STREAM;
} else {
// not a known type of mix: just consider it a normal playlist
return PlaylistInfo.PlaylistType.NORMAL;
}
}
/**
* @param playlistUrl the playlist url to parse
* @return the {@link PlaylistInfo.PlaylistType} extracted from the playlistUrl's list param
* (mix playlist types included)
* @throws ParsingException if the playlistUrl is malformed, if has no list param or if the list
* param is empty
*/
public static PlaylistInfo.PlaylistType extractPlaylistTypeFromPlaylistUrl(
final String playlistUrl) throws ParsingException {
try {
return extractPlaylistTypeFromPlaylistId(
Utils.getQueryValue(Utils.stringToURL(playlistUrl), "list"));
} catch (final MalformedURLException e) {
throw new ParsingException("Could not extract playlist type from malformed url", e);
}
}
private static JsonObject getInitialData(final String html) throws ParsingException {
try {
return JsonParser.object().from(getStringResultFromRegexArray(html,
INITIAL_DATA_REGEXES, 1));
} catch (final JsonParserException | Parser.RegexException e) {
throw new ParsingException("Could not get ytInitialData", e);
}
}
public static boolean areHardcodedClientVersionAndKeyValid()
throws IOException, ExtractionException {
if (hardcodedClientVersionAndKeyValid.isPresent()) {
return hardcodedClientVersionAndKeyValid.get();
}
// @formatter:off
final byte[] body = JsonWriter.string()
.object()
.object("context")
.object("client")
.value("hl", "en-GB")
.value("gl", "GB")
.value("clientName", "WEB")
.value("clientVersion", HARDCODED_CLIENT_VERSION)
.end()
.object("user")
.value("lockedSafetyMode", false)
.end()
.value("fetchLiveState", true)
.end()
.end().done().getBytes(StandardCharsets.UTF_8);
// @formatter:on
final Map<String, List<String>> headers = new HashMap<>();
headers.put("X-YouTube-Client-Name", singletonList("1"));
headers.put("X-YouTube-Client-Version",
singletonList(HARDCODED_CLIENT_VERSION));
// This endpoint is fetched by the YouTube website to get the items of its main menu and is
// pretty lightweight (around 30kB)
final Response response = getDownloader().post(YOUTUBEI_V1_URL + "guide?key="
+ HARDCODED_KEY + DISABLE_PRETTY_PRINT_PARAMETER, headers, body);
final String responseBody = response.responseBody();
final int responseCode = response.responseCode();
hardcodedClientVersionAndKeyValid = Optional.of(responseBody.length() > 5000
&& responseCode == 200); // Ensure to have a valid response
return hardcodedClientVersionAndKeyValid.get();
}
private static void extractClientVersionAndKeyFromSwJs()
throws IOException, ExtractionException {
if (keyAndVersionExtracted) {
return;
}
final String url = "https://www.youtube.com/sw.js";
final Map<String, List<String>> headers = new HashMap<>();
headers.put("Origin", singletonList("https://www.youtube.com"));
headers.put("Referer", singletonList("https://www.youtube.com"));
final String response = getDownloader().get(url, headers).responseBody();
try {
clientVersion = getStringResultFromRegexArray(response,
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
key = getStringResultFromRegexArray(response, INNERTUBE_API_KEY_REGEXES, 1);
} catch (final Parser.RegexException e) {
throw new ParsingException("Could not extract YouTube WEB InnerTube client version "
+ "and API key from sw.js", e);
}
keyAndVersionExtracted = true;
}
private static void extractClientVersionAndKeyFromHtmlSearchResultsPage()
throws IOException, ExtractionException {
// Don't extract the client version and the InnerTube key if it has been already extracted
if (keyAndVersionExtracted) {
return;
}
// Don't provide a search term in order to have a smaller response
final String url = "https://www.youtube.com/results?search_query=&ucbcb=1";
final String html = getDownloader().get(url, getCookieHeader()).responseBody();
final JsonObject initialData = getInitialData(html);
final JsonArray serviceTrackingParams = initialData.getObject("responseContext")
.getArray("serviceTrackingParams");
clientVersion = getClientVersionFromServiceTrackingParam(
serviceTrackingParams, "CSI", "cver");
if (clientVersion == null) {
try {
clientVersion = getStringResultFromRegexArray(html,
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
} catch (final Parser.RegexException ignored) {
}
}
// Fallback to get a shortened client version which does not contain the last two
// digits
if (isNullOrEmpty(clientVersion)) {
clientVersion = getClientVersionFromServiceTrackingParam(
serviceTrackingParams, "ECATCHER", "client.version");
}
try {
key = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
} catch (final Parser.RegexException ignored) {
}
if (isNullOrEmpty(key)) {
throw new ParsingException(
// CHECKSTYLE:OFF
"Could not extract YouTube WEB InnerTube API key from HTML search results page");
// CHECKSTYLE:ON
}
if (clientVersion == null) {
throw new ParsingException(
// CHECKSTYLE:OFF
"Could not extract YouTube WEB InnerTube client version from HTML search results page");
// CHECKSTYLE:ON
}
keyAndVersionExtracted = true;
}
@Nullable
private static String getClientVersionFromServiceTrackingParam(
@Nonnull final JsonArray serviceTrackingParams,
@Nonnull final String serviceName,
@Nonnull final String clientVersionKey) {
for (int i = 0 ; i < serviceTrackingParams.size(); i++) {
JsonObject item = serviceTrackingParams.getObject(i);
if (item != null && item.getString("service").equals(serviceName) && item.getArray("params") != null) {
JsonArray paramsArray = item.getArray("params");
for (int j = 0 ; j < paramsArray.size(); j++) {
JsonObject paramItem = paramsArray.getObject(i);
if (paramItem != null && paramItem.getString("key").equals(clientVersionKey)) {
return paramItem.getString("value");
}
}
}
}
return null;
}
/**
* Get the client version used by YouTube website on InnerTube requests.
*/
public static String getClientVersion() throws IOException, ExtractionException {
if (!isNullOrEmpty(clientVersion)) {
return clientVersion;
}
// Always extract the latest client version, by trying first to extract it from the
// JavaScript service worker, then from HTML search results page as a fallback, to prevent
// fingerprinting based on the client version used
try {
extractClientVersionAndKeyFromSwJs();
} catch (final Exception e) {
extractClientVersionAndKeyFromHtmlSearchResultsPage();
}
if (keyAndVersionExtracted) {
return clientVersion;
}
// Fallback to the hardcoded one if it is valid
if (areHardcodedClientVersionAndKeyValid()) {
clientVersion = HARDCODED_CLIENT_VERSION;
return clientVersion;
}
throw new ExtractionException("Could not get YouTube WEB client version");
}
/**
* Get the internal API key used by YouTube website on InnerTube requests.
*/
public static String getKey() throws IOException, ExtractionException {
if (!isNullOrEmpty(key)) {
return key;
}
// Always extract the key used by the website, by trying first to extract it from the
// JavaScript service worker, then from HTML search results page as a fallback, to prevent
// fingerprinting based on the key and/or invalid key issues
try {
extractClientVersionAndKeyFromSwJs();
} catch (final Exception e) {
extractClientVersionAndKeyFromHtmlSearchResultsPage();
}
if (keyAndVersionExtracted) {
return key;
}
// Fallback to the hardcoded one if it's valid
if (areHardcodedClientVersionAndKeyValid()) {
key = HARDCODED_KEY;
return key;
}
// The ANDROID API key is also valid with the WEB client so return it if we couldn't
// extract the WEB API key. This can be used as a way to fingerprint the extractor in this
// case
return ANDROID_YOUTUBE_KEY;
}
/**
* <p>
* <b>Only used in tests.</b>
* </p>
*
* <p>
* Quick-and-dirty solution to reset global state in between test classes.
* </p>
* <p>
* This is needed for the mocks because in order to reach that state a network request has to
* be made. If the global state is not reset and the RecordingDownloader is used,
* then only the first test class has that request recorded. Meaning running the other
* tests with mocks will fail, because the mock is missing.
* </p>
*/
public static void resetClientVersionAndKey() {
clientVersion = null;
key = null;
keyAndVersionExtracted = false;
}
/**
* <p>
* <b>Only used in tests.</b>
* </p>
*/
public static void setNumberGenerator(final Random random) {
numberGenerator = random;
}
public static boolean isHardcodedYoutubeMusicKeyValid() throws IOException,
ReCaptchaException {
final String url =
"https://music.youtube.com/youtubei/v1/music/get_search_suggestions?alt=json&key="
+ HARDCODED_YOUTUBE_MUSIC_KEY[0] + DISABLE_PRETTY_PRINT_PARAMETER;
// @formatter:off
final byte[] json = JsonWriter.string()
.object()
.object("context")
.object("client")
.value("clientName", "WEB_REMIX")
.value("clientVersion", HARDCODED_YOUTUBE_MUSIC_KEY[2])
.value("hl", "en-GB")
.value("gl", "GB")
.array("experimentIds").end()
.value("experimentsToken", "")
.object("locationInfo").end()
.object("musicAppInfo").end()
.end()
.object("capabilities").end()
.object("request")
.array("internalExperimentFlags").end()
.object("sessionIndex").end()
.end()
.object("activePlayers").end()
.object("user")
.value("enableSafetyMode", false)
.end()
.end()
.value("input", "")
.end().done().getBytes(StandardCharsets.UTF_8);
// @formatter:on
final Map<String, List<String>> headers = new HashMap<>();
headers.put("X-YouTube-Client-Name", singletonList(
HARDCODED_YOUTUBE_MUSIC_KEY[1]));
headers.put("X-YouTube-Client-Version", singletonList(
HARDCODED_YOUTUBE_MUSIC_KEY[2]));
headers.put("Origin", singletonList("https://music.youtube.com"));
headers.put("Referer", singletonList("music.youtube.com"));
headers.put("Content-Type", singletonList("application/json"));
final Response response = getDownloader().post(url, headers, json);
// Ensure to have a valid response
return response.responseBody().length() > 500 && response.responseCode() == 200;
}
public static String[] getYoutubeMusicKey()
throws IOException, ReCaptchaException, Parser.RegexException {
if (youtubeMusicKey != null && youtubeMusicKey.length == 3) {
return youtubeMusicKey;
}
if (isHardcodedYoutubeMusicKeyValid()) {
youtubeMusicKey = HARDCODED_YOUTUBE_MUSIC_KEY;
return youtubeMusicKey;
}
String musicClientVersion;
String musicKey;
String musicClientName;
try {
final String url = "https://music.youtube.com/sw.js";
final Map<String, List<String>> headers = new HashMap<>();
headers.put("Origin", singletonList("https://music.youtube.com"));
headers.put("Referer", singletonList("https://music.youtube.com"));
final String response = getDownloader().get(url, headers).responseBody();
musicClientVersion = getStringResultFromRegexArray(response,
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
musicKey = getStringResultFromRegexArray(response, INNERTUBE_API_KEY_REGEXES, 1);
musicClientName = Parser.matchGroup1(INNERTUBE_CLIENT_NAME_REGEX, response);
} catch (final Exception e) {
final String url = "https://music.youtube.com/?ucbcb=1";
final String html = getDownloader().get(url, getCookieHeader()).responseBody();
musicKey = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
musicClientVersion = getStringResultFromRegexArray(html,
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES);
musicClientName = Parser.matchGroup1(INNERTUBE_CLIENT_NAME_REGEX, html);
}
youtubeMusicKey = new String[] {musicKey, musicClientName, musicClientVersion};
return youtubeMusicKey;
}
@Nullable
public static String getUrlFromNavigationEndpoint(@Nonnull final JsonObject navigationEndpoint)
throws ParsingException {
if (navigationEndpoint.has("urlEndpoint")) {
String internUrl = navigationEndpoint.getObject("urlEndpoint").getString("url");
if (internUrl.startsWith("https://www.youtube.com/redirect?")) {
// remove https://www.youtube.com part to fall in the next if block
internUrl = internUrl.substring(23);
}
if (internUrl.startsWith("/redirect?")) {
// q parameter can be the first parameter
internUrl = internUrl.substring(10);
final String[] params = internUrl.split("&");
for (final String param : params) {
if (param.split("=")[0].equals("q")) {
try {
return Utils.decodeUrlUtf8(param.split("=")[1]);
} catch (final UnsupportedEncodingException e) {
return null;
}
}
}
} else if (internUrl.startsWith("http")) {
return internUrl;
} else if (internUrl.startsWith("/channel") || internUrl.startsWith("/user")
|| internUrl.startsWith("/watch")) {
return "https://www.youtube.com" + internUrl;
}
} else if (navigationEndpoint.has("browseEndpoint")) {
final JsonObject browseEndpoint = navigationEndpoint.getObject("browseEndpoint");
final String canonicalBaseUrl = browseEndpoint.getString("canonicalBaseUrl");
final String browseId = browseEndpoint.getString("browseId");
// All channel ids are prefixed with UC
if (browseId != null && browseId.startsWith("UC")) {
return "https://www.youtube.com/channel/" + browseId;
}
if (!isNullOrEmpty(canonicalBaseUrl)) {
return "https://www.youtube.com" + canonicalBaseUrl;
}
throw new ParsingException("canonicalBaseUrl is null and browseId is not a channel (\""
+ browseEndpoint + "\")");
} else if (navigationEndpoint.has("watchEndpoint")) {
final StringBuilder url = new StringBuilder();
url.append("https://www.youtube.com/watch?v=").append(navigationEndpoint
.getObject("watchEndpoint").getString(VIDEO_ID));
if (navigationEndpoint.getObject("watchEndpoint").has("playlistId")) {
url.append("&list=").append(navigationEndpoint.getObject("watchEndpoint")
.getString("playlistId"));
}
if (navigationEndpoint.getObject("watchEndpoint").has("startTimeSeconds")) {
url.append("&amp;t=").append(navigationEndpoint.getObject("watchEndpoint")
.getInt("startTimeSeconds"));
}
return url.toString();
} else if (navigationEndpoint.has("watchPlaylistEndpoint")) {
return "https://www.youtube.com/playlist?list="
+ navigationEndpoint.getObject("watchPlaylistEndpoint").getString("playlistId");
}
return null;
}
/**
* Get the text from a JSON object that has either a {@code simpleText} or a {@code runs}
* array.
*
* @param textObject JSON object to get the text from
* @param html whether to return HTML, by parsing the {@code navigationEndpoint}
* @return text in the JSON object or {@code null}
*/
@Nullable
public static String getTextFromObject(final JsonObject textObject, final boolean html)
throws ParsingException {
if (isNullOrEmpty(textObject)) {
return null;
}
if (textObject.has("simpleText")) {
return textObject.getString("simpleText");
}
if (textObject.getArray("runs").isEmpty()) {
return null;
}
final StringBuilder textBuilder = new StringBuilder();
for (final Object textPart : textObject.getArray("runs")) {
final String text = ((JsonObject) textPart).getString("text");
if (html && ((JsonObject) textPart).has("navigationEndpoint")) {
final String url = getUrlFromNavigationEndpoint(((JsonObject) textPart)
.getObject("navigationEndpoint"));
if (!isNullOrEmpty(url)) {
textBuilder.append("<a href=\"").append(url).append("\">").append(text)
.append("</a>");
continue;
}
}
textBuilder.append(text);
}
String text = textBuilder.toString();
if (html) {
text = text.replaceAll("\\n", "<br>");
text = text.replaceAll(" {2}", " &nbsp;");
}
return text;
}
/**
* Parse a video description in the new "attributed" format, which contains the entire visible
* plaintext ({@code content}) and an array of {@code commandRuns}.
*
* <p>
* The {@code commandRuns} include the links and their position in the text.
* </p>
*
* @param attributedDescription the JSON object of the attributed description
* @return the parsed description, in HTML format, as a string
*/
@Nullable
public static String getAttributedDescription(
@Nullable final JsonObject attributedDescription) {
if (isNullOrEmpty(attributedDescription)) {
return null;
}
final String content = attributedDescription.getString("content");
final JsonArray commandRuns = attributedDescription.getArray("commandRuns");
if (content == null) {
return null;
}
final StringBuilder textBuilder = new StringBuilder();
int textStart = 0;
for (final Object commandRun: commandRuns) {
if (!(commandRun instanceof JsonObject)) {
continue;
}
final JsonObject run = ((JsonObject) commandRun);
final int startIndex = run.getInt("startIndex", -1);
final int length = run.getInt("length");
final JsonObject navigationEndpoint = run.getObject("onTap")
.getObject("innertubeCommand");
if (startIndex < 0 || length < 1 || navigationEndpoint == null) {
continue;
}
final String url;
try {
url = getUrlFromNavigationEndpoint(navigationEndpoint);
} catch (final ParsingException e) {
continue;
}
if (url == null) {
continue;
}
// Append text before the link
if (startIndex > textStart) {
textBuilder.append(content, textStart, startIndex);
}
// Trim and append link text
// Channel/Video format: 3xu00a0, (/ •), u00a0, <Name>, 2xu00a0
final String linkText = content.substring(startIndex, startIndex + length)
.replace('\u00a0', ' ')
.trim()
.replaceFirst("^[/•] *", "");
textBuilder.append("<a href=\"")
.append(url)
.append("\">")
.append(linkText)
.append("</a>");
textStart = startIndex + length;
}
// Append the remaining text
if (textStart < content.length()) {
textBuilder.append(content.substring(textStart));
}
return textBuilder.toString()
.replaceAll("\\n", "<br>")
.replaceAll(" {2}", " &nbsp;");
}
@Nullable
public static String getTextFromObject(final JsonObject textObject) throws ParsingException {
return getTextFromObject(textObject, false);
}
@Nullable
public static String getUrlFromObject(final JsonObject textObject) throws ParsingException {
if (isNullOrEmpty(textObject)) {
return null;
}
if (textObject.getArray("runs").isEmpty()) {
return null;
}
for (final Object textPart : textObject.getArray("runs")) {
final String url = getUrlFromNavigationEndpoint(((JsonObject) textPart)
.getObject("navigationEndpoint"));
if (!isNullOrEmpty(url)) {
return url;
}
}
return null;
}
@Nullable
public static String getTextAtKey(@Nonnull final JsonObject jsonObject, final String theKey)
throws ParsingException {
if (jsonObject.isString(theKey)) {
return jsonObject.getString(theKey);
} else {
return getTextFromObject(jsonObject.getObject(theKey));
}
}
public static String fixThumbnailUrl(@Nonnull final String thumbnailUrl) {
String result = thumbnailUrl;
if (result.startsWith("//")) {
result = result.substring(2);
}
if (result.startsWith(HTTP)) {
result = Utils.replaceHttpWithHttps(result);
} else if (!result.startsWith(HTTPS)) {
result = "https://" + result;
}
return result;
}
public static String getThumbnailUrlFromInfoItem(final JsonObject infoItem)
throws ParsingException {
// TODO: Don't simply get the first item, but look at all thumbnails and their resolution
try {
return fixThumbnailUrl(infoItem.getObject("thumbnail").getArray("thumbnails")
.getObject(0).getString("url"));
} catch (final Exception e) {
throw new ParsingException("Could not get thumbnail url", e);
}
}
@Nonnull
public static String getValidJsonResponseBody(@Nonnull final Response response)
throws ParsingException, MalformedURLException {
if (response.responseCode() == 404) {
throw new ContentNotAvailableException("Not found"
+ " (\"" + response.responseCode() + " " + response.responseMessage() + "\")");
}
final String responseBody = response.responseBody();
if (responseBody.length() < 50) { // Ensure to have a valid response
throw new ParsingException("JSON response is too short");
}
// Check if the request was redirected to the error page.
final URL latestUrl = new URL(response.latestUrl());
if (latestUrl.getHost().equalsIgnoreCase("www.youtube.com")) {
final String path = latestUrl.getPath();
if (path.equalsIgnoreCase("/oops") || path.equalsIgnoreCase("/error")) {
throw new ContentNotAvailableException("Content unavailable");
}
}
final String responseContentType = response.getHeader("Content-Type");
if (responseContentType != null
&& responseContentType.toLowerCase().contains("text/html")) {
throw new ParsingException("Got HTML document, expected JSON response"
+ " (latest url was: \"" + response.latestUrl() + "\")");
}
return responseBody;
}
public static JsonObject getJsonPostResponse(final String endpoint,
final byte[] body,
final Localization localization)
throws IOException, ExtractionException {
final Map<String, List<String>> headers = new HashMap<>();
addClientInfoHeaders(headers);
headers.put("Content-Type", singletonList("application/json"));
final Response response = getDownloader().post(YOUTUBEI_V1_URL + endpoint + "?key="
+ getKey() + DISABLE_PRETTY_PRINT_PARAMETER, headers, body, localization);
return JsonUtils.toJsonObject(getValidJsonResponseBody(response));
}
public static JsonObject getJsonAndroidPostResponse(
final String endpoint,
final byte[] body,
@Nonnull final Localization localization,
@Nullable final String endPartOfUrlRequest) throws IOException, ExtractionException {
return getMobilePostResponse(endpoint, body, localization,
getAndroidUserAgent(localization), ANDROID_YOUTUBE_KEY, endPartOfUrlRequest);
}
public static JsonObject getJsonIosPostResponse(
final String endpoint,
final byte[] body,
@Nonnull final Localization localization,
@Nullable final String endPartOfUrlRequest) throws IOException, ExtractionException {
return getMobilePostResponse(endpoint, body, localization, getIosUserAgent(localization),
IOS_YOUTUBE_KEY, endPartOfUrlRequest);
}
private static JsonObject getMobilePostResponse(
final String endpoint,
final byte[] body,
@Nonnull final Localization localization,
@Nonnull final String userAgent,
@Nonnull final String innerTubeApiKey,
@Nullable final String endPartOfUrlRequest) throws IOException, ExtractionException {
final Map<String, List<String>> headers = new HashMap<>();
headers.put("Content-Type", singletonList("application/json"));
headers.put("User-Agent", singletonList(userAgent));
headers.put("X-Goog-Api-Format-Version", singletonList("2"));
final String baseEndpointUrl = YOUTUBEI_V1_GAPIS_URL + endpoint + "?key=" + innerTubeApiKey
+ DISABLE_PRETTY_PRINT_PARAMETER;
final Response response = getDownloader().post(isNullOrEmpty(endPartOfUrlRequest)
? baseEndpointUrl : baseEndpointUrl + endPartOfUrlRequest,
headers, body, localization);
return JsonUtils.toJsonObject(getValidJsonResponseBody(response));
}
@Nonnull
public static JsonBuilder<JsonObject> prepareDesktopJsonBuilder(
@Nonnull final Localization localization,
@Nonnull final ContentCountry contentCountry)
throws IOException, ExtractionException {
// @formatter:off
return JsonObject.builder()
.object("context")
.object("client")
.value("hl", localization.getLocalizationCode())
.value("gl", contentCountry.getCountryCode())
.value("clientName", "WEB")
.value("clientVersion", getClientVersion())
.value("originalUrl", "https://www.youtube.com")
.value("platform", "DESKTOP")
.end()
.object("request")
.array("internalExperimentFlags")
.end()
.value("useSsl", true)
.end()
.object("user")
// TO DO: provide a way to enable restricted mode with:
// .value("enableSafetyMode", boolean)
.value("lockedSafetyMode", false)
.end()
.end();
// @formatter:on
}
@Nonnull
public static JsonBuilder<JsonObject> prepareAndroidMobileJsonBuilder(
@Nonnull final Localization localization,
@Nonnull final ContentCountry contentCountry) {
// @formatter:off
return JsonObject.builder()
.object("context")
.object("client")
.value("clientName", "ANDROID")
.value("clientVersion", ANDROID_YOUTUBE_CLIENT_VERSION)
.value("platform", "MOBILE")
.value("osName", "Android")
.value("osVersion", "12")
/*
A valid Android SDK version is required to be sure to get a valid player
response
If this parameter is not provided, the player response may be replaced by
the one of a 5-minute video saying the message "The following content is
not available on this app. Watch this content on the latest version on
YouTube"
See https://github.com/TeamNewPipe/NewPipe/issues/8713
The Android SDK version corresponding to the Android version used in
requests is sent
*/
.value("androidSdkVersion", 31)
.value("hl", localization.getLocalizationCode())
.value("gl", contentCountry.getCountryCode())
.end()
.object("user")
// TO DO: provide a way to enable restricted mode with:
// .value("enableSafetyMode", boolean)
.value("lockedSafetyMode", false)
.end()
.end();
// @formatter:on
}
@Nonnull
public static JsonBuilder<JsonObject> prepareIosMobileJsonBuilder(
@Nonnull final Localization localization,
@Nonnull final ContentCountry contentCountry) {
// @formatter:off
return JsonObject.builder()
.object("context")
.object("client")
.value("clientName", "IOS")
.value("clientVersion", IOS_YOUTUBE_CLIENT_VERSION)
.value("deviceMake", "Apple")
// Device model is required to get 60fps streams
.value("deviceModel", IOS_DEVICE_MODEL)
.value("platform", "MOBILE")
.value("osName", "iOS")
// The value of this field seems to use the following structure:
// "iOS version.0.build version"
// The build version corresponding to the iOS version used can be found on
// https://www.theiphonewiki.com/wiki/Firmware/iPhone/15.x#iPhone_13
.value("osVersion", "15.6.0.19G71")
.value("hl", localization.getLocalizationCode())
.value("gl", contentCountry.getCountryCode())
.end()
.object("user")
// TO DO: provide a way to enable restricted mode with:
// .value("enableSafetyMode", boolean)
.value("lockedSafetyMode", false)
.end()
.end();
// @formatter:on
}
@Nonnull
public static JsonBuilder<JsonObject> prepareTvHtml5EmbedJsonBuilder(
@Nonnull final Localization localization,
@Nonnull final ContentCountry contentCountry,
@Nonnull final String videoId) {
// @formatter:off
return JsonObject.builder()
.object("context")
.object("client")
.value("clientName", "TVHTML5_SIMPLY_EMBEDDED_PLAYER")
.value("clientVersion", TVHTML5_SIMPLY_EMBED_CLIENT_VERSION)
.value("clientScreen", "EMBED")
.value("platform", "TV")
.value("hl", localization.getLocalizationCode())
.value("gl", contentCountry.getCountryCode())
.end()
.object("thirdParty")
.value("embedUrl", "https://www.youtube.com/watch?v=" + videoId)
.end()
.object("user")
// TO DO: provide a way to enable restricted mode with:
// .value("enableSafetyMode", boolean)
.value("lockedSafetyMode", false)
.end()
.end();
// @formatter:on
}
@Nonnull
public static byte[] createDesktopPlayerBody(
@Nonnull final Localization localization,
@Nonnull final ContentCountry contentCountry,
@Nonnull final String videoId,
@Nonnull final String sts,
final boolean isTvHtml5DesktopJsonBuilder,
@Nonnull final String contentPlaybackNonce) throws IOException, ExtractionException {
// @formatter:off
return JsonWriter.string((isTvHtml5DesktopJsonBuilder
? prepareTvHtml5EmbedJsonBuilder(localization, contentCountry, videoId)
: prepareDesktopJsonBuilder(localization, contentCountry))
.object("playbackContext")
.object("contentPlaybackContext")
// Signature timestamp from the JavaScript base player is needed to get
// working obfuscated URLs
.value("signatureTimestamp", sts)
.value("referer", "https://www.youtube.com/watch?v=" + videoId)
.end()
.end()
.value(CPN, contentPlaybackNonce)
.value(VIDEO_ID, videoId)
.value(CONTENT_CHECK_OK, true)
.value(RACY_CHECK_OK, true)
.done())
.getBytes(StandardCharsets.UTF_8);
// @formatter:on
}
/**
* Get the user-agent string used as the user-agent for InnerTube requests with the Android
* client.
*
* <p>
* If the {@link Localization} provided is {@code null}, fallbacks to
* {@link Localization#DEFAULT the default one}.
* </p>
*
* @param localization the {@link Localization} to set in the user-agent
* @return the Android user-agent used for InnerTube requests with the Android client,
* depending on the {@link Localization} provided
*/
@Nonnull
public static String getAndroidUserAgent(@Nullable final Localization localization) {
// Spoofing an Android 12 device with the hardcoded version of the Android app
return "com.google.android.youtube/" + ANDROID_YOUTUBE_CLIENT_VERSION
+ " (Linux; U; Android 12; "
+ (localization != null ? localization : Localization.DEFAULT).getCountryCode()
+ ") gzip";
}
/**
* Get the user-agent string used as the user-agent for InnerTube requests with the iOS
* client.
*
* <p>
* If the {@link Localization} provided is {@code null}, fallbacks to
* {@link Localization#DEFAULT the default one}.
* </p>
*
* @param localization the {@link Localization} to set in the user-agent
* @return the iOS user-agent used for InnerTube requests with the iOS client, depending on the
* {@link Localization} provided
*/
@Nonnull
public static String getIosUserAgent(@Nullable final Localization localization) {
// Spoofing an iPhone 13 running iOS 15.6 with the hardcoded version of the iOS app
return "com.google.ios.youtube/" + IOS_YOUTUBE_CLIENT_VERSION
+ "(" + IOS_DEVICE_MODEL + "; U; CPU iOS 15_6 like Mac OS X; "
+ (localization != null ? localization : Localization.DEFAULT).getCountryCode()
+ ")";
}
/**
* Add required headers and cookies to an existing headers Map.
* @see #addClientInfoHeaders(Map)
* @see #addCookieHeader(Map)
*/
public static void addYouTubeHeaders(final Map<String, List<String>> headers)
throws IOException, ExtractionException {
addClientInfoHeaders(headers);
addCookieHeader(headers);
}
/**
* Add the <code>X-YouTube-Client-Name</code>, <code>X-YouTube-Client-Version</code>,
* <code>Origin</code>, and <code>Referer</code> headers.
* @param headers The headers which should be completed
*/
public static void addClientInfoHeaders(@Nonnull final Map<String, List<String>> headers)
throws IOException, ExtractionException {
headers.computeIfAbsent("Origin", k -> singletonList("https://www.youtube.com"));
headers.computeIfAbsent("Referer", k -> singletonList("https://www.youtube.com"));
headers.computeIfAbsent("X-YouTube-Client-Name", k -> singletonList("1"));
if (headers.get("X-YouTube-Client-Version") == null) {
headers.put("X-YouTube-Client-Version", singletonList(getClientVersion()));
}
}
/**
* Create a map with the required cookie header.
* @return A singleton map containing the header.
*/
public static Map<String, List<String>> getCookieHeader() {
return Collections.singletonMap("Cookie", singletonList(generateConsentCookie()));
}
/**
* Add the <code>CONSENT</code> cookie to prevent redirect to <code>consent.youtube.com</code>
* @param headers the headers which should be completed
*/
public static void addCookieHeader(@Nonnull final Map<String, List<String>> headers) {
if (headers.get("Cookie") == null) {
headers.put("Cookie", Collections.singletonList(generateConsentCookie()));
} else {
headers.get("Cookie").add(generateConsentCookie());
}
}
@Nonnull
public static String generateConsentCookie() {
return "CONSENT=" + (isConsentAccepted()
// YES+ means that the user did submit their choices and allows tracking.
? "YES+"
// PENDING+ means that the user did not yet submit their choices.
// YT & Google should not track the user, because they did not give consent.
// The three digits at the end can be random, but are required.
: "PENDING+" + (100 + numberGenerator.nextInt(900)));
}
public static String extractCookieValue(final String cookieName,
@Nonnull final Response response) {
final List<String> cookies = response.responseHeaders().get("set-cookie");
if (cookies == null) {
return "";
}
String result = "";
for (final String cookie : cookies) {
final int startIndex = cookie.indexOf(cookieName);
if (startIndex != -1) {
result = cookie.substring(startIndex + cookieName.length() + "=".length(),
cookie.indexOf(";", startIndex));
}
}
return result;
}
/**
* Shared alert detection function, multiple endpoints return the error similarly structured.
* <p>
* Will check if the object has an alert of the type "ERROR".
* </p>
*
* @param initialData the object which will be checked if an alert is present
* @throws ContentNotAvailableException if an alert is detected
*/
public static void defaultAlertsCheck(@Nonnull final JsonObject initialData)
throws ParsingException {
final JsonArray alerts = initialData.getArray("alerts");
if (!isNullOrEmpty(alerts)) {
final JsonObject alertRenderer = alerts.getObject(0).getObject("alertRenderer");
final String alertText = getTextFromObject(alertRenderer.getObject("text"));
final String alertType = alertRenderer.getString("type", "");
if (alertType.equalsIgnoreCase("ERROR")) {
if (alertText != null && alertText.contains("This account has been terminated")) {
if (alertText.contains("violation") || alertText.contains("violating")
|| alertText.contains("infringement")) {
// Possible error messages:
// "This account has been terminated for a violation of YouTube's Terms of
// Service."
// "This account has been terminated due to multiple or severe violations of
// YouTube's policy prohibiting hate speech."
// "This account has been terminated due to multiple or severe violations of
// YouTube's policy prohibiting content designed to harass, bully or
// threaten."
// "This account has been terminated due to multiple or severe violations
// of YouTube's policy against spam, deceptive practices and misleading
// content or other Terms of Service violations."
// "This account has been terminated due to multiple or severe violations of
// YouTube's policy on nudity or sexual content."
// "This account has been terminated for violating YouTube's Community
// Guidelines."
// "This account has been terminated because we received multiple
// third-party claims of copyright infringement regarding material that
// the user posted."
// "This account has been terminated because it is linked to an account that
// received multiple third-party claims of copyright infringement."
throw new AccountTerminatedException(alertText,
AccountTerminatedException.Reason.VIOLATION);
} else {
throw new AccountTerminatedException(alertText);
}
}
throw new ContentNotAvailableException("Got error: \"" + alertText + "\"");
}
}
}
@Nonnull
public static List<MetaInfo> getMetaInfo(@Nonnull final JsonArray contents)
throws ParsingException {
final List<MetaInfo> metaInfo = new ArrayList<>();
for (final Object content : contents) {
final JsonObject resultObject = (JsonObject) content;
if (resultObject.has("itemSectionRenderer")) {
for (final Object sectionContentObject
: resultObject.getObject("itemSectionRenderer").getArray("contents")) {
final JsonObject sectionContent = (JsonObject) sectionContentObject;
if (sectionContent.has("infoPanelContentRenderer")) {
metaInfo.add(getInfoPanelContent(sectionContent
.getObject("infoPanelContentRenderer")));
}
if (sectionContent.has("clarificationRenderer")) {
metaInfo.add(getClarificationRendererContent(sectionContent
.getObject("clarificationRenderer")
));
}
}
}
}
return metaInfo;
}
@Nonnull
private static MetaInfo getInfoPanelContent(@Nonnull final JsonObject infoPanelContentRenderer)
throws ParsingException {
final MetaInfo metaInfo = new MetaInfo();
final StringBuilder sb = new StringBuilder();
for (final Object paragraph : infoPanelContentRenderer.getArray("paragraphs")) {
if (sb.length() != 0) {
sb.append("<br>");
}
sb.append(YoutubeParsingHelper.getTextFromObject((JsonObject) paragraph));
}
metaInfo.setContent(new Description(sb.toString(), Description.HTML));
if (infoPanelContentRenderer.has("sourceEndpoint")) {
final String metaInfoLinkUrl = YoutubeParsingHelper.getUrlFromNavigationEndpoint(
infoPanelContentRenderer.getObject("sourceEndpoint"));
try {
metaInfo.addUrl(new URL(Objects.requireNonNull(extractCachedUrlIfNeeded(
metaInfoLinkUrl))));
} catch (final NullPointerException | MalformedURLException e) {
throw new ParsingException("Could not get metadata info URL", e);
}
final String metaInfoLinkText = YoutubeParsingHelper.getTextFromObject(
infoPanelContentRenderer.getObject("inlineSource"));
if (isNullOrEmpty(metaInfoLinkText)) {
throw new ParsingException("Could not get metadata info link text.");
}
metaInfo.addUrlText(metaInfoLinkText);
}
return metaInfo;
}
@Nonnull
private static MetaInfo getClarificationRendererContent(
@Nonnull final JsonObject clarificationRenderer) throws ParsingException {
final MetaInfo metaInfo = new MetaInfo();
final String title = YoutubeParsingHelper.getTextFromObject(clarificationRenderer
.getObject("contentTitle"));
final String text = YoutubeParsingHelper.getTextFromObject(clarificationRenderer
.getObject("text"));
if (title == null || text == null) {
throw new ParsingException("Could not extract clarification renderer content");
}
metaInfo.setTitle(title);
metaInfo.setContent(new Description(text, Description.PLAIN_TEXT));
if (clarificationRenderer.has("actionButton")) {
final JsonObject actionButton = clarificationRenderer.getObject("actionButton")
.getObject("buttonRenderer");
try {
final String url = YoutubeParsingHelper.getUrlFromNavigationEndpoint(actionButton
.getObject("command"));
metaInfo.addUrl(new URL(Objects.requireNonNull(extractCachedUrlIfNeeded(url))));
} catch (final NullPointerException | MalformedURLException e) {
throw new ParsingException("Could not get metadata info URL", e);
}
final String metaInfoLinkText = YoutubeParsingHelper.getTextFromObject(
actionButton.getObject("text"));
if (isNullOrEmpty(metaInfoLinkText)) {
throw new ParsingException("Could not get metadata info link text.");
}
metaInfo.addUrlText(metaInfoLinkText);
}
if (clarificationRenderer.has("secondaryEndpoint") && clarificationRenderer
.has("secondarySource")) {
final String url = getUrlFromNavigationEndpoint(clarificationRenderer
.getObject("secondaryEndpoint"));
// Ignore Google URLs, because those point to a Google search about "Covid-19"
if (url != null && !isGoogleURL(url)) {
try {
metaInfo.addUrl(new URL(url));
final String description = getTextFromObject(clarificationRenderer
.getObject("secondarySource"));
metaInfo.addUrlText(description == null ? url : description);
} catch (final MalformedURLException e) {
throw new ParsingException("Could not get metadata info secondary URL", e);
}
}
}
return metaInfo;
}
/**
* Sometimes, YouTube provides URLs which use Google's cache. They look like
* {@code https://webcache.googleusercontent.com/search?q=cache:CACHED_URL}
*
* @param url the URL which might refer to the Google's webcache
* @return the URL which is referring to the original site
*/
public static String extractCachedUrlIfNeeded(final String url) {
if (url == null) {
return null;
}
if (url.contains("webcache.googleusercontent.com")) {
return url.split("cache:")[1];
}
return url;
}
public static boolean isVerified(final JsonArray badges) {
if (Utils.isNullOrEmpty(badges)) {
return false;
}
for (final Object badge : badges) {
final String style = ((JsonObject) badge).getObject("metadataBadgeRenderer")
.getString("style");
if (style != null && (style.equals("BADGE_STYLE_TYPE_VERIFIED")
|| style.equals("BADGE_STYLE_TYPE_VERIFIED_ARTIST"))) {
return true;
}
}
return false;
}
/**
* Generate a content playback nonce (also called {@code cpn}), sent by YouTube clients in
* playback requests (and also for some clients, in the player request body).
*
* @return a content playback nonce string
*/
@Nonnull
public static String generateContentPlaybackNonce() {
return RandomStringFromAlphabetGenerator.generate(
CONTENT_PLAYBACK_NONCE_ALPHABET, 16, numberGenerator);
}
/**
* Try to generate a {@code t} parameter, sent by mobile clients as a query of the player
* request.
*
* <p>
* Some researches needs to be done to know how this parameter, unique at each request, is
* generated.
* </p>
*
* @return a 12 characters string to try to reproduce the {@code} parameter
*/
@Nonnull
public static String generateTParameter() {
return RandomStringFromAlphabetGenerator.generate(
CONTENT_PLAYBACK_NONCE_ALPHABET, 12, numberGenerator);
}
/**
* Check if the streaming URL is from the YouTube {@code WEB} client.
*
* @param url the streaming URL to be checked.
* @return true if it's a {@code WEB} streaming URL, false otherwise
*/
public static boolean isWebStreamingUrl(@Nonnull final String url) {
return Parser.isMatch(C_WEB_PATTERN, url);
}
/**
* Check if the streaming URL is a URL from the YouTube {@code TVHTML5_SIMPLY_EMBEDDED_PLAYER}
* client.
*
* @param url the streaming URL on which check if it's a {@code TVHTML5_SIMPLY_EMBEDDED_PLAYER}
* streaming URL.
* @return true if it's a {@code TVHTML5_SIMPLY_EMBEDDED_PLAYER} streaming URL, false otherwise
*/
public static boolean isTvHtml5SimplyEmbeddedPlayerStreamingUrl(@Nonnull final String url) {
return Parser.isMatch(C_TVHTML5_SIMPLY_EMBEDDED_PLAYER_PATTERN, url);
}
/**
* Check if the streaming URL is a URL from the YouTube {@code ANDROID} client.
*
* @param url the streaming URL to be checked.
* @return true if it's a {@code ANDROID} streaming URL, false otherwise
*/
public static boolean isAndroidStreamingUrl(@Nonnull final String url) {
return Parser.isMatch(C_ANDROID_PATTERN, url);
}
/**
* Check if the streaming URL is a URL from the YouTube {@code IOS} client.
*
* @param url the streaming URL on which check if it's a {@code IOS} streaming URL.
* @return true if it's a {@code IOS} streaming URL, false otherwise
*/
public static boolean isIosStreamingUrl(@Nonnull final String url) {
return Parser.isMatch(C_IOS_PATTERN, url);
}
/**
* @see #consentAccepted
*/
public static void setConsentAccepted(final boolean accepted) {
consentAccepted = accepted;
}
/**
* @see #consentAccepted
*/
public static boolean isConsentAccepted() {
return consentAccepted;
}
}