NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java

/*
 * Created by Christian Schabesberger on 02.03.16.
 *
 * Copyright (C) 2016 Christian Schabesberger <chris.schabesberger@mailbox.org>
 * YoutubeParsingHelper.java is part of NewPipe Extractor.
 *
 * NewPipe Extractor is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * NewPipe Extractor is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with NewPipe Extractor. If not, see <https://www.gnu.org/licenses/>.
 */

package org.schabi.newpipe.extractor.services.youtube;

import static org.schabi.newpipe.extractor.NewPipe.getDownloader;
import static org.schabi.newpipe.extractor.utils.Utils.HTTP;
import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
import static org.schabi.newpipe.extractor.utils.Utils.getStringResultFromRegexArray;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;

import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonBuilder;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import com.grack.nanojson.JsonWriter;

import org.jsoup.nodes.Entities;
import org.schabi.newpipe.extractor.Image;
import org.schabi.newpipe.extractor.Image.ResolutionLevel;
import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.exceptions.AccountTerminatedException;
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.localization.ContentCountry;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.playlist.PlaylistInfo;
import org.schabi.newpipe.extractor.stream.AudioTrackType;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.RandomStringFromAlphabetGenerator;
import org.schabi.newpipe.extractor.utils.Utils;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeParseException;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Random;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

public final class YoutubeParsingHelper {

    /**
     * Private constructor which prevents this class from being instantiated from outside.
     */
    private YoutubeParsingHelper() {
    }

    /**
     * The base URL of requests of the {@code WEB} clients to the InnerTube internal API.
     */
    public static final String YOUTUBEI_V1_URL = "https://www.youtube.com/youtubei/v1/";

    /**
     * The base URL of requests of non-web clients to the InnerTube internal API.
     */
    public static final String YOUTUBEI_V1_GAPIS_URL =
            "https://youtubei.googleapis.com/youtubei/v1/";

    /**
     * The base URL of YouTube Music.
     */
    private static final String YOUTUBE_MUSIC_URL = "https://music.youtube.com";

    /**
     * A parameter to disable pretty-printed response of InnerTube requests, to reduce response
     * sizes.
     *
     * <p>
     * Sent in query parameters of the requests.
     * </p>
     **/
    public static final String DISABLE_PRETTY_PRINT_PARAMETER = "prettyPrint=false";

    /**
     * A parameter sent by official clients named {@code contentPlaybackNonce}.
     *
     * <p>
     * It is sent by official clients on videoplayback requests and InnerTube player requests in
     * most cases.
     * </p>
     *
     * <p>
     * It is composed of 16 characters which are generated from
     * {@link #CONTENT_PLAYBACK_NONCE_ALPHABET this alphabet}, with the use of strong random
     * values.
     * </p>
     *
     * @see #generateContentPlaybackNonce()
     */
    public static final String CPN = "cpn";
    public static final String VIDEO_ID = "videoId";

    /**
     * A parameter sent by official clients named {@code contentCheckOk}.
     *
     * <p>
     * Setting it to {@code true} allows us to get streaming data on videos with a warning about
     * the sensitive content they contain.
     * </p>
     */
    public static final String CONTENT_CHECK_OK = "contentCheckOk";

    /**
     * A parameter which may be sent by official clients named {@code racyCheckOk}.
     *
     * <p>
     * What this parameter does is not really known, but it seems to be linked to sensitive
     * contents such as age-restricted content.
     * </p>
     */
    public static final String RACY_CHECK_OK = "racyCheckOk";

    /**
     * The hardcoded client ID used for InnerTube requests with the {@code WEB} client.
     */
    private static final String WEB_CLIENT_ID = "1";

    /**
     * The client version for InnerTube requests with the {@code WEB} client, used as the last
     * fallback if the extraction of the real one failed.
     */
    private static final String HARDCODED_CLIENT_VERSION = "2.20240410.01.00";

    /**
     * The hardcoded client version of the Android app used for InnerTube requests with this
     * client.
     *
     * <p>
     * It can be extracted by getting the latest release version of the app in an APK repository
     * such as <a href="https://www.apkmirror.com/apk/google-inc/youtube/">APKMirror</a>.
     * </p>
     */
    private static final String ANDROID_YOUTUBE_CLIENT_VERSION = "19.13.36";

    /**
     * The hardcoded client version of the iOS app used for InnerTube requests with this client.
     *
     * <p>
     * It can be extracted by getting the latest release version of the app on
     * <a href="https://apps.apple.com/us/app/youtube-watch-listen-stream/id544007664/">the App
     * Store page of the YouTube app</a>, in the {@code What’s New} section.
     * </p>
     */
    private static final String IOS_YOUTUBE_CLIENT_VERSION = "19.14.3";

    /**
     * The hardcoded client version used for InnerTube requests with the TV HTML5 embed client.
     */
    private static final String TVHTML5_SIMPLY_EMBED_CLIENT_VERSION = "2.0";

    /**
     * The hardcoded client ID used for InnerTube requests with the YouTube Music desktop client.
     */
    private static final String YOUTUBE_MUSIC_CLIENT_ID = "67";

    /**
     * The hardcoded client version used for InnerTube requests with the YouTube Music desktop
     * client.
     */
    private static final String HARDCODED_YOUTUBE_MUSIC_CLIENT_VERSION = "1.20240403.01.00";

    /**
     * The cached client version of the {@code WEB} InnerTube client, returned by
     * {@link #getClientVersion()} once it has been set.
     */
    private static String clientVersion;

    /**
     * The cached client version of the YouTube Music desktop client, returned by
     * {@link #getYoutubeMusicClientVersion()} once it has been set.
     */
    private static String youtubeMusicClientVersion;

    /**
     * Whether {@link #clientVersion} has been successfully extracted from YouTube, either from
     * {@code sw.js} or from the HTML search results page.
     */
    private static boolean clientVersionExtracted = false;
    /**
     * The cached result of {@link #isHardcodedClientVersionValid()}, empty until the check has
     * been performed once.
     */
    @SuppressWarnings("OptionalUsedAsFieldOrParameterType")
    private static Optional<Boolean> hardcodedClientVersionValid = Optional.empty();

    /**
     * Regular expressions used to find an InnerTube client version (first capturing group) in the
     * {@code sw.js} and HTML responses fetched from YouTube and YouTube Music.
     */
    private static final String[] INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES =
            {"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
                    "innertube_context_client_version\":\"([0-9\\.]+?)\"",
                    "client.version=([0-9\\.]+)"};
    /**
     * Regular expressions used to find the {@code ytInitialData} JSON object (first capturing
     * group) in an HTML page.
     */
    private static final String[] INITIAL_DATA_REGEXES =
            {"window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});",
                    "var\\s*ytInitialData\\s*=\\s*(\\{.*?\\});"};

    /**
     * The alphabet (64 characters) from which the characters of the {@link #CPN content playback
     * nonce} are taken.
     */
    private static final String CONTENT_PLAYBACK_NONCE_ALPHABET =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";

    /**
     * The device machine id for the iPhone 15, used to get 60fps with the {@code iOS} client.
     *
     * <p>
     * See <a href="https://gist.github.com/adamawolf/3048717">this GitHub Gist</a> for more
     * information.
     * </p>
     */
    private static final String IOS_DEVICE_MODEL = "iPhone15,4";

    /**
     * Spoofing an iPhone 15 running iOS 17.4.1 with the hardcoded version of the iOS app. To be
     * used for the {@code "osVersion"} field in JSON POST requests.
     * <p>
     * The value of this field seems to use the following structure:
     * "iOS major version.minor version.patch version.build version", where
     * "patch version" is equal to 0 if it isn't set.
     * The build version corresponding to the iOS version used can be found on
     * <a href="https://theapplewiki.com/wiki/Firmware/iPhone/17.x#iPhone_15">
     *     https://theapplewiki.com/wiki/Firmware/iPhone/17.x#iPhone_15</a>
     * </p>
     *
     * @see #IOS_USER_AGENT_VERSION
     */
    private static final String IOS_OS_VERSION = "17.4.1.21E237";

    /**
     * Spoofing an iPhone 15 running iOS 17.4.1 with the hardcoded version of the iOS app. To be
     * used in the user agent for requests.
     *
     * @see #IOS_OS_VERSION
     */
    private static final String IOS_USER_AGENT_VERSION = "17_4_1";

    /**
     * The random number generator of this class, which can be replaced with
     * {@link #setNumberGenerator(Random)}.
     * NOTE(review): its usages are outside the visible part of this file - presumably the
     * generation of random request parameters; confirm.
     */
    private static Random numberGenerator = new Random();

    /**
     * The base URL of the feed of a channel, to which a channel ID is appended.
     */
    private static final String FEED_BASE_CHANNEL_ID =
            "https://www.youtube.com/feeds/videos.xml?channel_id=";
    /**
     * The base URL of the feed of a user, to which a user name is appended.
     */
    private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user=";
    // Patterns matching the "c" query parameter of a URL for each client name.
    // NOTE(review): their usages are outside the visible part of this file - presumably the
    // detection of the client a streaming URL has been generated for; confirm.
    private static final Pattern C_WEB_PATTERN = Pattern.compile("&c=WEB");
    private static final Pattern C_TVHTML5_SIMPLY_EMBEDDED_PLAYER_PATTERN =
            Pattern.compile("&c=TVHTML5_SIMPLY_EMBEDDED_PLAYER");
    private static final Pattern C_ANDROID_PATTERN = Pattern.compile("&c=ANDROID");
    private static final Pattern C_IOS_PATTERN = Pattern.compile("&c=IOS");

    /**
     * The host prefixes for which {@link #isGoogleURL(String)} returns {@code true}.
     */
    private static final Set<String> GOOGLE_URLS = Set.of("google.", "m.google.", "www.google.");
    /**
     * The lowercase hosts for which {@link #isInvidiousURL(URL)} returns {@code true}.
     */
    private static final Set<String> INVIDIOUS_URLS = Set.of("invidio.us", "dev.invidio.us",
            "www.invidio.us", "redirect.invidious.io", "invidious.snopyta.org", "yewtu.be",
            "tube.connect.cafe", "tubus.eduvid.org", "invidious.kavin.rocks", "invidious.site",
            "invidious-us.kavin.rocks", "piped.kavin.rocks", "vid.mint.lgbt", "invidiou.site",
            "invidious.fdn.fr", "invidious.048596.xyz", "invidious.zee.li", "vid.puffyan.us",
            "ytprivate.com", "invidious.namazso.eu", "invidious.silkky.cloud", "ytb.trom.tf",
            "invidious.exonip.de", "inv.riverside.rocks", "invidious.blamefran.net", "y.com.cm",
            "invidious.moomoo.me", "yt.cyberhost.uk");
    /**
     * The lowercase hosts for which {@link #isYoutubeURL(URL)} returns {@code true}.
     */
    private static final Set<String> YOUTUBE_URLS = Set.of("youtube.com", "www.youtube.com",
            "m.youtube.com", "music.youtube.com");

    // NOTE(review): the usages of this flag are outside the visible part of this file -
    // presumably whether the cookie consent should be sent as accepted; confirm.
    private static boolean consentAccepted = false;

    /**
     * Checks whether the host of the given URL starts with one of the known Google host prefixes.
     *
     * <p>
     * The URL is first passed to {@code extractCachedUrlIfNeeded}, and the URL returned by this
     * call is the one which is actually tested.
     * </p>
     *
     * @param url the URL to check
     * @return {@code true} if the host starts with one of the Google prefixes, {@code false} if it
     *         doesn't or if the URL is malformed
     */
    public static boolean isGoogleURL(final String url) {
        final String urlToTest = extractCachedUrlIfNeeded(url);

        final URL parsedUrl;
        try {
            parsedUrl = new URL(urlToTest);
        } catch (final MalformedURLException e) {
            // A string which is not a valid URL cannot be a Google URL
            return false;
        }

        final String host = parsedUrl.getHost();
        for (final String googlePrefix : GOOGLE_URLS) {
            if (host.startsWith(googlePrefix)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether the host of the given URL, compared case-insensitively, is one of the known
     * YouTube hosts.
     *
     * @param url the URL to check
     * @return whether the host of the URL is a YouTube host
     */
    public static boolean isYoutubeURL(@Nonnull final URL url) {
        final String lowercaseHost = url.getHost().toLowerCase(Locale.ROOT);
        return YOUTUBE_URLS.contains(lowercaseHost);
    }

    /**
     * Checks whether the host of the given URL is, ignoring case, {@code www.youtube-nocookie.com}
     * or {@code youtu.be}.
     *
     * @param url the URL to check
     * @return whether the host of the URL is one of these two hosts
     */
    public static boolean isYoutubeServiceURL(@Nonnull final URL url) {
        final String host = url.getHost();
        return Stream.of("www.youtube-nocookie.com", "youtu.be")
                .anyMatch(host::equalsIgnoreCase);
    }

    /**
     * Checks whether the host of the given URL is, ignoring case, {@code hooktube.com}.
     *
     * @param url the URL to check
     * @return whether the host of the URL is the HookTube one
     */
    public static boolean isHooktubeURL(@Nonnull final URL url) {
        return url.getHost().equalsIgnoreCase("hooktube.com");
    }

    /**
     * Checks whether the host of the given URL, compared case-insensitively, is one of the known
     * Invidious hosts.
     *
     * @param url the URL to check
     * @return whether the host of the URL is a known Invidious host
     */
    public static boolean isInvidiousURL(@Nonnull final URL url) {
        final String lowercaseHost = url.getHost().toLowerCase(Locale.ROOT);
        return INVIDIOUS_URLS.contains(lowercaseHost);
    }

    /**
     * Checks whether the host of the given URL is, ignoring case, {@code y2u.be}.
     *
     * @param url the URL to check
     * @return whether the host of the URL is {@code y2u.be}
     */
    public static boolean isY2ubeURL(@Nonnull final URL url) {
        final String host = url.getHost();
        return host.equalsIgnoreCase("y2u.be");
    }

    /**
     * Parses a textual duration whose units are separated by {@code :} or, when the input
     * contains no {@code :}, by {@code .}.
     *
     * <p>
     * Up to four units are accepted, which are read from right to left as seconds, minutes, hours
     * and days. A unit which cannot be converted to a number counts as {@code 0}.
     * </p>
     *
     * @param input the duration string to parse
     * @return the duration in seconds
     * @throws ParsingException when more than 3 separators are found
     */
    public static int parseDurationString(@Nonnull final String input)
            throws ParsingException, NumberFormatException {
        // Use : as the time separator if it is present, and . otherwise
        final String separatorRegex = input.contains(":") ? ":" : "\\.";
        final String[] parts = input.split(separatorRegex);

        // Factor applied after adding each unit: days -> hours -> minutes -> seconds
        final int[] multipliers = {24, 60, 60, 1};
        if (parts.length > multipliers.length) {
            throw new ParsingException("Error duration string with unknown format: " + input);
        }

        // The parts are right-aligned on the multipliers, as the last part is always the seconds
        int multiplierIndex = multipliers.length - parts.length;
        int seconds = 0;
        for (final String part : parts) {
            seconds = (seconds + convertDurationToInt(part)) * multipliers[multiplierIndex];
            multiplierIndex++;
        }
        return seconds;
    }

    /**
     * Converts one unit of a duration string to an integer, without ever throwing an exception.
     * <br/>
     * Helper method for {@link #parseDurationString(String)}.
     * <br/>
     * Note: This method is also used as a workaround for NewPipe#8034 (YT shorts no longer
     * display any duration in channels).
     *
     * @param input the string to convert, from which non-digit characters are removed first
     * @return the converted integer, or 0 if the input is {@code null}, empty or could not be
     *         converted
     */
    private static int convertDurationToInt(final String input) {
        int result = 0;
        if (input != null && !input.isEmpty()) {
            final String digitsOnly = Utils.removeNonDigitCharacters(input);
            try {
                result = Integer.parseInt(digitsOnly);
            } catch (final NumberFormatException ignored) {
                // Not a parsable number (e.g. no digit at all): keep the default value of 0
            }
        }
        return result;
    }

    /**
     * Builds the URL of the feed of a channel or of a user.
     *
     * <p>
     * If the given string starts with {@code user/}, this prefix is stripped and the remaining
     * part is used as a user name. If it starts with {@code channel/}, this prefix is stripped and
     * the remaining part is used as a channel ID. Otherwise, the whole string is used as a channel
     * ID.
     * </p>
     *
     * @param channelIdOrUser a channel ID, optionally prefixed with {@code channel/}, or a user
     *                        name prefixed with {@code user/}
     * @return the URL of the corresponding feed
     */
    @Nonnull
    public static String getFeedUrlFrom(@Nonnull final String channelIdOrUser) {
        final String userPrefix = "user/";
        final String channelPrefix = "channel/";

        // Only the leading prefix is removed: later occurrences of the same text in the
        // identifier are left untouched
        if (channelIdOrUser.startsWith(userPrefix)) {
            return FEED_BASE_USER + channelIdOrUser.substring(userPrefix.length());
        }
        if (channelIdOrUser.startsWith(channelPrefix)) {
            return FEED_BASE_CHANNEL_ID + channelIdOrUser.substring(channelPrefix.length());
        }
        return FEED_BASE_CHANNEL_ID + channelIdOrUser;
    }

    /**
     * Parses a textual upload date, first as an ISO-8601 date-time with an offset, then as an
     * ISO-8601 local date which is considered to be at the start of the day in UTC.
     *
     * @param textualUploadDate the textual date to parse
     * @return the parsed date
     * @throws ParsingException if the text could not be parsed with any of the two formats
     */
    public static OffsetDateTime parseDateFrom(final String textualUploadDate)
            throws ParsingException {
        try {
            return OffsetDateTime.parse(textualUploadDate);
        } catch (final DateTimeParseException ignored) {
            // Not a date-time with an offset: try to parse a date without time below
        }

        try {
            final LocalDate uploadDay = LocalDate.parse(textualUploadDate);
            return uploadDay.atStartOfDay().atOffset(ZoneOffset.UTC);
        } catch (final DateTimeParseException e) {
            throw new ParsingException("Could not parse date: \"" + textualUploadDate + "\"",
                    e);
        }
    }

    /**
     * Checks whether the given playlist ID is the one of a YouTube Mix (auto-generated playlist),
     * i.e. whether it starts with {@code RD}.
     *
     * @param playlistId the playlist ID to check
     * @return whether the given ID belongs to a YouTube Mix
     */
    public static boolean isYoutubeMixId(@Nonnull final String playlistId) {
        final String mixPrefix = "RD";
        return playlistId.startsWith(mixPrefix);
    }

    /**
     * Checks whether the given playlist ID is the one of a YouTube My Mix (auto-generated
     * playlist), i.e. whether it starts with {@code RDMM}.
     *
     * @param playlistId the playlist ID to check
     * @return whether the given ID belongs to a YouTube My Mix
     */
    public static boolean isYoutubeMyMixId(@Nonnull final String playlistId) {
        final String myMixPrefix = "RDMM";
        return playlistId.startsWith(myMixPrefix);
    }

    /**
     * Checks whether the given playlist ID is the one of a YouTube Music Mix (auto-generated
     * playlist), i.e. whether it starts with {@code RDAMVM} or {@code RDCLAK}.
     *
     * @param playlistId the playlist ID to check
     * @return whether the given ID belongs to a YouTube Music Mix
     */
    public static boolean isYoutubeMusicMixId(@Nonnull final String playlistId) {
        return Stream.of("RDAMVM", "RDCLAK")
                .anyMatch(playlistId::startsWith);
    }

    /**
     * Checks whether the given playlist ID is the one of a YouTube Channel Mix (auto-generated
     * playlist), i.e. whether it starts with {@code RDCM}.
     *
     * @param playlistId the playlist ID to check
     * @return whether the given ID belongs to a YouTube Channel Mix
     */
    public static boolean isYoutubeChannelMixId(@Nonnull final String playlistId) {
        final String channelMixPrefix = "RDCM";
        return playlistId.startsWith(channelMixPrefix);
    }

    /**
     * Checks whether the given playlist ID is the one of a YouTube Genre Mix (auto-generated
     * playlist), i.e. whether it starts with {@code RDGMEM}.
     *
     * @param playlistId the playlist ID to check
     * @return whether the given ID belongs to a YouTube Genre Mix
     */
    public static boolean isYoutubeGenreMixId(@Nonnull final String playlistId) {
        final String genreMixPrefix = "RDGMEM";
        return playlistId.startsWith(genreMixPrefix);
    }

    /**
     * Extracts the ID of the video on which a mix is based from the ID of this mix.
     *
     * @param playlistId the playlist ID to parse
     * @return the video ID contained in the mix ID, which is the part of the ID following its mix
     *         prefix
     * @throws ParsingException if the playlistId is null or empty, if the playlistId is not a mix,
     *                          if it is a mix but it's not based on a specific stream (this is the
     *                          case for channel or genre mixes)
     */
    @Nonnull
    public static String extractVideoIdFromMixId(final String playlistId)
            throws ParsingException {
        if (isNullOrEmpty(playlistId)) {
            throw new ParsingException("Video id could not be determined from empty playlist id");
        }

        // The most specific prefixes are tested first, as every mix ID starts with RD
        if (isYoutubeMyMixId(playlistId)) {
            // My Mix IDs are of the form RDMM{videoId}
            return playlistId.substring(4);
        }

        if (isYoutubeMusicMixId(playlistId)) {
            // Music mix IDs are of the form RDAMVM{videoId} or RDCLAK{videoId}
            return playlistId.substring(6);
        }

        if (isYoutubeChannelMixId(playlistId)) {
            // Channel mixes are of the form RDCM{channelId}, so videoId can't be determined
            throw new ParsingException("Video id could not be determined from channel mix id: "
                    + playlistId);
        }

        if (isYoutubeGenreMixId(playlistId)) {
            // Genre mixes are of the form RDGMEM{garbage}, so videoId can't be determined
            throw new ParsingException("Video id could not be determined from genre mix id: "
                    + playlistId);
        }

        if (!isYoutubeMixId(playlistId)) { // not a mix
            throw new ParsingException("Video id could not be determined from playlist id: "
                    + playlistId);
        }

        // Normal mix: stream YouTube mixes are of the form RD{videoId}, but if videoId is not
        // exactly 11 characters then it can't be a video id, hence we are dealing with a
        // different type of mix
        if (playlistId.length() != 13) {
            throw new ParsingException("Video id could not be determined from mix id: "
                    + playlistId);
        }
        return playlistId.substring(2);
    }

    /**
     * Determines the type of a playlist from the prefix of its ID.
     *
     * @param playlistId the playlist ID to parse
     * @return the {@link PlaylistInfo.PlaylistType} corresponding to the playlist ID, which is
     *         {@link PlaylistInfo.PlaylistType#NORMAL} when the ID is not the one of a mix
     * @throws ParsingException if the playlistId is null or empty
     */
    @Nonnull
    public static PlaylistInfo.PlaylistType extractPlaylistTypeFromPlaylistId(
            final String playlistId) throws ParsingException {
        if (isNullOrEmpty(playlistId)) {
            throw new ParsingException("Could not extract playlist type from empty playlist id");
        }

        if (isYoutubeMusicMixId(playlistId)) {
            return PlaylistInfo.PlaylistType.MIX_MUSIC;
        }
        if (isYoutubeChannelMixId(playlistId)) {
            return PlaylistInfo.PlaylistType.MIX_CHANNEL;
        }
        if (isYoutubeGenreMixId(playlistId)) {
            return PlaylistInfo.PlaylistType.MIX_GENRE;
        }
        if (isYoutubeMixId(playlistId)) {
            // Either a normal mix based on a stream, or a "my mix" (still based on a stream).
            // NOTE: if YouTube introduces even more types of mixes that still start with RD,
            // they will default to this, even though they might not be based on a stream.
            return PlaylistInfo.PlaylistType.MIX_STREAM;
        }

        // Not a known type of mix: just consider it a normal playlist
        return PlaylistInfo.PlaylistType.NORMAL;
    }

    /**
     * Determines the type of a playlist from the {@code list} query parameter of its URL.
     *
     * @param playlistUrl the playlist URL to parse
     * @return the {@link PlaylistInfo.PlaylistType} extracted from the playlistUrl's list param
     *         (mix playlist types included)
     * @throws ParsingException if the playlistUrl is malformed, if has no list param or if the list
     *                          param is empty
     */
    public static PlaylistInfo.PlaylistType extractPlaylistTypeFromPlaylistUrl(
            final String playlistUrl) throws ParsingException {
        try {
            final var parsedUrl = Utils.stringToURL(playlistUrl);
            final String listParameter = Utils.getQueryValue(parsedUrl, "list");
            return extractPlaylistTypeFromPlaylistId(listParameter);
        } catch (final MalformedURLException e) {
            throw new ParsingException("Could not extract playlist type from malformed url", e);
        }
    }

    /**
     * Extracts and parses the {@code ytInitialData} JSON object embedded in an HTML page.
     *
     * @param html the HTML page to search
     * @return the parsed {@code ytInitialData} object
     * @throws ParsingException if the object could not be found in the page or could not be
     *                          parsed as JSON
     */
    private static JsonObject getInitialData(final String html) throws ParsingException {
        try {
            final String rawInitialData = getStringResultFromRegexArray(html,
                    INITIAL_DATA_REGEXES, 1);
            return JsonParser.object().from(rawInitialData);
        } catch (final JsonParserException | Parser.RegexException e) {
            throw new ParsingException("Could not get ytInitialData", e);
        }
    }

    /**
     * Checks whether the hardcoded {@code WEB} client version is still accepted by YouTube.
     *
     * <p>
     * A request using this version is sent to the InnerTube {@code guide} endpoint, and the
     * version is considered valid if the response code is 200 and the response body is longer
     * than 5000 characters. The result is cached, so the request is made at most once as long as
     * it doesn't fail with an exception.
     * </p>
     *
     * @return whether the hardcoded client version is valid
     * @throws IOException         if the request failed
     * @throws ExtractionException if the request could not be made or its response could not be
     *                             used
     */
    public static boolean isHardcodedClientVersionValid()
            throws IOException, ExtractionException {
        // Reuse the result of a previous check if there is one
        if (hardcodedClientVersionValid.isPresent()) {
            return hardcodedClientVersionValid.get();
        }

        // @formatter:off
        final byte[] requestBody = JsonWriter.string()
            .object()
                .object("context")
                    .object("client")
                        .value("hl", "en-GB")
                        .value("gl", "GB")
                        .value("clientName", "WEB")
                        .value("clientVersion", HARDCODED_CLIENT_VERSION)
                        .value("platform", "DESKTOP")
                        .value("utcOffsetMinutes", 0)
                    .end()
                    .object("request")
                        .array("internalExperimentFlags")
                        .end()
                        .value("useSsl", true)
                    .end()
                    .object("user")
                        // TODO: provide a way to enable restricted mode with:
                        //  .value("enableSafetyMode", boolean)
                        .value("lockedSafetyMode", false)
                    .end()
                .end()
                .value("fetchLiveState", true)
            .end().done().getBytes(StandardCharsets.UTF_8);
        // @formatter:on

        final var requestHeaders = getClientHeaders(WEB_CLIENT_ID, HARDCODED_CLIENT_VERSION);

        // This endpoint is fetched by the YouTube website to get the items of its main menu and is
        // pretty lightweight (around 30kB)
        final String guideUrl = YOUTUBEI_V1_URL + "guide?" + DISABLE_PRETTY_PRINT_PARAMETER;
        final Response guideResponse = getDownloader().postWithContentTypeJson(
                guideUrl, requestHeaders, requestBody);
        final String guideBody = guideResponse.responseBody();
        final int guideStatus = guideResponse.responseCode();

        // Ensure to have a valid response
        final boolean valid = guideBody.length() > 5000 && guideStatus == 200;
        hardcodedClientVersionValid = Optional.of(valid);
        return hardcodedClientVersionValid.get();
    }


    /**
     * Extracts the {@code WEB} InnerTube client version from the {@code sw.js} file of YouTube,
     * and stores it in {@link #clientVersion}.
     *
     * <p>
     * Nothing is done if the client version has already been extracted.
     * </p>
     *
     * @throws IOException         if the request failed
     * @throws ExtractionException if the client version could not be found in the response
     */
    private static void extractClientVersionFromSwJs()
            throws IOException, ExtractionException {
        // Don't extract the InnerTube client version if it has been already extracted
        if (clientVersionExtracted) {
            return;
        }

        final String swJsUrl = "https://www.youtube.com/sw.js";
        final var swJsHeaders = getOriginReferrerHeaders("https://www.youtube.com");
        final String swJs = getDownloader().get(swJsUrl, swJsHeaders).responseBody();

        try {
            clientVersion = getStringResultFromRegexArray(swJs,
                    INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
        } catch (final Parser.RegexException e) {
            throw new ParsingException("Could not extract YouTube WEB InnerTube client version "
                    + "from sw.js", e);
        }

        clientVersionExtracted = true;
    }

    /**
     * Extracts the {@code WEB} InnerTube client version from the HTML search results page of
     * YouTube, and stores it in {@link #clientVersion}.
     *
     * <p>
     * The following sources are tried, in this order, until one provides a version:
     * </p>
     * <ol>
     *     <li>the {@code cver} parameter of the {@code CSI} service tracking params of the
     *     initial data;</li>
     *     <li>the client version regular expressions applied on the HTML page;</li>
     *     <li>the {@code client.version} parameter of the {@code ECATCHER} service tracking
     *     params of the initial data, which is a shortened client version.</li>
     * </ol>
     *
     * <p>
     * Nothing is done if the client version has already been extracted.
     * </p>
     *
     * @throws IOException         if the request failed
     * @throws ExtractionException if the initial data or the client version could not be
     *                             extracted from the page
     */
    private static void extractClientVersionFromHtmlSearchResultsPage()
            throws IOException, ExtractionException {
        // Don't extract the InnerTube client version if it has been already extracted
        if (clientVersionExtracted) {
            return;
        }

        // Don't provide a search term in order to have a smaller response
        final String url = "https://www.youtube.com/results?search_query=&ucbcb=1";
        final String html = getDownloader().get(url, getCookieHeader()).responseBody();
        final JsonObject initialData = getInitialData(html);

        // The service tracking params may be searched twice, so they are collected into a list
        // from which a new stream is created for each search: a stream can only be consumed once
        // and reusing it would throw an IllegalStateException
        final List<JsonObject> serviceTrackingParams = initialData.getObject("responseContext")
                .getArray("serviceTrackingParams")
                .stream()
                .filter(JsonObject.class::isInstance)
                .map(JsonObject.class::cast)
                .collect(Collectors.toList());

        // Try to get version from initial data first
        clientVersion = getClientVersionFromServiceTrackingParam(
                serviceTrackingParams.stream(), "CSI", "cver");

        if (isNullOrEmpty(clientVersion)) {
            try {
                clientVersion = getStringResultFromRegexArray(html,
                        INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
            } catch (final Parser.RegexException ignored) {
                // No regular expression matched: the next fallback is tried below
            }
        }

        // Fallback to get a shortened client version which does not contain the last two
        // digits
        if (isNullOrEmpty(clientVersion)) {
            clientVersion = getClientVersionFromServiceTrackingParam(
                    serviceTrackingParams.stream(), "ECATCHER", "client.version");
        }

        if (isNullOrEmpty(clientVersion)) {
            throw new ParsingException(
                    // CHECKSTYLE:OFF
                    "Could not extract YouTube WEB InnerTube client version from HTML search results page");
                    // CHECKSTYLE:ON
        }

        clientVersionExtracted = true;
    }

    /**
     * Searches a client version in service tracking params.
     *
     * <p>
     * The returned value is the first non-empty {@code value} of a param whose {@code key} is the
     * given key, among the {@code params} of the service tracking params whose {@code service} is
     * the given service name.
     * </p>
     *
     * @param serviceTrackingParamsStream a stream of the service tracking params to search, which
     *                                    is consumed by this method
     * @param serviceName                 the name of the service whose params should be searched
     * @param clientVersionKey            the key of the param containing the client version
     * @return the client version found, or {@code null} if there is none
     */
    @Nullable
    private static String getClientVersionFromServiceTrackingParam(
            @Nonnull final Stream<JsonObject> serviceTrackingParamsStream,
            @Nonnull final String serviceName,
            @Nonnull final String clientVersionKey) {
        // All the params of the service tracking params of the requested service
        final Stream<JsonObject> serviceParams = serviceTrackingParamsStream
                .filter(tracking -> tracking.getString("service", "").equals(serviceName))
                .flatMap(tracking -> tracking.getArray("params").stream())
                .filter(JsonObject.class::isInstance)
                .map(JsonObject.class::cast);

        final Optional<String> foundVersion = serviceParams
                .filter(entry -> entry.getString("key", "").equals(clientVersionKey))
                .map(entry -> entry.getString("value"))
                .filter(value -> !isNullOrEmpty(value))
                .findFirst();
        return foundVersion.orElse(null);
    }

    /**
     * Get the client version used by YouTube website on InnerTube requests.
     *
     * <p>
     * The version is returned from the cache if it has already been determined. Otherwise, it is
     * extracted from the JavaScript service worker, then from the HTML search results page if the
     * first extraction failed. If both extractions failed, the hardcoded client version is used,
     * provided that it is still valid.
     * </p>
     *
     * @return the client version of the {@code WEB} InnerTube client
     * @throws IOException         if the request checking the hardcoded client version failed
     * @throws ExtractionException if no client version could be determined; the exceptions thrown
     *                             by the failed extractions are attached to it as suppressed
     *                             exceptions
     */
    public static String getClientVersion() throws IOException, ExtractionException {
        if (!isNullOrEmpty(clientVersion)) {
            return clientVersion;
        }

        // Always extract the latest client version, by trying first to extract it from the
        // JavaScript service worker, then from HTML search results page as a fallback, to prevent
        // fingerprinting based on the client version used
        Exception extractionFailure = null;
        try {
            extractClientVersionFromSwJs();
        } catch (final Exception swJsException) {
            try {
                extractClientVersionFromHtmlSearchResultsPage();
            } catch (final Exception htmlException) {
                // Don't give up yet, as the hardcoded client version may still be usable; the
                // failures are kept in order to be reported if it is not
                htmlException.addSuppressed(swJsException);
                extractionFailure = htmlException;
            }
        }

        if (clientVersionExtracted) {
            return clientVersion;
        }

        // Fallback to the hardcoded one if it is valid
        if (isHardcodedClientVersionValid()) {
            clientVersion = HARDCODED_CLIENT_VERSION;
            return clientVersion;
        }

        final ExtractionException exception =
                new ExtractionException("Could not get YouTube WEB client version");
        if (extractionFailure != null) {
            exception.addSuppressed(extractionFailure);
        }
        throw exception;
    }

    /**
     * <p>
     * <b>Only used in tests.</b>
     * </p>
     *
     * <p>
     * Clears the cached {@code WEB} client version and marks it as not extracted, which is a
     * quick-and-dirty way to reset this global state in between test classes.
     * </p>
     * <p>
     * Reaching this state requires a network request, so if it were not reset while the
     * RecordingDownloader is used, only the first test class would have this request recorded
     * and the other tests would fail when run with mocks, because the mock would be missing.
     * </p>
     */
    public static void resetClientVersion() {
        clientVersionExtracted = false;
        clientVersion = null;
    }

    /**
     * <p>
     * <b>Only used in tests.</b>
     * </p>
     *
     * <p>
     * Replaces the random number generator stored by this class.
     * </p>
     *
     * @param random the random number generator to use from now on
     */
    public static void setNumberGenerator(final Random random) {
        numberGenerator = random;
    }

    /**
     * Checks whether the hardcoded YouTube Music client version is still accepted by YouTube
     * Music.
     *
     * <p>
     * A search suggestions request with an empty input is sent using this version, and the
     * version is considered valid if the response body is longer than 500 characters and the
     * response code is 200. The result is not cached by this method.
     * </p>
     *
     * @return whether the hardcoded YouTube Music client version is valid
     * @throws IOException        if the request failed
     * @throws ReCaptchaException if the request could not be made because of a reCAPTCHA
     *                            challenge
     */
    public static boolean isHardcodedYoutubeMusicClientVersionValid() throws IOException,
            ReCaptchaException {
        final String suggestionsUrl =
                "https://music.youtube.com/youtubei/v1/music/get_search_suggestions?"
                        + DISABLE_PRETTY_PRINT_PARAMETER;

        // @formatter:off
        final byte[] requestBody = JsonWriter.string()
            .object()
                .object("context")
                    .object("client")
                        .value("clientName", "WEB_REMIX")
                        .value("clientVersion", HARDCODED_YOUTUBE_MUSIC_CLIENT_VERSION)
                        .value("hl", "en-GB")
                        .value("gl", "GB")
                        .value("platform", "DESKTOP")
                        .value("utcOffsetMinutes", 0)
                    .end()
                    .object("request")
                        .array("internalExperimentFlags")
                        .end()
                        .value("useSsl", true)
                    .end()
                    .object("user")
                        // TODO: provide a way to enable restricted mode with:
                        //  .value("enableSafetyMode", boolean)
                        .value("lockedSafetyMode", false)
                    .end()
                .end()
                .value("input", "")
            .end().done().getBytes(StandardCharsets.UTF_8);
        // @formatter:on

        // Origin and referrer headers, completed with the headers identifying the client
        final var requestHeaders = new HashMap<>(getOriginReferrerHeaders(YOUTUBE_MUSIC_URL));
        requestHeaders.putAll(getClientHeaders(YOUTUBE_MUSIC_CLIENT_ID,
                HARDCODED_YOUTUBE_MUSIC_CLIENT_VERSION));

        final Response suggestionsResponse = getDownloader().postWithContentTypeJson(
                suggestionsUrl, requestHeaders, requestBody);

        // Ensure to have a valid response
        final boolean bodyLongEnough = suggestionsResponse.responseBody().length() > 500;
        return bodyLongEnough && suggestionsResponse.responseCode() == 200;
    }

    /**
     * Get the client version used for InnerTube requests with the YouTube Music desktop client.
     *
     * <p>
     * The version is returned from the cache if it has already been determined. Otherwise, the
     * hardcoded version is used if it is valid; if it is not, the version is extracted from the
     * {@code sw.js} file of YouTube Music, then from its home page if the first extraction
     * failed.
     * </p>
     *
     * @return the client version of the YouTube Music desktop client
     * @throws IOException           if a request failed
     * @throws ReCaptchaException    if a request could not be made because of a reCAPTCHA
     *                               challenge
     * @throws Parser.RegexException if the client version could not be found in the home page of
     *                               YouTube Music
     */
    public static String getYoutubeMusicClientVersion()
            throws IOException, ReCaptchaException, Parser.RegexException {
        if (isNullOrEmpty(youtubeMusicClientVersion)) {
            if (isHardcodedYoutubeMusicClientVersionValid()) {
                youtubeMusicClientVersion = HARDCODED_YOUTUBE_MUSIC_CLIENT_VERSION;
            } else {
                try {
                    final var swJsHeaders = getOriginReferrerHeaders(YOUTUBE_MUSIC_URL);
                    final String swJs = getDownloader()
                            .get("https://music.youtube.com/sw.js", swJsHeaders)
                            .responseBody();

                    youtubeMusicClientVersion = getStringResultFromRegexArray(swJs,
                            INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
                } catch (final Exception e) {
                    // The extraction from sw.js failed: use the home page instead
                    final String homePage = getDownloader()
                            .get("https://music.youtube.com/?ucbcb=1", getCookieHeader())
                            .responseBody();

                    youtubeMusicClientVersion = getStringResultFromRegexArray(homePage,
                            INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
                }
            }
        }

        return youtubeMusicClientVersion;
    }

    /**
     * Build an absolute URL from a YouTube {@code navigationEndpoint} JSON object.
     *
     * <p>
     * The endpoint types are tried in this order: {@code urlEndpoint}, {@code browseEndpoint},
     * {@code watchEndpoint}, {@code watchPlaylistEndpoint} and finally the {@code url} found in
     * {@code commandMetadata.webCommandMetadata}.
     * </p>
     *
     * <p>
     * For YouTube redirect URLs ({@code /redirect?...}), the decoded value of the {@code q}
     * parameter is returned. Parameters without a value are ignored.
     * </p>
     *
     * @param navigationEndpoint the navigation endpoint to get the URL from
     * @return the URL the endpoint points to, or {@code null} if no URL could be built
     */
    @Nullable
    public static String getUrlFromNavigationEndpoint(
            @Nonnull final JsonObject navigationEndpoint) {
        if (navigationEndpoint.has("urlEndpoint")) {
            String internUrl = navigationEndpoint.getObject("urlEndpoint")
                    .getString("url");
            if (internUrl == null) {
                // Nothing to resolve in this endpoint: fall through to the other endpoint types
                internUrl = "";
            }

            if (internUrl.startsWith("https://www.youtube.com/redirect?")) {
                // remove https://www.youtube.com part to fall in the next if block
                internUrl = internUrl.substring("https://www.youtube.com".length());
            }

            if (internUrl.startsWith("/redirect?")) {
                // q parameter can be the first parameter
                final String[] params = internUrl.substring("/redirect?".length()).split("&");
                for (final String param : params) {
                    // A limit of 2 keeps any '=' contained in the value and always yields at
                    // least one element, even for parameters such as "=" or "q="
                    final String[] keyValue = param.split("=", 2);
                    if (keyValue.length == 2 && keyValue[0].equals("q")
                            && !keyValue[1].isEmpty()) {
                        try {
                            return Utils.decodeUrlUtf8(keyValue[1]);
                        } catch (final UnsupportedEncodingException e) {
                            return null;
                        }
                    }
                }
            } else if (internUrl.startsWith("http")) {
                return internUrl;
            } else if (internUrl.startsWith("/channel") || internUrl.startsWith("/user")
                    || internUrl.startsWith("/watch")) {
                return "https://www.youtube.com" + internUrl;
            }
        }

        if (navigationEndpoint.has("browseEndpoint")) {
            final JsonObject browseEndpoint = navigationEndpoint.getObject("browseEndpoint");
            final String canonicalBaseUrl = browseEndpoint.getString("canonicalBaseUrl");
            final String browseId = browseEndpoint.getString("browseId");

            // All channel ids are prefixed with UC
            if (browseId != null && browseId.startsWith("UC")) {
                return "https://www.youtube.com/channel/" + browseId;
            }

            if (!isNullOrEmpty(canonicalBaseUrl)) {
                return "https://www.youtube.com" + canonicalBaseUrl;
            }
        }

        if (navigationEndpoint.has("watchEndpoint")) {
            final JsonObject watchEndpoint = navigationEndpoint.getObject("watchEndpoint");
            final StringBuilder url = new StringBuilder();
            url.append("https://www.youtube.com/watch?v=")
                    .append(watchEndpoint.getString(VIDEO_ID));
            if (watchEndpoint.has("playlistId")) {
                url.append("&list=").append(watchEndpoint.getString("playlistId"));
            }
            if (watchEndpoint.has("startTimeSeconds")) {
                url.append("&t=").append(watchEndpoint.getInt("startTimeSeconds"));
            }
            return url.toString();
        }

        if (navigationEndpoint.has("watchPlaylistEndpoint")) {
            return "https://www.youtube.com/playlist?list="
                    + navigationEndpoint.getObject("watchPlaylistEndpoint")
                    .getString("playlistId");
        }

        if (navigationEndpoint.has("commandMetadata")) {
            final JsonObject metadata = navigationEndpoint.getObject("commandMetadata")
                    .getObject("webCommandMetadata");
            if (metadata.has("url")) {
                return "https://www.youtube.com" + metadata.getString("url");
            }
        }

        return null;
    }

    /**
     * Get the text from a JSON object that has either a {@code simpleText} or a {@code runs}
     * array.
     *
     * <p>
     * When {@code html} is {@code true}, runs which have a {@code navigationEndpoint} resolving
     * to a URL are turned into links, the {@code bold}, {@code italics} and
     * {@code strikethrough} flags of runs are converted to the corresponding HTML tags, line
     * breaks are replaced by {@code <br>} and double spaces by a space followed by
     * {@code &nbsp;}. A {@code simpleText} value is always returned as is.
     * </p>
     *
     * <p>
     * Elements of the {@code runs} array which are not JSON objects or which have no
     * {@code text} are ignored.
     * </p>
     *
     * @param textObject JSON object to get the text from
     * @param html       whether to return HTML, by parsing the {@code navigationEndpoint}
     * @return text in the JSON object or {@code null}
     */
    @Nullable
    public static String getTextFromObject(final JsonObject textObject, final boolean html) {
        if (isNullOrEmpty(textObject)) {
            return null;
        }

        if (textObject.has("simpleText")) {
            return textObject.getString("simpleText");
        }

        final JsonArray runs = textObject.getArray("runs");
        if (runs.isEmpty()) {
            return null;
        }

        final StringBuilder textBuilder = new StringBuilder();
        for (final Object o : runs) {
            if (!(o instanceof JsonObject)) {
                // Skip malformed runs instead of failing on the cast
                continue;
            }
            final JsonObject run = (JsonObject) o;
            String text = run.getString("text");
            if (text == null) {
                // A run without text would otherwise end up as the literal "null" in the result
                continue;
            }

            if (html) {
                if (run.has("navigationEndpoint")) {
                    final String url = getUrlFromNavigationEndpoint(
                            run.getObject("navigationEndpoint"));
                    if (!isNullOrEmpty(url)) {
                        text = "<a href=\"" + Entities.escape(url) + "\">" + Entities.escape(text)
                                + "</a>";
                    }
                }

                final boolean bold = run.has("bold")
                        && run.getBoolean("bold");
                final boolean italic = run.has("italics")
                        && run.getBoolean("italics");
                final boolean strikethrough = run.has("strikethrough")
                        && run.getBoolean("strikethrough");

                if (bold) {
                    textBuilder.append("<b>");
                }
                if (italic) {
                    textBuilder.append("<i>");
                }
                if (strikethrough) {
                    textBuilder.append("<s>");
                }

                textBuilder.append(text);

                // Tags are closed in the reverse order they were opened
                if (strikethrough) {
                    textBuilder.append("</s>");
                }
                if (italic) {
                    textBuilder.append("</i>");
                }
                if (bold) {
                    textBuilder.append("</b>");
                }
            } else {
                textBuilder.append(text);
            }
        }

        String text = textBuilder.toString();

        if (html) {
            // Literal replacements: no regular expression is needed here
            text = text.replace("\n", "<br>");
            text = text.replace("  ", " &nbsp;");
        }

        return text;
    }

    /**
     * Get the plain text from a JSON text object, failing when no text can be extracted.
     *
     * @param textObject JSON object to get the text from
     * @param error      details appended to the message of the exception thrown on failure
     * @return the text extracted by {@link #getTextFromObject(JsonObject)}, never {@code null}
     * @throws ParsingException if {@link #getTextFromObject(JsonObject)} returned {@code null}
     */
    @Nonnull
    public static String getTextFromObjectOrThrow(final JsonObject textObject, final String error)
            throws ParsingException {
        final String extractedText = getTextFromObject(textObject);
        if (extractedText != null) {
            return extractedText;
        }
        throw new ParsingException("Could not extract text: " + error);
    }

    /**
     * Get the plain text (no HTML) from a JSON text object.
     *
     * <p>
     * This is a shorthand for {@link #getTextFromObject(JsonObject, boolean)} with {@code html}
     * set to {@code false}.
     * </p>
     *
     * @param textObject JSON object to get the text from
     * @return text in the JSON object or {@code null}
     */
    @Nullable
    public static String getTextFromObject(final JsonObject textObject) {
        return getTextFromObject(textObject, false);
    }

    /**
     * Get the first URL found in the {@code runs} array of a JSON text object.
     *
     * <p>
     * The {@code navigationEndpoint} of each run is resolved, in order, with
     * {@link #getUrlFromNavigationEndpoint(JsonObject)}; the first non-empty result is returned.
     * </p>
     *
     * @param textObject JSON object with a {@code runs} array
     * @return the first URL found, or {@code null} if the object is {@code null} or empty or if
     * no run resolves to a URL
     */
    @Nullable
    public static String getUrlFromObject(final JsonObject textObject) {
        if (isNullOrEmpty(textObject)) {
            return null;
        }

        // An empty runs array simply produces an empty stream, hence null
        return textObject.getArray("runs")
                .stream()
                .map(JsonObject.class::cast)
                .map(run -> getUrlFromNavigationEndpoint(run.getObject("navigationEndpoint")))
                .filter(candidate -> !isNullOrEmpty(candidate))
                .findFirst()
                .orElse(null);
    }

    /**
     * Get the text stored at a key of a JSON object, whether the value is a plain string or a
     * text object.
     *
     * @param jsonObject the JSON object containing the key
     * @param theKey     the key at which the text is stored
     * @return the string value when the key holds a string, otherwise the result of
     * {@link #getTextFromObject(JsonObject)} on the object at this key
     */
    @Nullable
    public static String getTextAtKey(@Nonnull final JsonObject jsonObject, final String theKey) {
        return jsonObject.isString(theKey)
                ? jsonObject.getString(theKey)
                : getTextFromObject(jsonObject.getObject(theKey));
    }

    /**
     * Normalize a thumbnail URL so that it uses the HTTPS scheme.
     *
     * <p>
     * A leading {@code //} is removed first. Then, URLs starting with the {@code HTTP} prefix are
     * passed to {@link Utils#replaceHttpWithHttps(String)}, URLs starting with the {@code HTTPS}
     * prefix are kept as they are and any other value gets {@code https://} prepended.
     * </p>
     *
     * @param thumbnailUrl the thumbnail URL to fix
     * @return the fixed thumbnail URL
     */
    public static String fixThumbnailUrl(@Nonnull final String thumbnailUrl) {
        final String withoutLeadingSlashes = thumbnailUrl.startsWith("//")
                ? thumbnailUrl.substring(2)
                : thumbnailUrl;

        if (withoutLeadingSlashes.startsWith(HTTP)) {
            return Utils.replaceHttpWithHttps(withoutLeadingSlashes);
        }
        if (withoutLeadingSlashes.startsWith(HTTPS)) {
            return withoutLeadingSlashes;
        }
        return "https://" + withoutLeadingSlashes;
    }

    /**
     * Get thumbnails from a {@link JsonObject} representing a YouTube
     * {@link org.schabi.newpipe.extractor.InfoItem InfoItem}.
     *
     * <p>
     * The {@code thumbnails} {@link JsonArray} of the {@code thumbnail} {@link JsonObject} of the
     * item is read and converted with {@link #getImagesFromThumbnailsArray(JsonArray)}.
     * </p>
     *
     * @param infoItem a YouTube {@link org.schabi.newpipe.extractor.InfoItem InfoItem}
     * @return an unmodifiable list of {@link Image}s found in the {@code thumbnails}
     * {@link JsonArray}
     * @throws ParsingException if any exception occurs while reading the array or while
     *                          {@link #getImagesFromThumbnailsArray(JsonArray)} is executed
     */
    @Nonnull
    public static List<Image> getThumbnailsFromInfoItem(@Nonnull final JsonObject infoItem)
            throws ParsingException {
        try {
            final JsonArray thumbnailsArray = infoItem.getObject("thumbnail")
                    .getArray("thumbnails");
            return getImagesFromThumbnailsArray(thumbnailsArray);
        } catch (final Exception e) {
            throw new ParsingException("Could not get thumbnails from InfoItem", e);
        }
    }

    /**
     * Get images from a YouTube {@code thumbnails} {@link JsonArray}.
     *
     * <p>
     * Only the elements of the array which are {@link JsonObject}s with a non-empty {@code url}
     * are converted. The URL of each {@link Image} is fixed with
     * {@link #fixThumbnailUrl(String)}; a missing {@code height} or {@code width} is replaced by
     * {@link Image#HEIGHT_UNKNOWN} or {@link Image#WIDTH_UNKNOWN} and the resolution level is
     * derived from the height.
     * </p>
     *
     * @param thumbnails a YouTube {@code thumbnails} {@link JsonArray}
     * @return an unmodifiable list of {@link Image}s extracted from the given {@link JsonArray}
     */
    @Nonnull
    public static List<Image> getImagesFromThumbnailsArray(
            @Nonnull final JsonArray thumbnails) {
        return thumbnails.stream()
                .filter(item -> item instanceof JsonObject)
                .map(item -> (JsonObject) item)
                .filter(item -> !isNullOrEmpty(item.getString("url")))
                .map(item -> {
                    final int imageHeight = item.getInt("height", Image.HEIGHT_UNKNOWN);
                    final String imageUrl = fixThumbnailUrl(item.getString("url"));
                    final int imageWidth = item.getInt("width", Image.WIDTH_UNKNOWN);
                    return new Image(imageUrl, imageHeight, imageWidth,
                            ResolutionLevel.fromHeight(imageHeight));
                })
                .collect(Collectors.toUnmodifiableList());
    }

    /**
     * Check that a response looks like a valid JSON response and return its body.
     *
     * <p>
     * The following checks are done, in this order: the response code is not 404, the body is at
     * least 50 characters long, the latest URL is not the {@code /oops} or {@code /error} page
     * of {@code www.youtube.com} and the {@code Content-Type} header, if any, does not contain
     * {@code text/html}.
     * </p>
     *
     * @param response the response to check
     * @return the body of the response
     * @throws ContentNotAvailableException if the response code is 404 or if the request was
     *                                      redirected to an error page
     * @throws ParsingException             if the body is too short or if an HTML document was
     *                                      received
     * @throws MalformedURLException        if the latest URL of the response cannot be parsed
     */
    @Nonnull
    public static String getValidJsonResponseBody(@Nonnull final Response response)
            throws ParsingException, MalformedURLException {
        final int statusCode = response.responseCode();
        if (statusCode == 404) {
            throw new ContentNotAvailableException("Not found (\""
                    + statusCode + " " + response.responseMessage() + "\")");
        }

        final String body = response.responseBody();
        // Ensure to have a valid response
        if (body.length() < 50) {
            throw new ParsingException("JSON response is too short");
        }

        // Check if the request was redirected to the error page.
        final URL finalUrl = new URL(response.latestUrl());
        final boolean onYoutubeHost = finalUrl.getHost().equalsIgnoreCase("www.youtube.com");
        if (onYoutubeHost) {
            final String finalPath = finalUrl.getPath();
            final boolean isErrorPage = finalPath.equalsIgnoreCase("/oops")
                    || finalPath.equalsIgnoreCase("/error");
            if (isErrorPage) {
                throw new ContentNotAvailableException("Content unavailable");
            }
        }

        final String contentType = response.getHeader("Content-Type");
        final boolean isHtml = contentType != null
                && contentType.toLowerCase().contains("text/html");
        if (isHtml) {
            throw new ParsingException("Got HTML document, expected JSON response"
                    + " (latest url was: \"" + response.latestUrl() + "\")");
        }

        return body;
    }

    /**
     * Send a JSON POST request to an endpoint under {@code YOUTUBEI_V1_URL}, using the headers
     * returned by {@link #getYouTubeHeaders()}, and parse the response as a JSON object.
     *
     * @param endpoint     the endpoint appended to {@code YOUTUBEI_V1_URL}
     * @param body         the body of the request
     * @param localization the localization passed to the downloader
     * @return the response body, validated by {@link #getValidJsonResponseBody(Response)} and
     * parsed as a {@link JsonObject}
     */
    public static JsonObject getJsonPostResponse(final String endpoint,
                                                 final byte[] body,
                                                 final Localization localization)
            throws IOException, ExtractionException {
        final Map<String, List<String>> requestHeaders = getYouTubeHeaders();
        final String requestUrl = YOUTUBEI_V1_URL + endpoint + "?"
                + DISABLE_PRETTY_PRINT_PARAMETER;

        final Response response = getDownloader()
                .postWithContentTypeJson(requestUrl, requestHeaders, body, localization);
        return JsonUtils.toJsonObject(getValidJsonResponseBody(response));
    }

    /**
     * Send a JSON POST request with the user-agent returned by
     * {@link #getAndroidUserAgent(Localization)} and parse the response as a JSON object.
     *
     * @param endpoint            the endpoint to request
     * @param body                the body of the request
     * @param localization        the localization used for the user-agent and the request
     * @param endPartOfUrlRequest an optional string appended at the end of the request URL
     * @return the response parsed as a {@link JsonObject}
     */
    public static JsonObject getJsonAndroidPostResponse(
            final String endpoint,
            final byte[] body,
            @Nonnull final Localization localization,
            @Nullable final String endPartOfUrlRequest) throws IOException, ExtractionException {
        final String androidUserAgent = getAndroidUserAgent(localization);
        return getMobilePostResponse(endpoint, body, localization, androidUserAgent,
                endPartOfUrlRequest);
    }

    /**
     * Send a JSON POST request with the user-agent returned by
     * {@link #getIosUserAgent(Localization)} and parse the response as a JSON object.
     *
     * @param endpoint            the endpoint to request
     * @param body                the body of the request
     * @param localization        the localization used for the user-agent and the request
     * @param endPartOfUrlRequest an optional string appended at the end of the request URL
     * @return the response parsed as a {@link JsonObject}
     */
    public static JsonObject getJsonIosPostResponse(
            final String endpoint,
            final byte[] body,
            @Nonnull final Localization localization,
            @Nullable final String endPartOfUrlRequest) throws IOException, ExtractionException {
        final String iosUserAgent = getIosUserAgent(localization);
        return getMobilePostResponse(endpoint, body, localization, iosUserAgent,
                endPartOfUrlRequest);
    }

    /**
     * Send a JSON POST request to an endpoint under {@code YOUTUBEI_V1_GAPIS_URL} with the given
     * user-agent and parse the response as a JSON object.
     *
     * @param endpoint            the endpoint appended to {@code YOUTUBEI_V1_GAPIS_URL}
     * @param body                the body of the request
     * @param localization        the localization passed to the downloader
     * @param userAgent           the value of the {@code User-Agent} header
     * @param endPartOfUrlRequest a string appended at the end of the request URL, ignored when
     *                            {@code null} or empty
     * @return the response body, validated by {@link #getValidJsonResponseBody(Response)} and
     * parsed as a {@link JsonObject}
     */
    private static JsonObject getMobilePostResponse(
            final String endpoint,
            final byte[] body,
            @Nonnull final Localization localization,
            @Nonnull final String userAgent,
            @Nullable final String endPartOfUrlRequest) throws IOException, ExtractionException {
        final Map<String, List<String>> requestHeaders = Map.of(
                "User-Agent", List.of(userAgent),
                "X-Goog-Api-Format-Version", List.of("2"));

        String requestUrl = YOUTUBEI_V1_GAPIS_URL + endpoint + "?"
                + DISABLE_PRETTY_PRINT_PARAMETER;
        if (!isNullOrEmpty(endPartOfUrlRequest)) {
            requestUrl = requestUrl + endPartOfUrlRequest;
        }

        final Response response = getDownloader()
                .postWithContentTypeJson(requestUrl, requestHeaders, body, localization);
        return JsonUtils.toJsonObject(getValidJsonResponseBody(response));
    }

    /**
     * Prepare a JSON builder for requests made with the {@code WEB} client, without visitor data.
     *
     * <p>
     * This is a shorthand for
     * {@link #prepareDesktopJsonBuilder(Localization, ContentCountry, String)} with a
     * {@code null} visitor data.
     * </p>
     *
     * @param localization   the localization to use in the request
     * @param contentCountry the content country to use in the request
     * @return a JSON builder with the {@code context} object already filled
     */
    @Nonnull
    public static JsonBuilder<JsonObject> prepareDesktopJsonBuilder(
            @Nonnull final Localization localization,
            @Nonnull final ContentCountry contentCountry)
            throws IOException, ExtractionException {
        return prepareDesktopJsonBuilder(localization, contentCountry, null);
    }

    /**
     * Prepare a JSON builder for requests made with the {@code WEB} client.
     *
     * <p>
     * The builder returned contains a complete {@code context} object ({@code client},
     * {@code request} and {@code user}); callers add their own properties next to it.
     * </p>
     *
     * @param localization   the localization, used as the {@code hl} value
     * @param contentCountry the content country, used as the {@code gl} value
     * @param visitorData    the {@code visitorData} value to add to the {@code client} object,
     *                       not added when {@code null}
     * @return a JSON builder with the {@code context} object already filled
     */
    @Nonnull
    public static JsonBuilder<JsonObject> prepareDesktopJsonBuilder(
            @Nonnull final Localization localization,
            @Nonnull final ContentCountry contentCountry,
            @Nullable final String visitorData)
            throws IOException, ExtractionException {
        final String languageCode = localization.getLocalizationCode();
        final String countryCode = contentCountry.getCountryCode();
        final String clientVersion = getClientVersion();

        // @formatter:off
        final JsonBuilder<JsonObject> clientBuilder = JsonObject.builder()
                .object("context")
                    .object("client")
                        .value("hl", languageCode)
                        .value("gl", countryCode)
                        .value("clientName", "WEB")
                        .value("clientVersion", clientVersion)
                        .value("originalUrl", "https://www.youtube.com")
                        .value("platform", "DESKTOP")
                        .value("utcOffsetMinutes", 0);

        // The client object is still open here, so visitor data ends up inside it
        if (visitorData != null) {
            clientBuilder.value("visitorData", visitorData);
        }

        return clientBuilder.end()
                    .object("request")
                        .array("internalExperimentFlags")
                        .end()
                        .value("useSsl", true)
                    .end()
                    .object("user")
                        // TODO: provide a way to enable restricted mode with:
                        //  .value("enableSafetyMode", boolean)
                        .value("lockedSafetyMode", false)
                    .end()
                .end();
        // @formatter:on
    }

    /**
     * Prepare a JSON builder for requests made with the {@code ANDROID} client.
     *
     * <p>
     * The builder returned contains a complete {@code context} object ({@code client},
     * {@code request} and {@code user}); callers add their own properties next to it.
     * </p>
     *
     * @param localization   the localization, used as the {@code hl} value
     * @param contentCountry the content country, used as the {@code gl} value
     * @return a JSON builder with the {@code context} object already filled
     */
    @Nonnull
    public static JsonBuilder<JsonObject> prepareAndroidMobileJsonBuilder(
            @Nonnull final Localization localization,
            @Nonnull final ContentCountry contentCountry) {
        final String languageCode = localization.getLocalizationCode();
        final String countryCode = contentCountry.getCountryCode();

        // @formatter:off
        return JsonObject.builder()
                .object("context")
                    .object("client")
                        .value("clientName", "ANDROID")
                        .value("clientVersion", ANDROID_YOUTUBE_CLIENT_VERSION)
                        .value("platform", "MOBILE")
                        .value("osName", "Android")
                        .value("osVersion", "14")
                        /*
                        The Android SDK version matching the Android version sent above is
                        provided, as a valid one is required to be sure to get a valid player
                        response
                        Without this parameter, the player response is replaced by an error
                        saying the message "The following content is not available on this app.
                        Watch this content on the latest version on YouTube" (it was previously
                        a 5-minute video with this message)
                        See https://github.com/TeamNewPipe/NewPipe/issues/8713
                        */
                        .value("androidSdkVersion", 34)
                        .value("hl", languageCode)
                        .value("gl", countryCode)
                        .value("utcOffsetMinutes", 0)
                    .end()
                    .object("request")
                        .array("internalExperimentFlags")
                        .end()
                        .value("useSsl", true)
                    .end()
                    .object("user")
                        // TODO: provide a way to enable restricted mode with:
                        //  .value("enableSafetyMode", boolean)
                        .value("lockedSafetyMode", false)
                    .end()
                .end();
        // @formatter:on
    }

    /**
     * Prepare a JSON builder for requests made with the {@code IOS} client.
     *
     * <p>
     * The builder returned contains a complete {@code context} object ({@code client},
     * {@code request} and {@code user}); callers add their own properties next to it.
     * </p>
     *
     * @param localization   the localization, used as the {@code hl} value
     * @param contentCountry the content country, used as the {@code gl} value
     * @return a JSON builder with the {@code context} object already filled
     */
    @Nonnull
    public static JsonBuilder<JsonObject> prepareIosMobileJsonBuilder(
            @Nonnull final Localization localization,
            @Nonnull final ContentCountry contentCountry) {
        final String languageCode = localization.getLocalizationCode();
        final String countryCode = contentCountry.getCountryCode();

        // @formatter:off
        return JsonObject.builder()
                .object("context")
                    .object("client")
                        .value("clientName", "IOS")
                        .value("clientVersion", IOS_YOUTUBE_CLIENT_VERSION)
                        .value("deviceMake",  "Apple")
                        // Device model is required to get 60fps streams
                        .value("deviceModel", IOS_DEVICE_MODEL)
                        .value("platform", "MOBILE")
                        .value("osName", "iOS")
                        .value("osVersion", IOS_OS_VERSION)
                        .value("hl", languageCode)
                        .value("gl", countryCode)
                        .value("utcOffsetMinutes", 0)
                    .end()
                    .object("request")
                        .array("internalExperimentFlags")
                        .end()
                        .value("useSsl", true)
                    .end()
                    .object("user")
                        // TODO: provide a way to enable restricted mode with:
                        //  .value("enableSafetyMode", boolean)
                        .value("lockedSafetyMode", false)
                    .end()
                .end();
        // @formatter:on
    }

    /**
     * Prepare a JSON builder for requests made with the {@code TVHTML5_SIMPLY_EMBEDDED_PLAYER}
     * client.
     *
     * <p>
     * The builder returned contains a complete {@code context} object ({@code client},
     * {@code thirdParty}, {@code request} and {@code user}); callers add their own properties
     * next to it.
     * </p>
     *
     * @param localization   the localization, used as the {@code hl} value
     * @param contentCountry the content country, used as the {@code gl} value
     * @param videoId        the ID of the video, used to build the {@code embedUrl} value
     * @return a JSON builder with the {@code context} object already filled
     */
    @Nonnull
    public static JsonBuilder<JsonObject> prepareTvHtml5EmbedJsonBuilder(
            @Nonnull final Localization localization,
            @Nonnull final ContentCountry contentCountry,
            @Nonnull final String videoId) {
        final String languageCode = localization.getLocalizationCode();
        final String countryCode = contentCountry.getCountryCode();
        final String embedUrl = "https://www.youtube.com/watch?v=" + videoId;

        // @formatter:off
        return JsonObject.builder()
                .object("context")
                    .object("client")
                        .value("clientName", "TVHTML5_SIMPLY_EMBEDDED_PLAYER")
                        .value("clientVersion", TVHTML5_SIMPLY_EMBED_CLIENT_VERSION)
                        .value("clientScreen", "EMBED")
                        .value("platform", "TV")
                        .value("hl", languageCode)
                        .value("gl", countryCode)
                        .value("utcOffsetMinutes", 0)
                    .end()
                    .object("thirdParty")
                        .value("embedUrl", embedUrl)
                    .end()
                    .object("request")
                        .array("internalExperimentFlags")
                        .end()
                        .value("useSsl", true)
                    .end()
                    .object("user")
                        // TODO: provide a way to enable restricted mode with:
                        //  .value("enableSafetyMode", boolean)
                        .value("lockedSafetyMode", false)
                    .end()
                .end();
        // @formatter:on
    }

    /**
     * Create the body of a player request made either with the {@code WEB} client or with the
     * {@code TVHTML5_SIMPLY_EMBEDDED_PLAYER} client.
     *
     * @param localization                the localization to use in the request
     * @param contentCountry              the content country to use in the request
     * @param videoId                     the ID of the video to request
     * @param sts                         the signature timestamp, sent as
     *                                    {@code signatureTimestamp}
     * @param isTvHtml5DesktopJsonBuilder whether the base of the body is built with
     *                                    {@link #prepareTvHtml5EmbedJsonBuilder(Localization,
     *                                    ContentCountry, String)} instead of
     *                                    {@link #prepareDesktopJsonBuilder(Localization,
     *                                    ContentCountry)}
     * @param contentPlaybackNonce        the content playback nonce to send
     * @return the JSON body of the request, encoded in UTF-8
     */
    @Nonnull
    public static byte[] createDesktopPlayerBody(
            @Nonnull final Localization localization,
            @Nonnull final ContentCountry contentCountry,
            @Nonnull final String videoId,
            @Nonnull final Integer sts,
            final boolean isTvHtml5DesktopJsonBuilder,
            @Nonnull final String contentPlaybackNonce) throws IOException, ExtractionException {
        final JsonBuilder<JsonObject> baseBuilder = isTvHtml5DesktopJsonBuilder
                ? prepareTvHtml5EmbedJsonBuilder(localization, contentCountry, videoId)
                : prepareDesktopJsonBuilder(localization, contentCountry);

        // @formatter:off
        final JsonObject playerRequest = baseBuilder
                .object("playbackContext")
                    .object("contentPlaybackContext")
                        // Signature timestamp from the JavaScript base player is needed to get
                        // working obfuscated URLs
                        .value("signatureTimestamp", sts)
                        .value("referer", "https://www.youtube.com/watch?v=" + videoId)
                    .end()
                .end()
                .value(CPN, contentPlaybackNonce)
                .value(VIDEO_ID, videoId)
                .value(CONTENT_CHECK_OK, true)
                .value(RACY_CHECK_OK, true)
                .done();
        // @formatter:on

        return JsonWriter.string(playerRequest).getBytes(StandardCharsets.UTF_8);
    }

    /**
     * Get the user-agent string used for InnerTube requests made with the Android client.
     *
     * <p>
     * When the {@link Localization} provided is {@code null},
     * {@link Localization#DEFAULT the default one} is used instead.
     * </p>
     *
     * @param localization the {@link Localization} whose country code is set in the user-agent
     * @return the Android user-agent used for InnerTube requests with the Android client,
     * depending on the {@link Localization} provided
     */
    @Nonnull
    public static String getAndroidUserAgent(@Nullable final Localization localization) {
        final Localization effectiveLocalization = localization == null
                ? Localization.DEFAULT
                : localization;

        // Spoofing an Android 14 device with the hardcoded version of the Android app
        return "com.google.android.youtube/" + ANDROID_YOUTUBE_CLIENT_VERSION
                + " (Linux; U; Android 14; " + effectiveLocalization.getCountryCode()
                + ") gzip";
    }

    /**
     * Get the user-agent string used for InnerTube requests made with the iOS client.
     *
     * <p>
     * When the {@link Localization} provided is {@code null},
     * {@link Localization#DEFAULT the default one} is used instead.
     * </p>
     *
     * @param localization the {@link Localization} whose country code is set in the user-agent
     * @return the iOS user-agent used for InnerTube requests with the iOS client, depending on the
     * {@link Localization} provided
     */
    @Nonnull
    public static String getIosUserAgent(@Nullable final Localization localization) {
        final Localization effectiveLocalization = localization == null
                ? Localization.DEFAULT
                : localization;

        // Spoofing an iPhone 15 running iOS 17.4.1 with the hardcoded version of the iOS app
        return "com.google.ios.youtube/" + IOS_YOUTUBE_CLIENT_VERSION
                + "(" + IOS_DEVICE_MODEL + "; U; CPU iOS " + IOS_USER_AGENT_VERSION
                + " like Mac OS X; " + effectiveLocalization.getCountryCode() + ")";
    }

    /**
     * Returns a new modifiable {@link Map} containing the required YouTube Music headers: the
     * {@code Origin} and {@code Referer} headers set to the YouTube Music URL and the client
     * name and version headers, the version being the stored YouTube Music client version.
     */
    @Nonnull
    public static Map<String, List<String>> getYoutubeMusicHeaders() {
        final Map<String, List<String>> musicHeaders =
                new HashMap<>(getOriginReferrerHeaders(YOUTUBE_MUSIC_URL));
        final Map<String, List<String>> clientHeaders =
                getClientHeaders(YOUTUBE_MUSIC_CLIENT_ID, youtubeMusicClientVersion);
        musicHeaders.putAll(clientHeaders);
        return musicHeaders;
    }

    /**
     * Returns a {@link Map} containing the required YouTube headers: the ones returned by
     * {@link #getClientInfoHeaders()} plus the consent cookie generated by
     * {@link #generateConsentCookie()}, sent to prevent redirects to
     * <code>consent.youtube.com</code>
     */
    public static Map<String, List<String>> getYouTubeHeaders()
            throws ExtractionException, IOException {
        final Map<String, List<String>> youtubeHeaders = getClientInfoHeaders();
        final String consentCookie = generateConsentCookie();
        youtubeHeaders.put("Cookie", List.of(consentCookie));
        return youtubeHeaders;
    }

    /**
     * Returns a new modifiable {@link Map} containing the {@code X-YouTube-Client-Name},
     * {@code X-YouTube-Client-Version}, {@code Origin}, and {@code Referer} headers for the web
     * client.
     */
    public static Map<String, List<String>> getClientInfoHeaders()
            throws ExtractionException, IOException {
        final Map<String, List<String>> infoHeaders =
                new HashMap<>(getOriginReferrerHeaders("https://www.youtube.com"));
        final Map<String, List<String>> clientHeaders =
                getClientHeaders(WEB_CLIENT_ID, getClientVersion());
        infoHeaders.putAll(clientHeaders);
        return infoHeaders;
    }

    /**
     * Returns an unmodifiable {@link Map} in which both the {@code Origin} and the
     * {@code Referer} headers have the given URL as their only value.
     *
     * @param url The URL to be set as the origin and referrer.
     */
    private static Map<String, List<String>> getOriginReferrerHeaders(@Nonnull final String url) {
        return Map.of(
                "Origin", List.of(url),
                "Referer", List.of(url));
    }

    /**
     * Returns an unmodifiable {@link Map} with two entries: the {@code X-YouTube-Client-Name}
     * header and the {@code X-YouTube-Client-Version} header.
     *
     * @param name The X-YouTube-Client-Name value.
     * @param version X-YouTube-Client-Version value.
     */
    private static Map<String, List<String>> getClientHeaders(@Nonnull final String name,
                                                              @Nonnull final String version) {
        final List<String> clientName = List.of(name);
        final List<String> clientVersion = List.of(version);
        return Map.of("X-YouTube-Client-Name", clientName,
                "X-YouTube-Client-Version", clientVersion);
    }

    /**
     * Create a map with the required cookie header, whose value is the consent cookie generated
     * by {@link #generateConsentCookie()}.
     *
     * @return An unmodifiable map containing only the {@code Cookie} header.
     */
    public static Map<String, List<String>> getCookieHeader() {
        final String consentCookie = generateConsentCookie();
        return Map.of("Cookie", List.of(consentCookie));
    }

    /**
     * Generate the {@code SOCS} consent cookie, whose value depends on whether consent is
     * accepted.
     *
     * @return the {@code SOCS} cookie, in the {@code name=value} form
     */
    @Nonnull
    public static String generateConsentCookie() {
        final String socsValue;
        if (isConsentAccepted()) {
            // CAISAiAD means that the user configured manually cookies YouTube, regardless of
            // the consent values
            // This value surprisingly allows to extract mixes and some YouTube Music playlists
            // in the same way when a user allows all cookies
            socsValue = "CAISAiAD";
        } else {
            // CAE= means that the user rejected all non-necessary cookies with the "Reject
            // all" button on the consent page
            socsValue = "CAE=";
        }
        return "SOCS=" + socsValue;
    }

    /**
     * Extract the value of a cookie from the {@code set-cookie} headers of a response.
     *
     * <p>
     * The value is the text between {@code cookieName=} and the next {@code ;}, or the end of
     * the header when there is no {@code ;} after the value. If several headers contain the
     * cookie, the value found in the last one is returned.
     * </p>
     *
     * @param cookieName the name of the cookie to look for
     * @param response   the response to get the {@code set-cookie} headers from
     * @return the value of the cookie, or an empty string if the response has no
     * {@code set-cookie} header or if the cookie was not found
     */
    public static String extractCookieValue(final String cookieName,
                                            @Nonnull final Response response) {
        final List<String> cookies = response.responseHeaders().get("set-cookie");
        if (cookies == null) {
            return "";
        }

        // Including the '=' avoids matching a longer cookie name which merely starts with
        // cookieName, and guarantees that the value start index is inside the string
        final String namePrefix = cookieName + "=";

        String result = "";
        for (final String cookie : cookies) {
            final int startIndex = cookie.indexOf(namePrefix);
            if (startIndex == -1) {
                continue;
            }

            final int valueStart = startIndex + namePrefix.length();
            final int separatorIndex = cookie.indexOf(';', valueStart);
            // A cookie without attributes has no ';': its value runs to the end of the header
            final int valueEnd = separatorIndex == -1 ? cookie.length() : separatorIndex;
            result = cookie.substring(valueStart, valueEnd);
        }
        return result;
    }

    /**
     * Shared alert detection function, multiple endpoints return the error similarly structured.
     * <p>
     * Only the first element of the {@code alerts} array is inspected, and only when its type is
     * "ERROR" (case-insensitive) is an exception thrown.
     * </p>
     *
     * @param initialData the object which will be checked if an alert is present
     * @throws AccountTerminatedException   if the alert says that the account has been
     *                                      terminated or that the channel was removed
     * @throws ContentNotAvailableException if any other error alert is detected
     */
    public static void defaultAlertsCheck(@Nonnull final JsonObject initialData)
            throws ParsingException {
        final JsonArray alerts = initialData.getArray("alerts");
        if (isNullOrEmpty(alerts)) {
            return;
        }

        final JsonObject firstAlert = alerts.getObject(0).getObject("alertRenderer");
        final String message = getTextFromObject(firstAlert.getObject("text"));
        if (!firstAlert.getString("type", "").equalsIgnoreCase("ERROR")) {
            return;
        }

        final boolean accountGone = message != null
                && (message.contains("This account has been terminated")
                || message.contains("This channel was removed"));
        if (!accountGone) {
            throw new ContentNotAvailableException("Got error: \"" + message + "\"");
        }

        // Possible error messages:
        // "This account has been terminated for a violation of YouTube's Terms of
        //     Service."
        // "This account has been terminated due to multiple or severe violations of
        //     YouTube's policy prohibiting hate speech."
        // "This account has been terminated due to multiple or severe violations of
        //     YouTube's policy prohibiting content designed to harass, bully or
        //     threaten."
        // "This account has been terminated due to multiple or severe violations
        //     of YouTube's policy against spam, deceptive practices and misleading
        //     content or other Terms of Service violations."
        // "This account has been terminated due to multiple or severe violations of
        //     YouTube's policy on nudity or sexual content."
        // "This account has been terminated for violating YouTube's Community
        //     Guidelines."
        // "This account has been terminated because we received multiple
        //     third-party claims of copyright infringement regarding material that
        //     the user posted."
        // "This account has been terminated because it is linked to an account that
        //     received multiple third-party claims of copyright infringement."
        // "This channel was removed because it violated our Community Guidelines."
        final boolean dueToViolation = message.matches(".*violat(ed|ion|ing).*")
                || message.contains("infringement");
        if (dueToViolation) {
            throw new AccountTerminatedException(message,
                    AccountTerminatedException.Reason.VIOLATION);
        }
        throw new AccountTerminatedException(message);
    }

    /**
     * Sometimes, YouTube provides URLs which use Google's cache. They look like
     * {@code https://webcache.googleusercontent.com/search?q=cache:CACHED_URL}
     *
     * <p>
     * Everything which follows the first {@code cache:} marker is considered to be the cached
     * URL, so a cached URL which itself contains {@code cache:} is returned completely. If the
     * URL refers to Google's webcache but has no {@code cache:} marker, or nothing after it, the
     * given URL is returned unchanged instead of failing.
     * </p>
     *
     * @param url the URL which might refer to the Google's webcache, may be {@code null}
     * @return the URL which is referring to the original site, the given URL if no cached URL
     * could be extracted from it, or {@code null} if the given URL is {@code null}
     */
    public static String extractCachedUrlIfNeeded(final String url) {
        if (url == null) {
            return null;
        }
        if (!url.contains("webcache.googleusercontent.com")) {
            return url;
        }

        final String cacheMarker = "cache:";
        final int markerIndex = url.indexOf(cacheMarker);
        if (markerIndex < 0) {
            // A webcache URL without marker: there is nothing to extract
            return url;
        }

        // Take everything after the first marker, so that a cached URL containing "cache:"
        // itself is not truncated
        final String cachedUrl = url.substring(markerIndex + cacheMarker.length());
        return cachedUrl.isEmpty() ? url : cachedUrl;
    }

    /**
     * Check whether a badges array contains a verification badge.
     *
     * <p>
     * A badge counts as a verification badge when the {@code style} of its
     * {@code metadataBadgeRenderer} object is either {@code BADGE_STYLE_TYPE_VERIFIED} or
     * {@code BADGE_STYLE_TYPE_VERIFIED_ARTIST}.
     * </p>
     *
     * @param badges the badges array to inspect, may be {@code null} or empty
     * @return {@code true} if at least one badge has a verified style, {@code false} otherwise
     */
    public static boolean isVerified(final JsonArray badges) {
        if (Utils.isNullOrEmpty(badges)) {
            return false;
        }

        return badges.stream()
                .map(JsonObject.class::cast)
                .map(badge -> badge.getObject("metadataBadgeRenderer").getString("style"))
                .anyMatch(style -> "BADGE_STYLE_TYPE_VERIFIED".equals(style)
                        || "BADGE_STYLE_TYPE_VERIFIED_ARTIST".equals(style));
    }

    /**
     * Generate a content playback nonce (also called {@code cpn}), sent by YouTube clients in
     * playback requests (and also for some clients, in the player request body).
     *
     * <p>
     * The nonce is a 16 characters string requested from
     * {@link RandomStringFromAlphabetGenerator} with the content playback nonce alphabet and
     * the number generator of this class.
     * </p>
     *
     * @return a content playback nonce string
     */
    @Nonnull
    public static String generateContentPlaybackNonce() {
        final int nonceLength = 16;
        return RandomStringFromAlphabetGenerator.generate(
                CONTENT_PLAYBACK_NONCE_ALPHABET, nonceLength, numberGenerator);
    }

    /**
     * Try to generate a {@code t} parameter, sent by mobile clients as a query of the player
     * request.
     *
     * <p>
     * Some research needs to be done to know how this parameter, unique to each request, is
     * generated by the clients. For now, a random string built from the content playback nonce
     * alphabet is used.
     * </p>
     *
     * @return a 12 characters string to try to reproduce the {@code t} parameter
     */
    @Nonnull
    public static String generateTParameter() {
        final int parameterLength = 12;
        return RandomStringFromAlphabetGenerator.generate(
                CONTENT_PLAYBACK_NONCE_ALPHABET, parameterLength, numberGenerator);
    }

    /**
     * Tell whether a streaming URL comes from the YouTube {@code WEB} client.
     *
     * <p>
     * The URL is tested against {@code C_WEB_PATTERN}.
     * </p>
     *
     * @param url the streaming URL to be checked
     * @return true if it's a {@code WEB} streaming URL, false otherwise
     */
    public static boolean isWebStreamingUrl(@Nonnull final String url) {
        final boolean fromWebClient = Parser.isMatch(C_WEB_PATTERN, url);
        return fromWebClient;
    }

    /**
     * Tell whether a streaming URL comes from the YouTube
     * {@code TVHTML5_SIMPLY_EMBEDDED_PLAYER} client.
     *
     * <p>
     * The URL is tested against {@code C_TVHTML5_SIMPLY_EMBEDDED_PLAYER_PATTERN}.
     * </p>
     *
     * @param url the streaming URL to be checked
     * @return true if it's a {@code TVHTML5_SIMPLY_EMBEDDED_PLAYER} streaming URL, false otherwise
     */
    public static boolean isTvHtml5SimplyEmbeddedPlayerStreamingUrl(@Nonnull final String url) {
        final boolean fromTvEmbeddedClient =
                Parser.isMatch(C_TVHTML5_SIMPLY_EMBEDDED_PLAYER_PATTERN, url);
        return fromTvEmbeddedClient;
    }

    /**
     * Tell whether a streaming URL comes from the YouTube {@code ANDROID} client.
     *
     * <p>
     * The URL is tested against {@code C_ANDROID_PATTERN}.
     * </p>
     *
     * @param url the streaming URL to be checked
     * @return true if it's an {@code ANDROID} streaming URL, false otherwise
     */
    public static boolean isAndroidStreamingUrl(@Nonnull final String url) {
        final boolean fromAndroidClient = Parser.isMatch(C_ANDROID_PATTERN, url);
        return fromAndroidClient;
    }

    /**
     * Tell whether a streaming URL comes from the YouTube {@code IOS} client.
     *
     * <p>
     * The URL is tested against {@code C_IOS_PATTERN}.
     * </p>
     *
     * @param url the streaming URL to be checked
     * @return true if it's an {@code IOS} streaming URL, false otherwise
     */
    public static boolean isIosStreamingUrl(@Nonnull final String url) {
        final boolean fromIosClient = Parser.isMatch(C_IOS_PATTERN, url);
        return fromIosClient;
    }

    /**
     * Determines how the consent cookie that is required for YouTube, {@code SOCS}, will be
     * generated.
     *
     * <ul>
     *   <li>{@code false} (the default value) will use {@code CAE=};</li>
     *   <li>{@code true} will use {@code CAISAiAD}.</li>
     * </ul>
     *
     * <p>
     * Setting this value to {@code true} is needed to extract mixes and some YouTube Music
     * playlists in some countries such as the EU ones.
     * </p>
     *
     * <p>
     * The value is stored in a static field, so it applies to the whole class and not to a
     * single request.
     * </p>
     *
     * @param accepted the new value of the consent's acceptance
     * @see #isConsentAccepted()
     */
    public static void setConsentAccepted(final boolean accepted) {
        consentAccepted = accepted;
    }

    /**
     * Get the value of the consent's acceptance.
     *
     * <p>
     * This is the value which has been set with {@link #setConsentAccepted(boolean)}.
     * </p>
     *
     * @return the consent's acceptance value
     * @see #setConsentAccepted(boolean)
     */
    public static boolean isConsentAccepted() {
        return consentAccepted;
    }

    /**
     * Extract the audio track type from a YouTube stream URL.
     *
     * <p>
     * The track type is read from the {@code acont} entry of the {@code xtags} URL parameter,
     * whose entries are separated by colons (Example: {@code acont=original:lang=en}). Only the
     * first {@code acont} entry is taken into account.
     * </p>
     *
     * @param streamUrl YouTube stream URL
     * @return the {@link AudioTrackType} matching the {@code acont} value, or {@code null} if the
     * URL is malformed, has no {@code xtags} parameter, has no {@code acont} entry or if the
     * value of this entry is not {@code original}, {@code dubbed} or {@code descriptive}
     */
    @Nullable
    public static AudioTrackType extractAudioTrackType(final String streamUrl) {
        final String xtags;
        try {
            xtags = Utils.getQueryValue(new URL(streamUrl), "xtags");
        } catch (final MalformedURLException ignored) {
            // An unparsable URL cannot provide a track type
            return null;
        }
        if (xtags == null) {
            return null;
        }

        // Look for the first "acont=VALUE" entry and keep VALUE (which may be empty)
        final String contentKeyPrefix = "acont=";
        final Optional<String> contentType = Stream.of(xtags.split(":"))
                .filter(tag -> tag.startsWith(contentKeyPrefix))
                .map(tag -> tag.substring(contentKeyPrefix.length()))
                .findFirst();
        if (!contentType.isPresent()) {
            return null;
        }

        final String typeName = contentType.get();
        if ("original".equals(typeName)) {
            return AudioTrackType.ORIGINAL;
        } else if ("dubbed".equals(typeName)) {
            return AudioTrackType.DUBBED;
        } else if ("descriptive".equals(typeName)) {
            return AudioTrackType.DESCRIPTIVE;
        }
        // Unknown track types are not mapped
        return null;
    }
}
-												Improve tests and randomness

- Use the existing RNG inside YoutubeParsingHelper
- Deduplicated test-setup for YouTube tests
- Minor improvements

											
										
										
											2022-02-07 21:23:38 +01:00
+								/*
 								 * Created by Christian Schabesberger on 02.03.16.
 								 *
-												Update copyright notices

Update copyright notices to comply to GPLv3 and change NewPipe to NewPipe Extractor on some notices that were not updated.

											
										
										
											2023-09-23 00:10:15 +02:00
+								 * Copyright (C) 2016 Christian Schabesberger <chris.schabesberger@mailbox.org>
-												Improve tests and randomness

- Use the existing RNG inside YoutubeParsingHelper
- Deduplicated test-setup for YouTube tests
- Minor improvements

											
										
										
											2022-02-07 21:23:38 +01:00
+								 * YoutubeParsingHelper.java is part of NewPipe Extractor.
 								 *
 								 * NewPipe Extractor is free software: you can redistribute it and/or modify
 								 * it under the terms of the GNU General Public License as published by
 								 * the Free Software Foundation, either version 3 of the License, or
 								 * (at your option) any later version.
 								 *
 								 * NewPipe Extractor is distributed in the hope that it will be useful,
 								 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 								 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 								 * GNU General Public License for more details.
 								 *
 								 * You should have received a copy of the GNU General Public License
 								 * along with NewPipe Extractor. If not, see <https://www.gnu.org/licenses/>.
 								 */
-												refactor: create extractors and linkHandler packages for SoundCloud, move YoutubeParsingHelper to youtube package (not linkhandler anymore)

											
										
										
											2020-04-10 10:51:05 +02:00
+								package org.schabi.newpipe.extractor.services.youtube;
-												initial commit

											
										
										
											2017-03-01 18:47:52 +01:00
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								import static org.schabi.newpipe.extractor.NewPipe.getDownloader;
 								import static org.schabi.newpipe.extractor.utils.Utils.HTTP;
 								import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
-												[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods in Utils class have been added which allow to try to get a match of regular expressions in a string array, or a Pattern array, on a content, on a specific index or 0.
Also some code refactoring has been made in this class.

											
										
										
											2022-01-09 22:49:37 +01:00
+								import static org.schabi.newpipe.extractor.utils.Utils.getStringResultFromRegexArray;
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
-												Use Collections.singletonMap().

											
										
										
											2022-07-27 03:26:02 +02:00
-												Use new youtube continuations api for playlists

Requires sending a POST request instead of GET.
clientName and clientVersion, which were required as headers previously now need to be part of the request payload.
continuation id also needs to be part of request body.

quick and dirty solution.

											
										
										
											2021-03-03 19:49:26 +01:00
+								import com.grack.nanojson.JsonArray;
-												Use Page.body for YoutubePlaylistExtractor

											
										
										
											2021-04-02 21:34:47 +02:00
+								import com.grack.nanojson.JsonBuilder;
-												Use new youtube continuations api for playlists

Requires sending a POST request instead of GET.
clientName and clientVersion, which were required as headers previously now need to be part of the request payload.
continuation id also needs to be part of request body.

quick and dirty solution.

											
										
										
											2021-03-03 19:49:26 +01:00
+								import com.grack.nanojson.JsonObject;
 								import com.grack.nanojson.JsonParser;
 								import com.grack.nanojson.JsonParserException;
 								import com.grack.nanojson.JsonWriter;
-												[YouTube] Add utility methods to get images from InfoItems and thumbnails arrays

Unmodifiable lists of Images are returned, parsed from a given YouTube
"thumbnails" JSON array.

These methods will be used in all YouTube extractors and InfoItems, as the
structures between content types (videos, channels, playlists, ...) are common.

											
										
										
											2022-07-22 17:28:39 +02:00
-												[YouTube] Implement emergency meta info

YouTube provides that meta info panel when users search for really sensitive content like suicide (e.g. "blue whale").

It contains:
- an encouragement as title (e.g. "We are with you")
- a phone number as action
- details about how to call the phone number (e.g. availability)
- an url pointing to the website of an association

Also add a test that just checks if a meta info is properly extracted

											
										
										
											2023-12-07 20:47:02 +01:00
+								import org.jsoup.nodes.Entities;
-												[YouTube] Add utility methods to get images from InfoItems and thumbnails arrays

Unmodifiable lists of Images are returned, parsed from a given YouTube
"thumbnails" JSON array.

These methods will be used in all YouTube extractors and InfoItems, as the
structures between content types (videos, channels, playlists, ...) are common.

											
										
										
											2022-07-22 17:28:39 +02:00
+								import org.schabi.newpipe.extractor.Image;
 								import org.schabi.newpipe.extractor.Image.ResolutionLevel;
-												Implement time ago parser and improve localization handling

- Handle special cases for languages where the number is not shown
- Rework the Downloader base implementation, allowing for more
advanced things to be done
- Separate the localization from the content country (just like
YouTube let's the user choose both).

											
										
										
											2019-04-28 22:03:16 +02:00
+								import org.schabi.newpipe.extractor.downloader.Response;
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								import org.schabi.newpipe.extractor.exceptions.AccountTerminatedException;
 								import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
 								import org.schabi.newpipe.extractor.exceptions.ExtractionException;
 								import org.schabi.newpipe.extractor.exceptions.ParsingException;
 								import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
-												Fix localization and update client version

											
										
										
											2021-04-30 19:06:56 +02:00
+								import org.schabi.newpipe.extractor.localization.ContentCountry;
-												Use Localization in getJsonResponse()

											
										
										
											2020-02-29 16:55:07 +01:00
+								import org.schabi.newpipe.extractor.localization.Localization;
-												[YouTube] Differentiate genre mixes from normal mixes

Note: genre mixes already worked, now they are just considered as such in various video id extraction and in related items
Note 2: now extracting a mix id from a *normal* youtube mix id will fail if the video id wouldn't be exactly 11 characters long

											
										
										
											2022-02-17 17:19:54 +01:00
+								import org.schabi.newpipe.extractor.playlist.PlaylistInfo;
-												Add track types to audio streams (#1041)


											
										
										
											2023-03-28 00:02:20 +02:00
+								import org.schabi.newpipe.extractor.stream.AudioTrackType;
-												Move toJsonArray and toJsonObject to JsonUtils

											
										
										
											2021-03-04 18:58:51 +01:00
+								import org.schabi.newpipe.extractor.utils.JsonUtils;
-												Move getInitialData() method to YouTubeParsingHelper

Rename ytInitialData to initialData
											
										
										
											2020-02-22 23:51:02 +01:00
+								import org.schabi.newpipe.extractor.utils.Parser;
-												Improve tests and randomness

- Use the existing RNG inside YoutubeParsingHelper
- Deduplicated test-setup for YouTube tests
- Minor improvements

											
										
										
											2022-02-07 21:23:38 +01:00
+								import org.schabi.newpipe.extractor.utils.RandomStringFromAlphabetGenerator;
-												Actually fix thumbnail URLs starting with //

											
										
										
											2020-02-28 09:36:33 +01:00
+								import org.schabi.newpipe.extractor.utils.Utils;
-												initial commit

											
										
										
											2017-03-01 18:47:52 +01:00
-												Create getJsonResponse() function

											
										
										
											2020-02-29 16:42:04 +01:00
+								import java.io.IOException;
-												Implement getTextFromObject() function

											
										
										
											2020-02-27 17:39:23 +01:00
+								import java.io.UnsupportedEncodingException;
-												Extract some code to getValidResponseBody()

											
										
										
											2020-04-01 16:01:21 +02:00
+								import java.net.MalformedURLException;
-												refactored YouTube-linkHandler to use less regex and more URL-methods

											
										
										
											2019-01-13 12:52:07 +01:00
+								import java.net.URL;
-												[YouTube] Add the cpn param to playback requests and try to spoof better the Android client

The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.

For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.

Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).

For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.

This commit also fixes a small bug with the Android User-Agent string.

Some code improvements have been also made.

											
										
										
											2021-12-22 17:55:41 +01:00
+								import java.nio.charset.StandardCharsets;
-												Fix YouTube parse error when only a date is present.

											
										
										
											2020-11-03 11:54:46 +01:00
+								import java.time.LocalDate;
-												Switch from Calendar to OffsetDateTime in DateWrapper.

											
										
										
											2020-10-18 05:48:14 +02:00
+								import java.time.OffsetDateTime;
-												Fix YouTube parse error when only a date is present.

											
										
										
											2020-11-03 11:54:46 +01:00
+								import java.time.ZoneOffset;
-												Switch from Calendar to OffsetDateTime in DateWrapper.

											
										
										
											2020-10-18 05:48:14 +02:00
+								import java.time.format.DateTimeParseException;
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								import java.util.HashMap;
 								import java.util.List;
-												Use immutable sets in YoutubeParsingHelper.

											
										
										
											2022-08-09 05:44:34 +02:00
+								import java.util.Locale;
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								import java.util.Map;
 								import java.util.Optional;
 								import java.util.Random;
-												Use immutable sets in YoutubeParsingHelper.

											
										
										
											2022-08-09 05:44:34 +02:00
+								import java.util.Set;
-												Apply changes in YoutubeStreamExtractor

Extract post live DVR streams as post live streams instead of live streams.

A new class has been in order to improve code: ItagInfo, which stores an itag, the content (URL) extracted and if its an URL or not.
A functional interface has been added in order to abstract the stream building: StreamBuilderHelper.
Also add the cver parameter added by the desktop web client on the corresponding streams (a new method has been added in YoutubeParsingHelper to check this and another for Android streams).

Some code in these classes has been also refactored/improved/optimized.

											
										
										
											2022-03-06 20:10:11 +01:00
+								import java.util.regex.Pattern;
-												[YouTube] Add utility methods to get images from InfoItems and thumbnails arrays

Unmodifiable lists of Images are returned, parsed from a given YouTube
"thumbnails" JSON array.

These methods will be used in all YouTube extractors and InfoItems, as the
structures between content types (videos, channels, playlists, ...) are common.

											
										
										
											2022-07-22 17:28:39 +02:00
+								import java.util.stream.Collectors;
-												[YouTube] Improve WEB client version and API key HTML extraction

Common code in WEB client version HTML extraction has been deduplicated, usage of the Java 8 Stream API has been made and initial data fallback has been used as a last resort.
This means that the client version extraction from regexes will be used before this fallback, as it doesn't contain the full client version.
This can be used as a way to fingerprint the extractor, even if it seems to be not the case.

											
										
										
											2022-08-08 19:36:19 +02:00
+								import java.util.stream.Stream;
-												Make resetClientVersionAndKey public

											
										
										
											2021-01-17 18:48:16 +01:00
 								import javax.annotation.Nonnull;
 								import javax.annotation.Nullable;
-												Improve getClientVersion()

											
										
										
											2020-02-26 15:22:59 +01:00
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								public final class YoutubeParsingHelper {
-												initial commit

											
										
										
											2017-03-01 18:47:52 +01:00
 								    private YoutubeParsingHelper() {
 								    }
-												[YouTube] Add documentation about parameters added and clients versions and key

Also move the iPhone device machine id to a constant, explain how it is used and move the licence in the header of the file, and fix missing imports in YoutubeStreamExtractor (due to a rebase issue).

											
										
										
											2022-03-26 20:02:35 +01:00
+								    /**
-												[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more

Use the TV embedded client technique to get streams of embeddable age-restricted videos.

This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, category, the uploadDate and the publishDate properties.

The outdated code for these contents has been removed.

Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.

Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.

											
										
										
											2022-04-02 19:06:36 +02:00
+								     * The base URL of requests of the {@code WEB} clients to the InnerTube internal API.
-												[YouTube] Add documentation about parameters added and clients versions and key

Also move the iPhone device machine id to a constant, explain how it is used and move the licence in the header of the file, and fix missing imports in YoutubeStreamExtractor (due to a rebase issue).

											
										
										
											2022-03-26 20:02:35 +01:00
+								     */
-												Use the youtubei API for YouTube mixes + update the corresponding test + do some improvements

Use the youtubei API for YouTube mixes. The corresponding has been updated because the new API breaks the tests of YoutubeMixPlaylistExtractorTest.
Remove some deprecated code (the old search code with the pbj JSON) and do some other improvements.

											
										
										
											2021-05-30 17:23:51 +02:00
+								    public static final String YOUTUBEI_V1_URL = "https://www.youtube.com/youtubei/v1/";
-												[YouTube] Reduce InnerTube response sizes by adding the prettyPrint parameter with the false value

InnerTube responses return pretty printed responses, which increase responses' size for nothing.

By using the prettyPrint parameter on requests and setting its value to false, responses are not pretty printed anymore, which reduces responses size, and so data transfer and processing times.
This usage has been recently deployed by YouTube on their websites.

											
										
										
											2022-03-15 11:46:12 +01:00
-												[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more

Use the TV embedded client technique to get streams of embeddable age-restricted videos.

This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, category, the uploadDate and the publishDate properties.

The outdated code for these contents has been removed.

Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.

Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.

											
										
										
											2022-04-02 19:06:36 +02:00
+								    /**
 								     * The base URL of requests of non-web clients to the InnerTube internal API.
 								     */
 								    public static final String YOUTUBEI_V1_GAPIS_URL =
 								            "https://youtubei.googleapis.com/youtubei/v1/";
-												Use immutable Map factory methods.

											
										
										
											2022-11-12 05:01:05 +01:00
+								    /**
 								     * The base URL of YouTube Music.
 								     */
 								    private static final String YOUTUBE_MUSIC_URL = "https://music.youtube.com";
-												[YouTube] Reduce InnerTube response sizes by adding the prettyPrint parameter with the false value

InnerTube responses return pretty printed responses, which increase responses' size for nothing.

By using the prettyPrint parameter on requests and setting its value to false, responses are not pretty printed anymore, which reduces responses size, and so data transfer and processing times.
This usage has been recently deployed by YouTube on their websites.

											
										
										
											2022-03-15 11:46:12 +01:00
+								    /**
 								     * A parameter to disable pretty-printed response of InnerTube requests, to reduce response
 								     * sizes.
 								     *
 								     * <p>
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								     * Sent in query parameters of the requests.
-												[YouTube] Reduce InnerTube response sizes by adding the prettyPrint parameter with the false value

InnerTube responses return pretty printed responses, which increase responses' size for nothing.

By using the prettyPrint parameter on requests and setting its value to false, responses are not pretty printed anymore, which reduces responses size, and so data transfer and processing times.
This usage has been recently deployed by YouTube on their websites.

											
										
										
											2022-03-15 11:46:12 +01:00
+								     * </p>
 								     **/
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								    public static final String DISABLE_PRETTY_PRINT_PARAMETER = "prettyPrint=false";
-												[YouTube] Add documentation about parameters added and clients versions and key

Also move the iPhone device machine id to a constant, explain how it is used and move the licence in the header of the file, and fix missing imports in YoutubeStreamExtractor (due to a rebase issue).

											
										
										
											2022-03-26 20:02:35 +01:00
 								    /**
 								     * A parameter sent by official clients named {@code contentPlaybackNonce}.
 								     *
 								     * <p>
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								     * It is sent by official clients on videoplayback requests and InnerTube player requests in
 								     * most cases.
-												[YouTube] Add documentation about parameters added and clients versions and key

Also move the iPhone device machine id to a constant, explain how it is used and move the licence in the header of the file, and fix missing imports in YoutubeStreamExtractor (due to a rebase issue).

											
										
										
											2022-03-26 20:02:35 +01:00
+								     * </p>
 								     *
 								     * <p>
 								     * It is composed of 16 characters which are generated from
 								     * {@link #CONTENT_PLAYBACK_NONCE_ALPHABET this alphabet}, with the use of strong random
 								     * values.
 								     * </p>
 								     *
 								     * @see #generateContentPlaybackNonce()
 								     */
-												[YouTube] Add the cpn param to playback requests and try to spoof better the Android client

The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.

For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.

Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).

For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.

This commit also fixes a small bug with the Android User-Agent string.

Some code improvements have been also made.

											
										
										
											2021-12-22 17:55:41 +01:00
+								    public static final String CPN = "cpn";
 								    /**
 								     * The {@code videoId} name, exposed as a constant.
 								     *
 								     * <p>
 								     * NOTE(review): presumably the name of the field carrying a video ID in InnerTube
 								     * request bodies; confirm against the callers, which are not visible from here.
 								     * </p>
 								     */
 								    public static final String VIDEO_ID = "videoId";
-												Use the youtubei API for YouTube mixes + update the corresponding test + do some improvements

Use the youtubei API for YouTube mixes. The corresponding test has been updated because the new API breaks the tests of YoutubeMixPlaylistExtractorTest.
Remove some deprecated code (the old search code with the pbj JSON) and do some other improvements.

											
										
										
											2021-05-30 17:23:51 +02:00
-												[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more

Use the TV embedded client technique to get streams of embeddable age-restricted videos.

This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata, such as the unlisted property, the category, and the uploadDate and publishDate properties, would be missing.

The outdated code for these contents has been removed.

Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.

Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.

											
										
										
											2022-04-02 19:06:36 +02:00
+								    /**
 								     * A parameter sent by official clients named {@code contentCheckOk}.
 								     *
 								     * <p>
 								     * Setting it to {@code true} allows us to get streaming data on videos with a warning about
 								     * the sensitive content they contain.
 								     * </p>
 								     */
 								    public static final String CONTENT_CHECK_OK = "contentCheckOk";
 								    /**
-												[YouTube] Update client versions and use a real version for the iOS client

The iOS version can be got easily in fact, by looking at the What's New section of the App Store' app page.

											
										
										
											2022-08-08 20:12:32 +02:00
+								     * A parameter which may be sent by official clients named {@code racyCheckOk}.
-												[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more

Use the TV embedded client technique to get streams of embeddable age-restricted videos.

This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata, such as the unlisted property, the category, and the uploadDate and publishDate properties, would be missing.

The outdated code for these contents has been removed.

Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.

Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.

											
										
										
											2022-04-02 19:06:36 +02:00
+								     *
 								     * <p>
 								     * What this parameter does is not really known, but it seems to be linked to sensitive
 								     * contents such as age-restricted content.
 								     * </p>
 								     */
 								    public static final String RACY_CHECK_OK = "racyCheckOk";
-												[YouTube] Create constants for client names/versions

											
										
										
											2024-04-20 11:43:54 +02:00
+								    /**
 								     * The hardcoded client ID used for InnerTube requests with the {@code WEB} client.
 								     */
 								    private static final String WEB_CLIENT_ID = "1";
-												[YouTube] Add documentation about parameters added and clients versions and key

Also move the iPhone device machine id to a constant, explain how it is used and move the licence in the header of the file, and fix missing imports in YoutubeStreamExtractor (due to a rebase issue).

											
										
										
											2022-03-26 20:02:35 +01:00
+								    /**
 								     * The client version for InnerTube requests with the {@code WEB} client, used as the last
 								     * fallback if the extraction of the real one failed.
 								     */
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								    private static final String HARDCODED_CLIENT_VERSION = "2.20240410.01.00";
-												[YouTube] Add the cpn param to playback requests and try to spoof better the Android client

The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.

For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.

Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).

For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.

This commit also fixes a small bug with the Android User-Agent string.

Some code improvements have been also made.

											
										
										
											2021-12-22 17:55:41 +01:00
-												[YouTube] Add documentation about parameters added and clients versions and key

Also move the iPhone device machine id to a constant, explain how it is used and move the licence in the header of the file, and fix missing imports in YoutubeStreamExtractor (due to a rebase issue).

											
										
										
											2022-03-26 20:02:35 +01:00
+								    /**
-												[YouTube] Update client versions and use a real version for the iOS client

The iOS version can be got easily in fact, by looking at the What's New section of the App Store' app page.

											
										
										
											2022-08-08 20:12:32 +02:00
+								     * The hardcoded client version of the Android app used for InnerTube requests with this
 								     * client.
 								     *
 								     * <p>
 								     * It can be extracted by getting the latest release version of the app in an APK repository
 								     * such as <a href="https://www.apkmirror.com/apk/google-inc/youtube/">APKMirror</a>.
 								     * </p>
-												[YouTube] Add documentation about parameters added and clients versions and key

Also move the iPhone device machine id to a constant, explain how it is used and move the licence in the header of the file, and fix missing imports in YoutubeStreamExtractor (due to a rebase issue).

											
										
										
											2022-03-26 20:02:35 +01:00
+								     */
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								    private static final String ANDROID_YOUTUBE_CLIENT_VERSION = "19.13.36";
-												[YouTube] Add documentation about parameters added and clients versions and key

Also move the iPhone device machine id to a constant, explain how it is used and move the licence in the header of the file, and fix missing imports in YoutubeStreamExtractor (due to a rebase issue).

											
										
										
											2022-03-26 20:02:35 +01:00
 								    /**
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								     * The hardcoded client version of the iOS app used for InnerTube requests with this client.
-												[YouTube] Add documentation about parameters added and clients versions and key

Also move the iPhone device machine id to a constant, explain how it is used and move the licence in the header of the file, and fix missing imports in YoutubeStreamExtractor (due to a rebase issue).

											
										
										
											2022-03-26 20:02:35 +01:00
+								     *
 								     * <p>
-												[YouTube] Update client versions and use a real version for the iOS client

The iOS version can be got easily in fact, by looking at the What's New section of the App Store' app page.

											
										
										
											2022-08-08 20:12:32 +02:00
+								     * It can be extracted by getting the latest release version of the app on
 								     * <a href="https://apps.apple.com/us/app/youtube-watch-listen-stream/id544007664/">the App
 								     * Store page of the YouTube app</a>, in the {@code What’s New} section.
-												[YouTube] Add documentation about parameters added and clients versions and key

Also move the iPhone device machine id to a constant, explain how it is used and move the licence in the header of the file, and fix missing imports in YoutubeStreamExtractor (due to a rebase issue).

											
										
										
											2022-03-26 20:02:35 +01:00
+								     * </p>
 								     */
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								    private static final String IOS_YOUTUBE_CLIENT_VERSION = "19.14.3";
-												[YouTube] Add the cpn param to playback requests and try to spoof better the Android client

The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.

For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.

Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).

For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.

This commit also fixes a small bug with the Android User-Agent string.

Some code improvements have been also made.

											
										
										
											2021-12-22 17:55:41 +01:00
-												[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more

Use the TV embedded client technique to get streams of embeddable age-restricted videos.

This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata, such as the unlisted property, the category, and the uploadDate and publishDate properties, would be missing.

The outdated code for these contents has been removed.

Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.

Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.

											
										
										
											2022-04-02 19:06:36 +02:00
+								    /**
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								     * The hardcoded client version used for InnerTube requests with the TV HTML5 embed client.
-												[YouTube] Update client versions and use a real version for the iOS client

The iOS version can be got easily in fact, by looking at the What's New section of the App Store' app page.

											
										
										
											2022-08-08 20:12:32 +02:00
+								     */
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								    private static final String TVHTML5_SIMPLY_EMBED_CLIENT_VERSION = "2.0";
-												[YouTube] Update client versions and use a real version for the iOS client

The iOS version can be got easily in fact, by looking at the What's New section of the App Store' app page.

											
										
										
											2022-08-08 20:12:32 +02:00
-												[YouTube] Create constants for client names/versions

											
										
										
											2024-04-20 11:43:54 +02:00
+								    /**
 								     * The hardcoded client ID used for InnerTube requests with the YouTube Music desktop client.
 								     */
 								    private static final String YOUTUBE_MUSIC_CLIENT_ID = "67";
-												[YouTube] Update client versions and use a real version for the iOS client

The iOS version can be got easily in fact, by looking at the What's New section of the App Store' app page.

											
										
										
											2022-08-08 20:12:32 +02:00
+								    /**
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								     * The hardcoded client version used for InnerTube requests with the YouTube Music desktop
 								     * client.
-												[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more

Use the TV embedded client technique to get streams of embeddable age-restricted videos.

This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata, such as the unlisted property, the category, and the uploadDate and publishDate properties, would be missing.

The outdated code for these contents has been removed.

Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.

Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.

											
										
										
											2022-04-02 19:06:36 +02:00
+								     */
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								    private static final String HARDCODED_YOUTUBE_MUSIC_CLIENT_VERSION = "1.20240403.01.00";
-												[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more

Use the TV embedded client technique to get streams of embeddable age-restricted videos.

This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata, such as the unlisted property, the category, and the uploadDate and publishDate properties, would be missing.

The outdated code for these contents has been removed.

Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.

Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.

											
										
										
											2022-04-02 19:06:36 +02:00
-												Improve getClientVersion()

											
										
										
											2020-02-26 15:22:59 +01:00
+								    private static String clientVersion;
-												Extract YouTube's key

											
										
										
											2020-07-26 12:00:56 +02:00
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								    private static String youtubeMusicClientVersion;
-												Add support for YouTube Music search

											
										
										
											2020-03-17 11:33:39 +01:00
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								    private static boolean clientVersionExtracted = false;
-												Adress the last requested changes + update YoutubeCommentsExtractor mocks

											
										
										
											2021-07-09 18:23:46 +02:00
+								    @SuppressWarnings("OptionalUsedAsFieldOrParameterType")
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								    private static Optional<Boolean> hardcodedClientVersionValid = Optional.empty();
-												Adress requested changes

											
										
										
											2022-02-05 22:05:07 +01:00
-												[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods in Utils class have been added which allow to try to get a match of regular expressions in a string array, or a Pattern array, on a content, on a specific index or 0.
Also some code refactoring has been made in this class.

											
										
										
											2022-01-09 22:49:37 +01:00
+								    private static final String[] INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES =
 								            {"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
 								                    "innertube_context_client_version\":\"([0-9\\.]+?)\"",
 								                    "client.version=([0-9\\.]+)"};
-												Adress requested changes

											
										
										
											2022-02-05 22:05:07 +01:00
+								    private static final String[] INITIAL_DATA_REGEXES =
 								            {"window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});",
 								                    "var\\s*ytInitialData\\s*=\\s*(\\{.*?\\});"};
-												Use lightweight requests when getting and checking YouTube API key and client version

											
										
										
											2021-04-12 18:24:32 +02:00
-												[YouTube] Add the cpn param to playback requests and try to spoof better the Android client

The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.

For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.

Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).

For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.

This commit also fixes a small bug with the Android User-Agent string.

Some code improvements have been also made.

											
										
										
											2021-12-22 17:55:41 +01:00
+								    private static final String CONTENT_PLAYBACK_NONCE_ALPHABET =
 								            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
-												[YouTube] Add documentation about parameters added and clients versions and key

Also move the iPhone device machine id to a constant, explain how it is used and move the licence in the header of the file, and fix missing imports in YoutubeStreamExtractor (due to a rebase issue).

											
										
										
											2022-03-26 20:02:35 +01:00
+								    /**
-												[YouTube] Update InnerTube clients' version and devices' OS version and model

											
										
										
											2023-10-07 14:14:34 +02:00
+								     * The device machine id for the iPhone 15, used to get 60fps with the {@code iOS} client.
-												[YouTube] Add documentation about parameters added and clients versions and key

Also move the iPhone device machine id to a constant, explain how it is used and move the licence in the header of the file, and fix missing imports in YoutubeStreamExtractor (due to a rebase issue).

											
										
										
											2022-03-26 20:02:35 +01:00
+								     *
 								     * <p>
 								     * See <a href="https://gist.github.com/adamawolf/3048717">this GitHub Gist</a> for more
 								     * information.
 								     * </p>
 								     */
-												[YouTube] Update InnerTube clients' version and devices' OS version and model

											
										
										
											2023-10-07 14:14:34 +02:00
+								    private static final String IOS_DEVICE_MODEL = "iPhone15,4";
-												[YouTube] Add documentation about parameters added and clients versions and key

Also move the iPhone device machine id to a constant, explain how it is used and move the licence in the header of the file, and fix missing imports in YoutubeStreamExtractor (due to a rebase issue).

											
										
										
											2022-03-26 20:02:35 +01:00
-												[YouTube] Create constants for client names/versions

											
										
										
											2024-04-20 11:43:54 +02:00
+								    /**
 								     * Spoofing an iPhone 15 running iOS 17.4.1 with the hardcoded version of the iOS app. To be
 								     * used for the {@code "osVersion"} field in JSON POST requests.
 								     * <p>
 								     * The value of this field seems to use the following structure:
 								     * "iOS major version.minor version.patch version.build version", where
 								     * "patch version" is equal to 0 if it isn't set.
 								     * The build version corresponding to the iOS version used can be found on
 								     * <a href="https://theapplewiki.com/wiki/Firmware/iPhone/17.x#iPhone_15">
 								     *     https://theapplewiki.com/wiki/Firmware/iPhone/17.x#iPhone_15</a>.
 								     * </p>
 								     *
 								     * @see #IOS_USER_AGENT_VERSION
 								     */
 								    private static final String IOS_OS_VERSION = "17.4.1.21E237";
 								    /**
 								     * The iOS version of the spoofed device (an iPhone 15 running iOS 17.4.1), written with
 								     * underscores instead of dots. To be used in the user agent for requests made with the
 								     * hardcoded version of the iOS app.
 								     *
 								     * <p>
 								     * This value and {@link #IOS_OS_VERSION} describe the same iOS release in two different
 								     * formats, so both should be updated together.
 								     * </p>
 								     *
 								     * @see #IOS_OS_VERSION
 								     */
 								    private static final String IOS_USER_AGENT_VERSION = "17_4_1";
-												Replace SecureRandom with Random


											
										
										
											2023-08-04 00:00:02 +02:00
+								    private static Random numberGenerator = new Random();
-												Add method to inject Random into YoutubeParsingHelper and use in tests

											
										
										
											2021-04-08 16:36:55 +02:00
-												Adress reviews and do some improvements

Address changes requested in reviews.
Do some improvements, remove unused imports and format some code to fit within the 100-character line limit.

											
										
										
											2021-04-25 18:54:26 +02:00
+								    private static final String FEED_BASE_CHANNEL_ID =
 								            "https://www.youtube.com/feeds/videos.xml?channel_id=";
-												Introduce FeedExtractor making fetching from dedicated feeds possible

YouTube, for example, has a dedicated feed which was built to be used
like this.

											
										
										
											2019-12-16 08:35:44 +01:00
+								    private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user=";
-												Apply changes in YoutubeStreamExtractor

Extract post live DVR streams as post live streams instead of live streams.

A new class has been added in order to improve the code: ItagInfo, which stores an itag, the content (URL) extracted and whether it is a URL or not.
A functional interface has been added in order to abstract the stream building: StreamBuilderHelper.
Also add the cver parameter added by the desktop web client on the corresponding streams (a new method has been added in YoutubeParsingHelper to check this and another for Android streams).

Some code in these classes has been also refactored/improved/optimized.

											
										
										
											2022-03-06 20:10:11 +01:00
+								    private static final Pattern C_WEB_PATTERN = Pattern.compile("&c=WEB");
 								    /**
 								     * Pattern for the literal text {@code &c=TVHTML5_SIMPLY_EMBEDDED_PLAYER}.
 								     *
 								     * <p>
 								     * NOTE(review): presumably searched for in streaming URLs to tell which client they come
 								     * from; confirm against the callers, which are not visible from here.
 								     * </p>
 								     */
 								    private static final Pattern C_TVHTML5_SIMPLY_EMBEDDED_PLAYER_PATTERN =
 								            Pattern.compile("&c=TVHTML5_SIMPLY_EMBEDDED_PLAYER");
 								    /**
 								     * Pattern for the literal text {@code &c=ANDROID}.
 								     *
 								     * <p>
 								     * The expression has no anchor or terminator, so when it is used with
 								     * {@code Matcher.find()} a longer value which merely starts with {@code ANDROID} after
 								     * {@code &c=} is matched too.
 								     * </p>
 								     */
 								    private static final Pattern C_ANDROID_PATTERN = Pattern.compile("&c=ANDROID");
 								    /**
 								     * Pattern for the literal text {@code &c=IOS}.
 								     *
 								     * <p>
 								     * The expression has no anchor or terminator, so when it is used with
 								     * {@code Matcher.find()} a longer value which merely starts with {@code IOS} after
 								     * {@code &c=} is matched too.
 								     * </p>
 								     */
 								    private static final Pattern C_IOS_PATTERN = Pattern.compile("&c=IOS");
-												Introduce FeedExtractor making fetching from dedicated feeds possible

YouTube, for example, has a dedicated feed which was built to be used
like this.

											
										
										
											2019-12-16 08:35:44 +01:00
-												Use immutable sets in YoutubeParsingHelper.

											
										
										
											2022-08-09 05:44:34 +02:00
+								    private static final Set<String> GOOGLE_URLS = Set.of("google.", "m.google.", "www.google.");
 								    private static final Set<String> INVIDIOUS_URLS = Set.of("invidio.us", "dev.invidio.us",
 								            "www.invidio.us", "redirect.invidious.io", "invidious.snopyta.org", "yewtu.be",
 								            "tube.connect.cafe", "tubus.eduvid.org", "invidious.kavin.rocks", "invidious.site",
 								            "invidious-us.kavin.rocks", "piped.kavin.rocks", "vid.mint.lgbt", "invidiou.site",
 								            "invidious.fdn.fr", "invidious.048596.xyz", "invidious.zee.li", "vid.puffyan.us",
 								            "ytprivate.com", "invidious.namazso.eu", "invidious.silkky.cloud", "ytb.trom.tf",
 								            "invidious.exonip.de", "inv.riverside.rocks", "invidious.blamefran.net", "y.com.cm",
 								            "invidious.moomoo.me", "yt.cyberhost.uk");
 								    private static final Set<String> YOUTUBE_URLS = Set.of("youtube.com", "www.youtube.com",
 								            "m.youtube.com", "music.youtube.com");
-												Fixed all YTMixPlaylists

Added option to choose if you want to consent or not - currently this is done by a static variable in ``YoutubeParsingHelper`` - may not be the best long-term solution but for now the tests work again (in EU countries) 🥳

											
										
										
											2022-07-30 16:05:52 +02:00
+								    private static boolean consentAccepted = false;
-												[YouTube] Move meta info extraction to separate file

YoutubeParsingHelper was longer than 2000 lines which caused checkstyle issues

											
										
										
											2023-12-07 20:57:44 +01:00
+								    public static boolean isGoogleURL(final String url) {
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								        final String cachedUrl = extractCachedUrlIfNeeded(url);
-												Extract stream and search meta info for YouTube

Add method to extract Google webcache URLs.

											
										
										
											2020-12-20 19:54:12 +01:00
+								        try {
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								            final URL u = new URL(cachedUrl);
-												Use immutable sets in YoutubeParsingHelper.

											
										
										
											2022-08-09 05:44:34 +02:00
+								            return GOOGLE_URLS.stream().anyMatch(item -> u.getHost().startsWith(item));
-												Use the youtubei API for YouTube playlists

											
										
										
											2021-04-08 16:17:59 +02:00
+								        } catch (final MalformedURLException e) {
-												Extract stream and search meta info for YouTube

Add method to extract Google webcache URLs.

											
										
										
											2020-12-20 19:54:12 +01:00
+								            return false;
-												[YouTube] Improve detection of reCAPTCHA pages

											
										
										
											2019-10-29 06:00:29 +01:00
+								        }
-												Extract stream and search meta info for YouTube

Add method to extract Google webcache URLs.

											
										
										
											2020-12-20 19:54:12 +01:00
+								    }
-												[YouTube] Improve detection of reCAPTCHA pages

											
										
										
											2019-10-29 06:00:29 +01:00
-												Annotate YoutubeParsingHelper methods with Nonnull when needed

											
										
										
											2021-06-11 13:34:23 +02:00
+								    public static boolean isYoutubeURL(@Nonnull final URL url) {
-												Use immutable sets in YoutubeParsingHelper.

											
										
										
											2022-08-09 05:44:34 +02:00
+								        return YOUTUBE_URLS.contains(url.getHost().toLowerCase(Locale.ROOT));
-												refactored YouTube-linkHandler to use less regex and more URL-methods

											
										
										
											2019-01-13 12:52:07 +01:00
+								    }
-												Annotate YoutubeParsingHelper methods with Nonnull when needed

											
										
										
											2021-06-11 13:34:23 +02:00
+								    public static boolean isYoutubeServiceURL(@Nonnull final URL url) {
-												Make some vars final and add annotations to methods


											
										
										
											2020-12-15 17:21:21 +01:00
+								        final String host = url.getHost();
-												Adress reviews and do some improvements

Adress changes requested in reviews.
Do some improvements, remove unused imports and format some code to be in the 100 characters line limit.

											
										
										
											2021-04-25 18:54:26 +02:00
+								        return host.equalsIgnoreCase("www.youtube-nocookie.com")
 								                || host.equalsIgnoreCase("youtu.be");
-												split isYoutubeALikeURL into multiple methods

											
										
										
											2019-01-27 01:28:51 +01:00
+								    }
-												refactored YouTube-linkHandler to use less regex and more URL-methods

											
										
										
											2019-01-13 12:52:07 +01:00
-												Annotate YoutubeParsingHelper methods with Nonnull when needed

											
										
										
											2021-06-11 13:34:23 +02:00
+								    public static boolean isHooktubeURL(@Nonnull final URL url) {
-												Make some vars final and add annotations to methods


											
										
										
											2020-12-15 17:21:21 +01:00
+								        final String host = url.getHost();
-												split isYoutubeALikeURL into multiple methods

											
										
										
											2019-01-27 01:28:51 +01:00
+								        return host.equalsIgnoreCase("hooktube.com");
 								    }
-												Use immutable sets in YoutubeParsingHelper.

											
										
										
											2022-08-09 05:44:34 +02:00
+								    public static boolean isInvidiousURL(@Nonnull final URL url) {
 								        return INVIDIOUS_URLS.contains(url.getHost().toLowerCase(Locale.ROOT));
-												refactored YouTube-linkHandler to use less regex and more URL-methods

											
										
										
											2019-01-13 12:52:07 +01:00
+								    }
-												Add support for y2u.be links

											
										
										
											2021-10-22 21:48:18 +02:00
+								    public static boolean isY2ubeURL(@Nonnull final URL url) {
 								        // Only the exact host "y2u.be" matches; the comparison ignores case.
 								        return url.getHost().equalsIgnoreCase("y2u.be");
 								    }
-												Added a comment

and fixed a "typo" in the method parseDurationString replacing Long with Integer.

These are not the droids you are looking for wb9688 :grin:
											
										
										
											2020-06-15 11:27:44 +02:00
+								    /**
-												update invidious instances

											
										
										
											2020-07-02 21:31:05 +02:00
+								     * Parses the duration string of the video expecting ":" or "." as separators
-												use only one constant UTF-8

											
										
										
											2021-02-07 22:12:22 +01:00
+								     *
-												Added a comment

and fixed a "typo" in the method parseDurationString replacing Long with Integer.

These are not the droids you are looking for wb9688 :grin:
											
										
										
											2020-06-15 11:27:44 +02:00
+								     * @return the duration in seconds
-												update invidious instances

											
										
										
											2020-07-02 21:31:05 +02:00
+								     * @throws ParsingException when more than 3 separators are found
-												Added a comment

and fixed a "typo" in the method parseDurationString replacing Long with Integer.

These are not the droids you are looking for wb9688 :grin:
											
										
										
											2020-06-15 11:27:44 +02:00
+								     */
-												Annotate YoutubeParsingHelper methods with Nonnull when needed

											
										
										
											2021-06-11 13:34:23 +02:00
+								    public static int parseDurationString(@Nonnull final String input)
-												initial commit

											
										
										
											2017-03-01 18:47:52 +01:00
+								            throws ParsingException, NumberFormatException {
-												Accommodate time formatting for different countries

If there's a . in the time format, this detects it and uses is instead of : 
This removes errors and lag related to "Could not get duration" while using NewPipe in countries with official time formatting with . (dot) instead of : (punctuation colon)

Tested to compile and work on real device
											
										
										
											2018-09-09 11:53:10 +02:00
+								        // If time separator : is not detected, try . instead
-												refactor split time parsing

											
										
										
											2018-09-09 14:01:39 +02:00
+								        final String[] splitInput = input.contains(":")
 								                ? input.split(":")
 								                : input.split("\\.");
-												[YouTube] Improve duration parsing
											
										
										
											2022-11-09 09:41:29 +01:00
+								        final int[] units = {24, 60, 60, 1};
 								        final int offset = units.length - splitInput.length;
 								        if (offset < 0) {
 								            throw new ParsingException("Error duration string with unknown format: " + input);
-												initial commit

											
										
										
											2017-03-01 18:47:52 +01:00
+								        }
-												[YouTube] Improve duration parsing
											
										
										
											2022-11-09 09:41:29 +01:00
+								        int duration = 0;
 								        for (int i = 0; i < splitInput.length; i++) {
 								            duration = units[i + offset] * (duration + convertDurationToInt(splitInput[i]));
 								        }
 								        return duration;
-												Workaround for incorrect duration for "YT shorts" videos in channels

As a workaround 0 is returned as duration for such videos.
See also https://github.com/TeamNewPipe/NewPipe/issues/8034

											
										
										
											2022-03-17 14:50:12 +01:00
+								    }
 								    /**
 								     * Tries to convert a duration string to an integer without throwing an exception.
 								     * <br/>
 								     * Helper method for {@link #parseDurationString(String)}.
 								     * <br/>
 								     * Note: This method is also used as a workaround for NewPipe#8034 (YT shorts no longer
 								     * display any duration in channels).
 								     *
 								     * @param input The string to process
 								     * @return The converted integer or 0 if the conversion failed.
 								     */
 								    private static int convertDurationToInt(final String input) {
 								        if (input == null || input.isEmpty()) {
 								            return 0;
 								        }
 								        final String clearedInput = Utils.removeNonDigitCharacters(input);
 								        try {
 								            return Integer.parseInt(clearedInput);
 								        } catch (final NumberFormatException ex) {
 								            return 0;
 								        }
-												initial commit

											
										
										
											2017-03-01 18:47:52 +01:00
+								    }
-												Implement time ago parser and improve localization handling

- Handle special cases for languages where the number is not shown
- Rework the Downloader base implementation, allowing for more
advanced things to be done
- Separate the localization from the content country (just like
YouTube let's the user choose both).

											
										
										
											2019-04-28 22:03:16 +02:00
-												Annotate YoutubeParsingHelper methods with Nonnull when needed

											
										
										
											2021-06-11 13:34:23 +02:00
+								    @Nonnull
 								    public static String getFeedUrlFrom(@Nonnull final String channelIdOrUser) {
-												Introduce FeedExtractor making fetching from dedicated feeds possible

YouTube, for example, has a dedicated feed which was built to be used
like this.

											
										
										
											2019-12-16 08:35:44 +01:00
+								        if (channelIdOrUser.startsWith("user/")) {
 								            return FEED_BASE_USER + channelIdOrUser.replace("user/", "");
 								        } else if (channelIdOrUser.startsWith("channel/")) {
 								            return FEED_BASE_CHANNEL_ID + channelIdOrUser.replace("channel/", "");
 								        } else {
 								            return FEED_BASE_CHANNEL_ID + channelIdOrUser;
 								        }
 								    }
-												Adress reviews and do some improvements

Adress changes requested in reviews.
Do some improvements, remove unused imports and format some code to be in the 100 characters line limit.

											
										
										
											2021-04-25 18:54:26 +02:00
+								    public static OffsetDateTime parseDateFrom(final String textualUploadDate)
 								            throws ParsingException {
-												Implement time ago parser and improve localization handling

- Handle special cases for languages where the number is not shown
- Rework the Downloader base implementation, allowing for more
advanced things to be done
- Separate the localization from the content country (just like
YouTube let's the user choose both).

											
										
										
											2019-04-28 22:03:16 +02:00
+								        try {
-												Switch from Calendar to OffsetDateTime in DateWrapper.

											
										
										
											2020-10-18 05:48:14 +02:00
+								            return OffsetDateTime.parse(textualUploadDate);
-												Use the youtubei API for YouTube playlists

											
										
										
											2021-04-08 16:17:59 +02:00
+								        } catch (final DateTimeParseException e) {
-												Fix YouTube parse error when only a date is present.

											
										
										
											2020-11-03 11:54:46 +01:00
+								            try {
 								                return LocalDate.parse(textualUploadDate).atStartOfDay().atOffset(ZoneOffset.UTC);
-												Use the youtubei API for YouTube playlists

											
										
										
											2021-04-08 16:17:59 +02:00
+								            } catch (final DateTimeParseException e1) {
-												Adress reviews and do some improvements

Adress changes requested in reviews.
Do some improvements, remove unused imports and format some code to be in the 100 characters line limit.

											
										
										
											2021-04-25 18:54:26 +02:00
+								                throw new ParsingException("Could not parse date: \"" + textualUploadDate + "\"",
 								                        e1);
-												Fix YouTube parse error when only a date is present.

											
										
										
											2020-11-03 11:54:46 +01:00
+								            }
-												Implement time ago parser and improve localization handling

- Handle special cases for languages where the number is not shown
- Rework the Downloader base implementation, allowing for more
advanced things to be done
- Separate the localization from the content country (just like
YouTube let's the user choose both).

											
										
										
											2019-04-28 22:03:16 +02:00
+								        }
 								    }
-												Move getInitialData() method to YouTubeParsingHelper

Rename ytInitialData to initialData
											
										
										
											2020-02-22 23:51:02 +01:00
-												[Youtube] Add some comments to mix

											
										
										
											2020-02-02 18:15:47 +01:00
+								    /**
-												[Youtube] Add cookies to youtube mix request

This way youtube wont return duplicates when getting more items of the mix (but youtube can also track us)

											
										
										
											2020-04-16 19:28:27 +02:00
+								     * Checks if the given playlist id is a YouTube Mix (auto-generated playlist)
 								     * Ids from a YouTube Mix start with "RD"
-												use only one constant UTF-8

											
										
										
											2021-02-07 22:12:22 +01:00
+								     *
-												Adress changes

											
										
										
											2021-06-24 18:39:16 +02:00
+								     * @param playlistId the playlist id
-												[Youtube] Add cookies to youtube mix request

This way youtube wont return duplicates when getting more items of the mix (but youtube can also track us)

											
										
										
											2020-04-16 19:28:27 +02:00
+								     * @return Whether given id belongs to a YouTube Mix
-												[Youtube] Add some comments to mix

											
										
										
											2020-02-02 18:15:47 +01:00
+								     */
-												Annotate YoutubeParsingHelper methods with Nonnull when needed

											
										
										
											2021-06-11 13:34:23 +02:00
+								    public static boolean isYoutubeMixId(@Nonnull final String playlistId) {
-												[YouTube] Now music mixes can be treated as normal mixes

Using a playlist extractor on them would result in "Unviewable playlist" errors

											
										
										
											2023-01-15 23:28:59 +01:00
+								        return playlistId.startsWith("RD");
-												[Youtube] apply wb9688 suggestion (mix)

Channel mix adjusments and test
Don't accept youtube music mix urls as playlist
Don't override playlistData to keep getInitialPage()
Remove json constants
Indentation

											
										
										
											2020-03-21 18:48:12 +01:00
+								    }
-												[YouTube] Extract mixes from streams related items

											
										
										
											2022-02-02 20:23:11 +01:00
+								    /**
 								     * Checks if the given playlist id is a YouTube My Mix (auto-generated playlist)
 								     * Ids from a YouTube My Mix start with "RDMM"
 								     * <p>
 								     * Since "RDMM" itself begins with "RD", such ids are also accepted by
 								     * {@link #isYoutubeMixId(String)}.
 								     *
 								     * @param playlistId the playlist id
 								     * @return Whether given id belongs to a YouTube My Mix
 								     */
 								    public static boolean isYoutubeMyMixId(@Nonnull final String playlistId) {
 								        return playlistId.startsWith("RDMM");
 								    }
-												[Youtube] apply wb9688 suggestion (mix)

Channel mix adjusments and test
Don't accept youtube music mix urls as playlist
Don't override playlistData to keep getInitialPage()
Remove json constants
Indentation

											
										
										
											2020-03-21 18:48:12 +01:00
+								    /**
-												[Youtube] Add cookies to youtube mix request

This way youtube wont return duplicates when getting more items of the mix (but youtube can also track us)

											
										
										
											2020-04-16 19:28:27 +02:00
+								     * Checks if the given playlist id is a YouTube Music Mix (auto-generated playlist)
-												[YouTube] Music Mix: Fix playlist not being accepted

Regression introduced by YouTube Mix support (#280)
											
										
										
											2020-12-23 21:07:30 +01:00
+								     * Ids from a YouTube Music Mix start with "RDAMVM" or "RDCLAK"
-												use only one constant UTF-8

											
										
										
											2021-02-07 22:12:22 +01:00
+								     *
-												Use lightweight requests when getting and checking YouTube API key and client version

											
										
										
											2021-04-12 18:24:32 +02:00
+								     * @param playlistId the playlist id
-												[Youtube] Add cookies to youtube mix request

This way youtube wont return duplicates when getting more items of the mix (but youtube can also track us)

											
										
										
											2020-04-16 19:28:27 +02:00
+								     * @return Whether given id belongs to a YouTube Music Mix
-												[Youtube] apply wb9688 suggestion (mix)

Channel mix adjusments and test
Don't accept youtube music mix urls as playlist
Don't override playlistData to keep getInitialPage()
Remove json constants
Indentation

											
										
										
											2020-03-21 18:48:12 +01:00
+								     */
-												Annotate YoutubeParsingHelper methods with Nonnull when needed

											
										
										
											2021-06-11 13:34:23 +02:00
+								    public static boolean isYoutubeMusicMixId(@Nonnull final String playlistId) {
-												[YouTube] Music Mix: Fix playlist not being accepted

Regression introduced by YouTube Mix support (#280)
											
										
										
											2020-12-23 21:07:30 +01:00
+								        return playlistId.startsWith("RDAMVM") || playlistId.startsWith("RDCLAK");
-												[Youtube] Implement mix extractor for auto-generated playlists.

-New YoutubeMixPlaylistExtractor, that extracts from a mix (auto-generated playlist).
-The url has the format of "youtube.com/watch?v=videoID&playlistID",
where playlistID always starts with "RD" and usually followed by the videoID.
-Change YoutubePlaylistLinkHandlerFactory to create a linkhandler with the given url if it is a mix.
-Change YoutubeService to return YoutubeMixPlaylistExtractor if the url is a mix.

											
										
										
											2020-02-02 14:19:48 +01:00
+								    }
-												use only one constant UTF-8

											
										
										
											2021-02-07 22:12:22 +01:00
-												[Youtube] Apply review suggestions and avoid channel mix edge case

											
										
										
											2020-09-26 11:22:24 +02:00
+								    /**
 								     * Checks if the given playlist id is a YouTube Channel Mix (auto-generated playlist)
 								     * Ids from a YouTube channel Mix start with "RDCM"
-												use only one constant UTF-8

											
										
										
											2021-02-07 22:12:22 +01:00
+								     *
-												[Youtube] Apply review suggestions and avoid channel mix edge case

											
										
										
											2020-09-26 11:22:24 +02:00
+								     * @return Whether given id belongs to a YouTube Channel Mix
 								     */
-												Annotate YoutubeParsingHelper methods with Nonnull when needed

											
										
										
											2021-06-11 13:34:23 +02:00
+								    public static boolean isYoutubeChannelMixId(@Nonnull final String playlistId) {
-												[Youtube] Apply review suggestions and avoid channel mix edge case

											
										
										
											2020-09-26 11:22:24 +02:00
+								        return playlistId.startsWith("RDCM");
 								    }
 								    /**
-												[YouTube] Differentiate genre mixes from normal mixes

Note: genre mixes already worked, now they are just considered as such in various video id extraction and in related items
Note 2: now extracting a mix id from a *normal* youtube mix id will fail if the video id wouldn't be exactly 11 characters long

											
										
										
											2022-02-17 17:19:54 +01:00
+								     * Checks if the given playlist id is a YouTube Genre Mix (auto-generated playlist)
 								     * Ids from a YouTube Genre Mix start with "RDGMEM"
 								     *
 								     * @return Whether given id belongs to a YouTube Genre Mix
 								     */
 								    public static boolean isYoutubeGenreMixId(@Nonnull final String playlistId) {
 								        return playlistId.startsWith("RDGMEM");
 								    }
 								    /**
 								     * @param playlistId the playlist id to parse
 								     * @return the video id extracted from the playlistId, i.e. the part of the id that follows
 								     *         the mix prefix
 								     * @throws ParsingException if the playlistId is null or empty, if the playlistId is not a mix,
 								     *                          if it is a mix but it's not based on a specific stream (this is the
 								     *                          case for channel or genre mixes)
-												[Youtube] Apply review suggestions and avoid channel mix edge case

											
										
										
											2020-09-26 11:22:24 +02:00
+								     */
-												Annotate YoutubeParsingHelper methods with Nonnull when needed

											
										
										
											2021-06-11 13:34:23 +02:00
+								    @Nonnull
-												[YouTube] Differentiate genre mixes from normal mixes

Note: genre mixes already worked, now they are just considered as such in various video id extraction and in related items
Note 2: now extracting a mix id from a *normal* youtube mix id will fail if the video id wouldn't be exactly 11 characters long

											
										
										
											2022-02-17 17:19:54 +01:00
+								    public static String extractVideoIdFromMixId(final String playlistId)
-												Annotate YoutubeParsingHelper methods with Nonnull when needed

											
										
										
											2021-06-11 13:34:23 +02:00
+								            throws ParsingException {
-												[YouTube] Differentiate genre mixes from normal mixes

Note: genre mixes already worked, now they are just considered as such in various video id extraction and in related items
Note 2: now extracting a mix id from a *normal* youtube mix id will fail if the video id wouldn't be exactly 11 characters long

											
										
										
											2022-02-17 17:19:54 +01:00
+								        if (isNullOrEmpty(playlistId)) {
 								            throw new ParsingException("Video id could not be determined from empty playlist id");
 								        } else if (isYoutubeMyMixId(playlistId)) {
-												[Youtube] Apply review suggestions and avoid channel mix edge case

											
										
										
											2020-09-26 11:22:24 +02:00
+								            return playlistId.substring(4);
-												[YouTube] Remove useless comments about mixes

											
										
										
											2022-02-17 15:25:06 +01:00
+								        } else if (isYoutubeMusicMixId(playlistId)) {
-												[Youtube] Apply review suggestions and avoid channel mix edge case

											
										
										
											2020-09-26 11:22:24 +02:00
+								            return playlistId.substring(6);
-												[YouTube] Remove useless comments about mixes

											
										
										
											2022-02-17 15:25:06 +01:00
+								        } else if (isYoutubeChannelMixId(playlistId)) {
-												[YouTube] Differentiate genre mixes from normal mixes

Note: genre mixes already worked, now they are just considered as such in various video id extraction and in related items
Note 2: now extracting a mix id from a *normal* youtube mix id will fail if the video id wouldn't be exactly 11 characters long

											
										
										
											2022-02-17 17:19:54 +01:00
+								            // Channel mixes are of the form RDCM{channelId}, so videoId can't be determined
 								            throw new ParsingException("Video id could not be determined from channel mix id: "
 								                    + playlistId);
 								        } else if (isYoutubeGenreMixId(playlistId)) {
 								            // Genre mixes are of the form RDGMEM{garbage}, so videoId can't be determined
 								            throw new ParsingException("Video id could not be determined from genre mix id: "
-												Adress reviews and do some improvements

Adress changes requested in reviews.
Do some improvements, remove unused imports and format some code to be in the 100 characters line limit.

											
										
										
											2021-04-25 18:54:26 +02:00
+								                    + playlistId);
-												[Youtube] Apply review suggestions and avoid channel mix edge case

											
										
										
											2020-09-26 11:22:24 +02:00
-												[YouTube] Remove useless comments about mixes

											
										
										
											2022-02-17 15:25:06 +01:00
+								        } else if (isYoutubeMixId(playlistId)) { // normal mix
-												[YouTube] Differentiate genre mixes from normal mixes

Note: genre mixes already worked, now they are just considered as such in various video id extraction and in related items
Note 2: now extracting a mix id from a *normal* youtube mix id will fail if the video id wouldn't be exactly 11 characters long

											
										
										
											2022-02-17 17:19:54 +01:00
+								            if (playlistId.length() != 13) {
 								                // Stream YouTube mixes are of the form RD{videoId}, but if videoId is not exactly
 								                // 11 characters then it can't be a video id, hence we are dealing with a different
 								                // type of mix (e.g. genre mixes handled above, of the form RDGMEM{garbage})
 								                throw new ParsingException("Video id could not be determined from mix id: "
 								                    + playlistId);
 								            }
-												[Youtube] Apply review suggestions and avoid channel mix edge case

											
										
										
											2020-09-26 11:22:24 +02:00
+								            return playlistId.substring(2);
-												Replace explicit string checks whether a playlist íd is a certain YouTube Mix type with calling the dedicated methods


											
										
										
											2020-12-25 15:00:31 +01:00
+								        } else { // not a mix
-												[YouTube] Differentiate genre mixes from normal mixes

Note: genre mixes already worked, now they are just considered as such in various video id extraction and in related items
Note 2: now extracting a mix id from a *normal* youtube mix id will fail if the video id wouldn't be exactly 11 characters long

											
										
										
											2022-02-17 17:19:54 +01:00
+								            throw new ParsingException("Video id could not be determined from playlist id: "
-												Adress reviews and do some improvements

Adress changes requested in reviews.
Do some improvements, remove unused imports and format some code to be in the 100 characters line limit.

											
										
										
											2021-04-25 18:54:26 +02:00
+								                    + playlistId);
-												[Youtube] Apply review suggestions and avoid channel mix edge case

											
										
										
											2020-09-26 11:22:24 +02:00
+								        }
 								    }
-												[Youtube] Implement mix extractor for auto-generated playlists.

-New YoutubeMixPlaylistExtractor, that extracts from a mix (auto-generated playlist).
-The url has the format of "youtube.com/watch?v=videoID&playlistID",
where playlistID always starts with "RD" and usually followed by the videoID.
-Change YoutubePlaylistLinkHandlerFactory to create a linkhandler with the given url if it is a mix.
-Change YoutubeService to return YoutubeMixPlaylistExtractor if the url is a mix.

											
										
										
											2020-02-02 14:19:48 +01:00
-												[YouTube] Extract playlist type in playlist extractor

											
										
										
											2022-02-17 17:39:49 +01:00
+								    /**
 								     * Determines the type of a YouTube playlist from the prefix of its id.
 								     *
 								     * @param playlistId the playlist id to parse
 								     * @return the {@link PlaylistInfo.PlaylistType} extracted from the playlistId (mix playlist
 								     *         types included)
 								     * @throws ParsingException if the playlistId is null or empty
 								     */
 								    @Nonnull
 								    public static PlaylistInfo.PlaylistType extractPlaylistTypeFromPlaylistId(
 								            final String playlistId) throws ParsingException {
 								        // The music, channel and genre mix prefixes all begin with "RD", so they have to be
 								        // tested before the generic isYoutubeMixId check further down.
 								        if (isNullOrEmpty(playlistId)) {
 								            throw new ParsingException("Could not extract playlist type from empty playlist id");
 								        } else if (isYoutubeMusicMixId(playlistId)) {
 								            return PlaylistInfo.PlaylistType.MIX_MUSIC;
 								        } else if (isYoutubeChannelMixId(playlistId)) {
 								            return PlaylistInfo.PlaylistType.MIX_CHANNEL;
 								        } else if (isYoutubeGenreMixId(playlistId)) {
 								            return PlaylistInfo.PlaylistType.MIX_GENRE;
 								        } else if (isYoutubeMixId(playlistId)) { // normal mix
 								            // Either a normal mix based on a stream, or a "my mix" (still based on a stream).
 								            // NOTE: if YouTube introduces even more types of mixes that still start with RD,
 								            // they will default to this, even though they might not be based on a stream.
 								            return PlaylistInfo.PlaylistType.MIX_STREAM;
 								        } else {
 								            // not a known type of mix: just consider it a normal playlist
 								            return PlaylistInfo.PlaylistType.NORMAL;
 								        }
 								    }
 								    /**
 								     * Determine the {@link PlaylistInfo.PlaylistType} of a playlist from the {@code list}
 								     * query parameter of its URL.
 								     *
 								     * @param playlistUrl the playlist url to parse
 								     * @return the {@link PlaylistInfo.PlaylistType} extracted from the playlistUrl's list param
 								     *         (mix playlist types included)
 								     * @throws ParsingException if the playlistUrl is malformed, if has no list param or if the list
 								     *                          param is empty
 								     */
 								    public static PlaylistInfo.PlaylistType extractPlaylistTypeFromPlaylistUrl(
 								            final String playlistUrl) throws ParsingException {
 								        final String listParameterValue;
 								        try {
 								            final var parsedUrl = Utils.stringToURL(playlistUrl);
 								            listParameterValue = Utils.getQueryValue(parsedUrl, "list");
 								        } catch (final MalformedURLException e) {
 								            throw new ParsingException("Could not extract playlist type from malformed url", e);
 								        }

 								        // The ID-based method rejects a missing or empty list parameter itself
 								        return extractPlaylistTypeFromPlaylistId(listParameterValue);
 								    }
-												Address requested changes

											
										
										
											2022-02-05 22:05:07 +01:00
+								    private static JsonObject getInitialData(final String html) throws ParsingException {
-												Move getInitialData() method to YouTubeParsingHelper

Rename ytInitialData to initialData
											
										
										
											2020-02-22 23:51:02 +01:00
+								        try {
-												Adress requested changes

											
										
										
											2022-02-05 22:05:07 +01:00
+								            return JsonParser.object().from(getStringResultFromRegexArray(html,
 								                    INITIAL_DATA_REGEXES, 1));
-												Use the youtubei API for YouTube playlists

											
										
										
											2021-04-08 16:17:59 +02:00
+								        } catch (final JsonParserException | Parser.RegexException e) {
-												Move getInitialData() method to YouTubeParsingHelper

Rename ytInitialData to initialData
											
										
										
											2020-02-22 23:51:02 +01:00
+								            throw new ParsingException("Could not get ytInitialData", e);
 								        }
 								    }
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								    public static boolean isHardcodedClientVersionValid()
-												Adress changes

											
										
										
											2021-06-24 18:39:16 +02:00
+								            throws IOException, ExtractionException {
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								        if (hardcodedClientVersionValid.isPresent()) {
 								            return hardcodedClientVersionValid.get();
-												Adress reviews and do some improvements

Adress changes requested in reviews.
Do some improvements, remove unused imports and format some code to be in the 100 characters line limit.

											
										
										
											2021-04-25 18:54:26 +02:00
+								        }
-												Use lightweight requests when getting and checking YouTube API key and client version

											
										
										
											2021-04-12 18:24:32 +02:00
+								        // @formatter:off
 								        final byte[] body = JsonWriter.string()
 								            .object()
 								                .object("context")
 								                    .object("client")
-												Adress changes

											
										
										
											2021-06-24 18:39:16 +02:00
+								                        .value("hl", "en-GB")
-												Use lightweight requests when getting and checking YouTube API key and client version

											
										
										
											2021-04-12 18:24:32 +02:00
+								                        .value("gl", "GB")
-												Adress changes

											
										
										
											2021-06-24 18:39:16 +02:00
+								                        .value("clientName", "WEB")
-												Use lightweight requests when getting and checking YouTube API key and client version

											
										
										
											2021-04-12 18:24:32 +02:00
+								                        .value("clientVersion", HARDCODED_CLIENT_VERSION)
-												[YouTube] Improve payloads and URLs of InnerTube requests

For every InnerTube request:
- Always add a `request` object with the following properties:
  - "internalExperimentFlags" set to an empty array;
  - "useSsl" set to "true";
  - "lockedSafetyMode" set to "false".
- Use proper TODO comment to provide a way to enable restricted mode on every
request and add it on requests on which it wasn't present.

For YouTube Music:
- Remove alt query parameter, as it is not used anymore by the website;
- Add prettyPrint query parameter with false value on YouTube Music search
continuations.

											
										
										
											2023-10-07 15:00:40 +02:00
+								                        .value("platform", "DESKTOP")
-												[YouTube] Add utcOffsetMinutes to all InnerTube payloads

This should make returned dates consistent between timezones and countries on
which the extractor is ran.

It was previously only set on YouTube Music search continuations.

											
										
										
											2023-10-07 15:04:36 +02:00
+								                        .value("utcOffsetMinutes", 0)
-												[YouTube] Improve payloads and URLs of InnerTube requests

For every InnerTube request:
- Always add a `request` object with the following properties:
  - "internalExperimentFlags" set to an empty array;
  - "useSsl" set to "true";
  - "lockedSafetyMode" set to "false".
- Use proper TODO comment to provide a way to enable restricted mode on every
request and add it on requests on which it wasn't present.

For YouTube Music:
- Remove alt query parameter, as it is not used anymore by the website;
- Add prettyPrint query parameter with false value on YouTube Music search
continuations.

											
										
										
											2023-10-07 15:00:40 +02:00
+								                    .end()
 								                    .object("request")
 								                        .array("internalExperimentFlags")
 								                        .end()
 								                        .value("useSsl", true)
 								                    .end()
 								                    .object("user")
 								                        // TODO: provide a way to enable restricted mode with:
 								                        //  .value("enableSafetyMode", boolean)
 								                        .value("lockedSafetyMode", false)
-												Use lightweight requests when getting and checking YouTube API key and client version

											
										
										
											2021-04-12 18:24:32 +02:00
+								                    .end()
-												Adress changes

											
										
										
											2021-06-24 18:39:16 +02:00
+								                .end()
 								                .value("fetchLiveState", true)
-												Use StandardCharsets.UTF_8.

											
										
										
											2022-07-28 04:19:21 +02:00
+								            .end().done().getBytes(StandardCharsets.UTF_8);
-												Use lightweight requests when getting and checking YouTube API key and client version

											
										
										
											2021-04-12 18:24:32 +02:00
+								        // @formatter:on
-												[YouTube] Avoid crashing by letting exceptions bubble up

											
										
										
											2020-02-29 22:42:43 +01:00
-												[YouTube] Create constants for client names/versions

											
										
										
											2024-04-20 11:43:54 +02:00
+								        final var headers = getClientHeaders(WEB_CLIENT_ID, HARDCODED_CLIENT_VERSION);
-												Improve getClientVersion()

											
										
										
											2020-02-26 15:22:59 +01:00
-												Use lightweight requests when getting and checking YouTube API key and client version

											
										
										
											2021-04-12 18:24:32 +02:00
+								        // This endpoint is fetched by the YouTube website to get the items of its main menu and is
 								        // pretty lightweight (around 30kB)
-												Use Downloader's postWithContentType and postWithContentTypeJson methods in services and extractors

											
										
										
											2022-07-15 20:56:37 +02:00
+								        final Response response = getDownloader().postWithContentTypeJson(
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								                YOUTUBEI_V1_URL + "guide?" + DISABLE_PRETTY_PRINT_PARAMETER,
-												Use Downloader's postWithContentType and postWithContentTypeJson methods in services and extractors

											
										
										
											2022-07-15 20:56:37 +02:00
+								                headers, body);
-												Use lightweight requests when getting and checking YouTube API key and client version

											
										
										
											2021-04-12 18:24:32 +02:00
+								        final String responseBody = response.responseBody();
 								        final int responseCode = response.responseCode();
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								        hardcodedClientVersionValid = Optional.of(responseBody.length() > 5000
-												Adress the last requested changes + update YoutubeCommentsExtractor mocks

											
										
										
											2021-07-09 18:23:46 +02:00
+								                && responseCode == 200); // Ensure to have a valid response
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								        return hardcodedClientVersionValid.get();
-												Check if hard-coded client version is valid in separate function

											
										
										
											2020-02-28 16:35:24 +01:00
+								    }
-												[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods have been added to the Utils class: they try to match an array of regular expressions, given either as strings or as Patterns, against some content and return the match at a given group index, or at index 0.
Also some code refactoring has been made in this class.

											
										
										
											2022-01-09 22:49:37 +01:00
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								    private static void extractClientVersionFromSwJs()
-												[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods in Utils class have been added which allow to try to get a match of regular expressions in a string array, or a Pattern array, on a content, on a specific index or 0.
Also some code refactoring has been made in this class.

											
										
										
											2022-01-09 22:49:37 +01:00
+								            throws IOException, ExtractionException {
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								        if (clientVersionExtracted) {
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								            return;
 								        }
-												[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods in Utils class have been added which allow to try to get a match of regular expressions in a string array, or a Pattern array, on a content, on a specific index or 0.
Also some code refactoring has been made in this class.

											
										
										
											2022-01-09 22:49:37 +01:00
+								        final String url = "https://www.youtube.com/sw.js";
-												Use immutable Map factory methods.

											
										
										
											2022-11-12 05:01:05 +01:00
+								        final var headers = getOriginReferrerHeaders("https://www.youtube.com");
-												[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods in Utils class have been added which allow to try to get a match of regular expressions in a string array, or a Pattern array, on a content, on a specific index or 0.
Also some code refactoring has been made in this class.

											
										
										
											2022-01-09 22:49:37 +01:00
+								        final String response = getDownloader().get(url, headers).responseBody();
 								        try {
 								            clientVersion = getStringResultFromRegexArray(response,
 								                    INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
 								        } catch (final Parser.RegexException e) {
-												Fix missing imports and Checkstyle issues

											
										
										
											2022-03-27 20:51:30 +02:00
+								            throw new ParsingException("Could not extract YouTube WEB InnerTube client version "
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								                    + "from sw.js", e);
-												[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods in Utils class have been added which allow to try to get a match of regular expressions in a string array, or a Pattern array, on a content, on a specific index or 0.
Also some code refactoring has been made in this class.

											
										
										
											2022-01-09 22:49:37 +01:00
+								        }
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								        clientVersionExtracted = true;
-												[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods in Utils class have been added which allow to try to get a match of regular expressions in a string array, or a Pattern array, on a content, on a specific index or 0.
Also some code refactoring has been made in this class.

											
										
										
											2022-01-09 22:49:37 +01:00
+								    }
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								    private static void extractClientVersionFromHtmlSearchResultsPage()
-												[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods in Utils class have been added which allow to try to get a match of regular expressions in a string array, or a Pattern array, on a content, on a specific index or 0.
Also some code refactoring has been made in this class.

											
										
										
											2022-01-09 22:49:37 +01:00
+								            throws IOException, ExtractionException {
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								        // Don't extract the InnerTube client version if it has been already extracted
 								        if (clientVersionExtracted) {
-												[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods in Utils class have been added which allow to try to get a match of regular expressions in a string array, or a Pattern array, on a content, on a specific index or 0.
Also some code refactoring has been made in this class.

											
										
										
											2022-01-09 22:49:37 +01:00
+								            return;
 								        }
-												[YouTube] Improve WEB client version and API key HTML extraction

Common code in WEB client version HTML extraction has been deduplicated, usage of the Java 8 Stream API has been made and initial data fallback has been used as a last resort.
This means that the client version extraction from regexes will be used before this fallback, as it doesn't contain the full client version.
This can be used as a way to fingerprint the extractor, even if it seems to be not the case.

											
										
										
											2022-08-08 19:36:19 +02:00
-												Use lightweight requests when getting and checking YouTube API key and client version

											
										
										
											2021-04-12 18:24:32 +02:00
+								        // Don't provide a search term in order to have a smaller response
-												Adress again reviews, fix some rebase issues

											
										
										
											2021-06-26 20:04:55 +02:00
+								        final String url = "https://www.youtube.com/results?search_query=&ucbcb=1";
-												Use Collections.singletonMap().

											
										
										
											2022-07-27 03:26:02 +02:00
+								        final String html = getDownloader().get(url, getCookieHeader()).responseBody();
-												Extract YouTube's key

											
										
										
											2020-07-26 12:00:56 +02:00
+								        final JsonObject initialData = getInitialData(html);
-												Use lightweight requests when getting and checking YouTube API key and client version

											
										
										
											2021-04-12 18:24:32 +02:00
+								        final JsonArray serviceTrackingParams = initialData.getObject("responseContext")
 								                .getArray("serviceTrackingParams");
-												[YouTube] Avoid crashing by letting exceptions bubble up

											
										
										
											2020-02-29 22:42:43 +01:00
-												Use lightweight requests when getting and checking YouTube API key and client version

											
										
										
											2021-04-12 18:24:32 +02:00
+								        // Try to get version from initial data first
-												[YouTube] Improve WEB client version and API key HTML extraction

Common code in WEB client version HTML extraction has been deduplicated, usage of the Java 8 Stream API has been made and initial data fallback has been used as a last resort.
This means that the client version extraction from regexes will be used before this fallback, as it doesn't contain the full client version.
This can be used as a way to fingerprint the extractor, even if it seems to be not the case.

											
										
										
											2022-08-08 19:36:19 +02:00
+								        final Stream<JsonObject> serviceTrackingParamsStream = serviceTrackingParams.stream()
 								                .filter(JsonObject.class::isInstance)
 								                .map(JsonObject.class::cast);
 								        clientVersion = getClientVersionFromServiceTrackingParam(
 								                serviceTrackingParamsStream, "CSI", "cver");
 								        if (clientVersion == null) {
 								            try {
 								                clientVersion = getStringResultFromRegexArray(html,
 								                        INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
 								            } catch (final Parser.RegexException ignored) {
-												Add getClientVersion()  and HARDCODED_CLIENT_VERSION to YouTubeParsingHelper

Prefer hardcoded client version above the current one when making requests to retrieve the same JSON structure for each request.
											
										
										
											2020-02-24 19:03:54 +01:00
+								            }
-												[YouTube] Avoid crashing by letting exceptions bubble up

											
										
										
											2020-02-29 22:42:43 +01:00
+								        }
-												Add getClientVersion()  and HARDCODED_CLIENT_VERSION to YouTubeParsingHelper

Prefer hardcoded client version above the current one when making requests to retrieve the same JSON structure for each request.
											
										
										
											2020-02-24 19:03:54 +01:00
-												[YouTube] Improve WEB client version and API key HTML extraction

Common code in WEB client version HTML extraction has been deduplicated, usage of the Java 8 Stream API has been made and initial data fallback has been used as a last resort.
This means that the client version extraction from regexes will be used before this fallback, as it doesn't contain the full client version.
This can be used as a way to fingerprint the extractor, even if it seems to be not the case.

											
										
										
											2022-08-08 19:36:19 +02:00
+								        // Fallback to get a shortened client version which does not contain the last two
 								        // digits
 								        if (isNullOrEmpty(clientVersion)) {
 								            clientVersion = getClientVersionFromServiceTrackingParam(
 								                    serviceTrackingParamsStream, "ECATCHER", "client.version");
 								        }
 								        if (clientVersion == null) {
 								            throw new ParsingException(
 								                    // CHECKSTYLE:OFF
 								                    "Could not extract YouTube WEB InnerTube client version from HTML search results page");
 								                    // CHECKSTYLE:ON
-												[YouTube] Avoid crashing by letting exceptions bubble up

											
										
										
											2020-02-29 22:42:43 +01:00
+								        }
-												[YouTube] Improve WEB client version and API key HTML extraction

Common code in WEB client version HTML extraction has been deduplicated, usage of the Java 8 Stream API has been made and initial data fallback has been used as a last resort.
This means that the client version extraction from regexes will be used before this fallback, as it doesn't contain the full client version.
This can be used as a way to fingerprint the extractor, even if it seems to be not the case.

											
										
										
											2022-08-08 19:36:19 +02:00
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								        clientVersionExtracted = true;
-												Extract YouTube's key

											
										
										
											2020-07-26 12:00:56 +02:00
+								    }
-												[YouTube] Improve WEB client version and API key HTML extraction

Common code in WEB client version HTML extraction has been deduplicated, usage of the Java 8 Stream API has been made and initial data fallback has been used as a last resort.
This means that the client version extraction from regexes will be used before this fallback, as it doesn't contain the full client version.
This can be used as a way to fingerprint the extractor, even if it seems to be not the case.

											
										
										
											2022-08-08 19:36:19 +02:00
+								    @Nullable
 								    private static String getClientVersionFromServiceTrackingParam(
 								            @Nonnull final Stream<JsonObject> serviceTrackingParamsStream,
 								            @Nonnull final String serviceName,
 								            @Nonnull final String clientVersionKey) {
 								        return serviceTrackingParamsStream.filter(serviceTrackingParam ->
 								                        serviceTrackingParam.getString("service", "")
 								                                .equals(serviceName))
 								                .flatMap(serviceTrackingParam -> serviceTrackingParam.getArray("params")
 								                        .stream())
 								                .filter(JsonObject.class::isInstance)
 								                .map(JsonObject.class::cast)
 								                .filter(param -> param.getString("key", "")
 								                        .equals(clientVersionKey))
 								                .map(param -> param.getString("value"))
 								                .filter(paramValue -> !isNullOrEmpty(paramValue))
 								                .findFirst()
 								                .orElse(null);
 								    }
-												Extract YouTube's key

											
										
										
											2020-07-26 12:00:56 +02:00
+								    /**
-												[YouTube] Update client versions and fix a bug when using resetClientVersionAndKey method

The boolean keyAndVersionExtracted in YoutubeParsingHelper was not set back to false when resetting the client version and the key, which made the extractor use null the next time the client version or the key was requested if they had been extracted before.
Also update client versions.

											
										
										
											2021-12-11 16:52:17 +01:00
+								     * Get the client version used by YouTube website on InnerTube requests.
-												Extract YouTube's key

											
										
										
											2020-07-26 12:00:56 +02:00
+								     */
 								    public static String getClientVersion() throws IOException, ExtractionException {
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								        if (!isNullOrEmpty(clientVersion)) {
 								            return clientVersion;
 								        }
-												Extract YouTube's key

											
										
										
											2020-07-26 12:00:56 +02:00
-												[YouTube] Improve WEB client version and API key HTML extraction

Common code in WEB client version HTML extraction has been deduplicated, usage of the Java 8 Stream API has been made and initial data fallback has been used as a last resort.
This means that the client version extraction from regexes will be used before this fallback, as it doesn't contain the full client version.
This can be used as a way to fingerprint the extractor, even if it seems to be not the case.

											
										
										
											2022-08-08 19:36:19 +02:00
+								        // Always extract the latest client version, by trying first to extract it from the
 								        // JavaScript service worker, then from HTML search results page as a fallback, to prevent
-												[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more

Use the TV embedded client technique to get streams of embeddable age-restricted videos.

This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, category, the uploadDate and the publishDate properties.

The outdated code for these contents has been removed.

Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.

Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.

											
										
										
											2022-04-02 19:06:36 +02:00
+								        // fingerprinting based on the client version used
-												[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods in Utils class have been added which allow to try to get a match of regular expressions in a string array, or a Pattern array, on a content, on a specific index or 0.
Also some code refactoring has been made in this class.

											
										
										
											2022-01-09 22:49:37 +01:00
+								        try {
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								            extractClientVersionFromSwJs();
-												[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods in Utils class have been added which allow to try to get a match of regular expressions in a string array, or a Pattern array, on a content, on a specific index or 0.
Also some code refactoring has been made in this class.

											
										
										
											2022-01-09 22:49:37 +01:00
+								        } catch (final Exception e) {
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								            extractClientVersionFromHtmlSearchResultsPage();
-												[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods in Utils class have been added which allow to try to get a match of regular expressions in a string array, or a Pattern array, on a content, on a specific index or 0.
Also some code refactoring has been made in this class.

											
										
										
											2022-01-09 22:49:37 +01:00
+								        }
-												[YouTube] Update client versions and fix a bug when using resetClientVersionAndKey method

The boolean keyAndVersionExtracted in YoutubeParsingHelper was not set to false when resetting the client version and the key, which makes the extractor uses null on the next getting of the client version or the key if the clientVersion and the key were extracted before.
Also update client versions.

											
										
										
											2021-12-11 16:52:17 +01:00
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								        if (clientVersionExtracted) {
-												[YouTube] Update client versions and fix a bug when using resetClientVersionAndKey method

The boolean keyAndVersionExtracted in YoutubeParsingHelper was not set to false when resetting the client version and the key, which makes the extractor uses null on the next getting of the client version or the key if the clientVersion and the key were extracted before.
Also update client versions.

											
										
										
											2021-12-11 16:52:17 +01:00
+								            return clientVersion;
 								        }
-												Improve tests and randomness

- Use the existing RNG inside YoutubeParsingHelper
- Deduplicated test-setup for YouTube tests
- Minor improvements

											
										
										
											2022-02-07 21:23:38 +01:00
-												[YouTube] Improve WEB client version and API key HTML extraction

Common code in WEB client version HTML extraction has been deduplicated, usage of the Java 8 Stream API has been made and initial data fallback has been used as a last resort.
This means that the client version extraction from regexes will be used before this fallback, as it doesn't contain the full client version.
This can be used as a way to fingerprint the extractor, even if it seems to be not the case.

											
										
										
											2022-08-08 19:36:19 +02:00
+								        // Fallback to the hardcoded one if it is valid
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								        if (isHardcodedClientVersionValid()) {
-												Improve tests and randomness

- Use the existing RNG inside YoutubeParsingHelper
- Deduplicated test-setup for YouTube tests
- Minor improvements

											
										
										
											2022-02-07 21:23:38 +01:00
+								            clientVersion = HARDCODED_CLIENT_VERSION;
 								            return clientVersion;
 								        }
-												[YouTube] Update client versions and fix a bug when using resetClientVersionAndKey method

The boolean keyAndVersionExtracted in YoutubeParsingHelper was not set to false when resetting the client version and the key, which makes the extractor uses null on the next getting of the client version or the key if the clientVersion and the key were extracted before.
Also update client versions.

											
										
										
											2021-12-11 16:52:17 +01:00
+								        throw new ExtractionException("Could not get YouTube WEB client version");
-												Extract YouTube's key

											
										
										
											2020-07-26 12:00:56 +02:00
+								    }
-												Add comment for usage of YoutubeParsingHelper.resetClientVersionAndKey

											
										
										
											2021-01-14 20:01:52 +01:00
+								    /**
-												Add comment explaining YoutubeParsingHelper.resetClientVersionAndKey

											
										
										
											2021-02-17 19:21:39 +01:00
+								     * <p>
-												[YouTube] Update client versions and fix a bug when using resetClientVersionAndKey method

The boolean keyAndVersionExtracted in YoutubeParsingHelper was not set to false when resetting the client version and the key, which makes the extractor uses null on the next getting of the client version or the key if the clientVersion and the key were extracted before.
Also update client versions.

											
										
										
											2021-12-11 16:52:17 +01:00
+								     * <b>Only used in tests.</b>
-												Add comment explaining YoutubeParsingHelper.resetClientVersionAndKey

											
										
										
											2021-02-17 19:21:39 +01:00
+								     * </p>
-												Add comment for usage of YoutubeParsingHelper.resetClientVersionAndKey

											
										
										
											2021-01-14 20:01:52 +01:00
+								     *
-												Add comment explaining YoutubeParsingHelper.resetClientVersionAndKey

											
										
										
											2021-02-17 19:21:39 +01:00
+								     * <p>
-												Add comment for usage of YoutubeParsingHelper.resetClientVersionAndKey

											
										
										
											2021-01-14 20:01:52 +01:00
+								     * Quick-and-dirty solution to reset global state in between test classes.
-												Add comment explaining YoutubeParsingHelper.resetClientVersionAndKey

											
										
										
											2021-02-17 19:21:39 +01:00
+								     * </p>
 								     * <p>
 								     * This is needed for the mocks because in order to reach that state a network request has to
 								     * be made. If the global state is not reset and the RecordingDownloader is used,
 								     * then only the first test class has that request recorded. Meaning running the other
 								     * tests with mocks will fail, because the mock is missing.
 								     * </p>
-												Add comment for usage of YoutubeParsingHelper.resetClientVersionAndKey

											
										
										
											2021-01-14 20:01:52 +01:00
+								     */
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								    public static void resetClientVersion() {
-												Add method to allow resetting youtube client version and key

This is needed so that a request is made for each test class when running multiple at once. This way RecordingDownloader records all necessary requests.
This works as long as tests are run sequentially and not in parallel.

											
										
										
											2021-01-10 20:24:50 +01:00
+								        clientVersion = null;
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								        clientVersionExtracted = false;
-												Add method to allow resetting youtube client version and key

This is needed so that a request is made for each test class when running multiple at once. This way RecordingDownloader records all necessary requests.
This works as long as tests are run sequentially and not in parallel.

											
										
										
											2021-01-10 20:24:50 +01:00
+								    }
-												Add method to inject Random into YoutubeParsingHelper and use in tests

											
										
										
											2021-04-08 16:36:55 +02:00
+								    /**
 								     * <p>
-												[YouTube] Update client versions and fix a bug when using resetClientVersionAndKey method

The boolean keyAndVersionExtracted in YoutubeParsingHelper was not set to false when resetting the client version and the key, which makes the extractor uses null on the next getting of the client version or the key if the clientVersion and the key were extracted before.
Also update client versions.

											
										
										
											2021-12-11 16:52:17 +01:00
+								     * <b>Only used in tests.</b>
-												Add method to inject Random into YoutubeParsingHelper and use in tests

											
										
										
											2021-04-08 16:36:55 +02:00
+								     * </p>
 								     */
-												Adress again reviews, fix some rebase issues

											
										
										
											2021-06-26 20:04:55 +02:00
+								    public static void setNumberGenerator(final Random random) {
-												Add method to inject Random into YoutubeParsingHelper and use in tests

											
										
										
											2021-04-08 16:36:55 +02:00
+								        numberGenerator = random;
 								    }
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								    public static boolean isHardcodedYoutubeMusicClientVersionValid() throws IOException,
-												Adress reviews and do some improvements

Adress changes requested in reviews.
Do some improvements, remove unused imports and format some code to be in the 100 characters line limit.

											
										
										
											2021-04-25 18:54:26 +02:00
+								            ReCaptchaException {
-												Don't use the youtubei.googleapis.com but the websites domains + update client version of the desktop internal API

Use again www.youtube.com and music.youtube.com domains instead of youtubei.googleapis.com domain because it spoofs more a web client of YouTube or YouTube Music and may reduce Google's detection of NewPipe Extractor users.

											
										
										
											2021-05-09 16:14:37 +02:00
+								        final String url =
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								                "https://music.youtube.com/youtubei/v1/music/get_search_suggestions?"
 								                        + DISABLE_PRETTY_PRINT_PARAMETER;
-												Improve getYoutubeMusicKeys()

											
										
										
											2020-03-20 11:05:19 +01:00
 								        // @formatter:off
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								        final byte[] json = JsonWriter.string()
-												Improve getYoutubeMusicKeys()

											
										
										
											2020-03-20 11:05:19 +01:00
+								            .object()
 								                .object("context")
 								                    .object("client")
 								                        .value("clientName", "WEB_REMIX")
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								                        .value("clientVersion", HARDCODED_YOUTUBE_MUSIC_CLIENT_VERSION)
-												Don't use the youtubei.googleapis.com but the websites domains + update client version of the desktop internal API

Use again www.youtube.com and music.youtube.com domains instead of youtubei.googleapis.com domain because it spoofs more a web client of YouTube or YouTube Music and may reduce Google's detection of NewPipe Extractor users.

											
										
										
											2021-05-09 16:14:37 +02:00
+								                        .value("hl", "en-GB")
-												Improve getYoutubeMusicKeys()

											
										
										
											2020-03-20 11:05:19 +01:00
+								                        .value("gl", "GB")
-												[YouTube] Improve payloads and URLs of InnerTube requests

For every InnerTube request:
- Always add a `request` object with the following properties:
  - "internalExperimentFlags" set to an empty array;
  - "useSsl" set to "true";
  - "lockedSafetyMode" set to "false".
- Use proper TODO comment to provide a way to enable restricted mode on every
request and add it on requests on which it wasn't present.

For YouTube Music:
- Remove alt query parameter, as it is not used anymore by the website;
- Add prettyPrint query parameter with false value on YouTube Music search
continuations.

											
										
										
											2023-10-07 15:00:40 +02:00
+								                        .value("platform", "DESKTOP")
-												[YouTube] Add utcOffsetMinutes to all InnerTube payloads

This should make returned dates consistent between timezones and countries on
which the extractor is ran.

It was previously only set on YouTube Music search continuations.

											
										
										
											2023-10-07 15:04:36 +02:00
+								                        .value("utcOffsetMinutes", 0)
-												Improve getYoutubeMusicKeys()

											
										
										
											2020-03-20 11:05:19 +01:00
+								                    .end()
 								                    .object("request")
-												[YouTube] Improve payloads and URLs of InnerTube requests

For every InnerTube request:
- Always add a `request` object with the following properties:
  - "internalExperimentFlags" set to an empty array;
  - "useSsl" set to "true";
  - "lockedSafetyMode" set to "false".
- Use proper TODO comment to provide a way to enable restricted mode on every
request and add it on requests on which it wasn't present.

For YouTube Music:
- Remove alt query parameter, as it is not used anymore by the website;
- Add prettyPrint query parameter with false value on YouTube Music search
continuations.

											
										
										
											2023-10-07 15:00:40 +02:00
+								                        .array("internalExperimentFlags")
 								                        .end()
 								                        .value("useSsl", true)
-												Improve getYoutubeMusicKeys()

											
										
										
											2020-03-20 11:05:19 +01:00
+								                    .end()
 								                    .object("user")
-												[YouTube] Improve payloads and URLs of InnerTube requests

For every InnerTube request:
- Always add a `request` object with the following properties:
  - "internalExperimentFlags" set to an empty array;
  - "useSsl" set to "true";
  - "lockedSafetyMode" set to "false".
- Use proper TODO comment to provide a way to enable restricted mode on every
request and add it on requests on which it wasn't present.

For YouTube Music:
- Remove alt query parameter, as it is not used anymore by the website;
- Add prettyPrint query parameter with false value on YouTube Music search
continuations.

											
										
										
											2023-10-07 15:00:40 +02:00
+								                        // TODO: provide a way to enable restricted mode with:
 								                        //  .value("enableSafetyMode", boolean)
 								                        .value("lockedSafetyMode", false)
-												Improve getYoutubeMusicKeys()

											
										
										
											2020-03-20 11:05:19 +01:00
+								                    .end()
 								                .end()
-												Use the youtubei.googleapis.com domain for YouTube Music searches + change a check + update client version and mocks

Change the domain from music.youtube.com to youtubei.googleapis.com.
Use a lightweight request to check if the hardcoded YouTubeMusic keys are valid. Increase the length of the response to 500 because if the key is invalid, the length of the response returned is higher than 250 and the response when the key is valid is higher than 1500.
Format the YoutubeMusicSearchExtractor file.
Update YouTube web client version and mocks

											
										
										
											2021-04-11 17:41:40 +02:00
+								                .value("input", "")
-												Use StandardCharsets.UTF_8.

											
										
										
											2022-07-28 04:19:21 +02:00
+								            .end().done().getBytes(StandardCharsets.UTF_8);
-												Improve getYoutubeMusicKeys()

											
										
										
											2020-03-20 11:05:19 +01:00
+								        // @formatter:on
-												Use immutable Map factory methods.

											
										
										
											2022-11-12 05:01:05 +01:00
+								        final var headers = new HashMap<>(getOriginReferrerHeaders(YOUTUBE_MUSIC_URL));
-												[YouTube] Create constants for client names/versions

											
										
										
											2024-04-20 11:43:54 +02:00
+								        headers.putAll(getClientHeaders(YOUTUBE_MUSIC_CLIENT_ID,
 								                HARDCODED_YOUTUBE_MUSIC_CLIENT_VERSION));
-												Improve getYoutubeMusicKeys()

											
										
										
											2020-03-20 11:05:19 +01:00
-												Use Downloader's postWithContentType and postWithContentTypeJson methods in services and extractors

											
										
										
											2022-07-15 20:56:37 +02:00
+								        final Response response = getDownloader().postWithContentTypeJson(url, headers, json);
-												Adress reviews and do some improvements

Adress changes requested in reviews.
Do some improvements, remove unused imports and format some code to be in the 100 characters line limit.

											
										
										
											2021-04-25 18:54:26 +02:00
+								        // Ensure to have a valid response
-												Adress requested changes in YoutubeParsingHelper

											
										
										
											2021-06-06 15:39:45 +02:00
+								        return response.responseBody().length() > 500 && response.responseCode() == 200;
-												Improve getYoutubeMusicKeys()

											
										
										
											2020-03-20 11:05:19 +01:00
+								    }
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								    public static String getYoutubeMusicClientVersion()
-												[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods in Utils class have been added which allow to try to get a match of regular expressions in a string array, or a Pattern array, on a content, on a specific index or 0.
Also some code refactoring has been made in this class.

											
										
										
											2022-01-09 22:49:37 +01:00
+								            throws IOException, ReCaptchaException, Parser.RegexException {
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								        if (!isNullOrEmpty(youtubeMusicClientVersion)) {
 								            return youtubeMusicClientVersion;
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								        }
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								        if (isHardcodedYoutubeMusicClientVersionValid()) {
 								            youtubeMusicClientVersion = HARDCODED_YOUTUBE_MUSIC_CLIENT_VERSION;
 								            return youtubeMusicClientVersion;
-												Address reviews and do some improvements

Address changes requested in reviews.
Do some improvements, remove unused imports and format some code to be in the 100 characters line limit.

											
										
										
											2021-04-25 18:54:26 +02:00
+								        }
-												Add support for YouTube Music search

											
										
										
											2020-03-17 11:33:39 +01:00
-												Improve getYoutubeMusicKeys()

											
										
										
											2020-03-20 11:05:19 +01:00
+								        try {
-												[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods in Utils class have been added which allow to try to get a match of regular expressions in a string array, or a Pattern array, on a content, on a specific index or 0.
Also some code refactoring has been made in this class.

											
										
										
											2022-01-09 22:49:37 +01:00
+								            final String url = "https://music.youtube.com/sw.js";
-												[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links

webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.

The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have been changed, such as
uploader URLs (from channel IDs to handles).

As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.

URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.

As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).

											
										
										
											2023-02-20 13:21:55 +01:00
+								            final var headers = getOriginReferrerHeaders(YOUTUBE_MUSIC_URL);
-												[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods in Utils class have been added which allow to try to get a match of regular expressions in a string array, or a Pattern array, on a content, on a specific index or 0.
Also some code refactoring has been made in this class.

											
										
										
											2022-01-09 22:49:37 +01:00
+								            final String response = getDownloader().get(url, headers).responseBody();
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
 								            youtubeMusicClientVersion = getStringResultFromRegexArray(response,
-												Use immutable Map factory methods.

											
										
										
											2022-11-12 05:01:05 +01:00
+								                    INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
-												[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods in Utils class have been added which allow to try to get a match of regular expressions in a string array, or a Pattern array, on a content, on a specific index or 0.
Also some code refactoring has been made in this class.

											
										
										
											2022-01-09 22:49:37 +01:00
+								        } catch (final Exception e) {
-												[YouTube] Fix extraction of YouTube Music client version and API key when using YouTube Music's website in EU

Google returns now the consent page of YouTube for YouTube Music in EU, which can be also avoided by adding the ucbcb parameter to the URL with the value 1 ("?ucbcb=1").

											
										
										
											2022-05-15 11:20:06 +02:00
+								            final String url = "https://music.youtube.com/?ucbcb=1";
-												Use Collections.singletonMap().

											
										
										
											2022-07-27 03:26:02 +02:00
+								            final String html = getDownloader().get(url, getCookieHeader()).responseBody();
-												[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods in Utils class have been added which allow to try to get a match of regular expressions in a string array, or a Pattern array, on a content, on a specific index or 0.
Also some code refactoring has been made in this class.

											
										
										
											2022-01-09 22:49:37 +01:00
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								            youtubeMusicClientVersion = getStringResultFromRegexArray(html,
-												fix: set musicClientVersion regex capture group

											
										
										
											2023-04-16 19:25:05 +02:00
+								                    INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
-												Improve getYoutubeMusicKeys()

											
										
										
											2020-03-20 11:05:19 +01:00
+								        }
-												Add support for YouTube Music search

											
										
										
											2020-03-17 11:33:39 +01:00
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								        return youtubeMusicClientVersion;
-												Add support for YouTube Music search

											
										
										
											2020-03-17 11:33:39 +01:00
+								    }
-												Make some vars final and add annotations to methods


											
										
										
											2020-12-15 17:21:21 +01:00
+								    @Nullable
-												[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links

webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.

The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).

As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.

URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.

As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).

											
										
										
											2023-02-20 13:21:55 +01:00
+								    public static String getUrlFromNavigationEndpoint(
 								            @Nonnull final JsonObject navigationEndpoint) {
-												Use our fork of nanojson

											
										
										
											2020-04-16 16:08:14 +02:00
+								        if (navigationEndpoint.has("urlEndpoint")) {
-												[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links

webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.

The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).

As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.

URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.

As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).

											
										
										
											2023-02-20 13:21:55 +01:00
+								            String internUrl = navigationEndpoint.getObject("urlEndpoint")
 								                    .getString("url");
-												fix tests

											
										
										
											2021-02-12 22:22:11 +01:00
+								            if (internUrl.startsWith("https://www.youtube.com/redirect?")) {
-												add comment explaining why .substring(23)

											
										
										
											2021-02-13 12:10:41 +01:00
+								                // remove https://www.youtube.com part to fall in the next if block
-												fix tests

											
										
										
											2021-02-12 22:22:11 +01:00
+								                internUrl = internUrl.substring(23);
 								            }
-												Implement getTextFromObject() function

											
										
										
											2020-02-27 17:39:23 +01:00
+								            if (internUrl.startsWith("/redirect?")) {
 								                // q parameter can be the first parameter
 								                internUrl = internUrl.substring(10);
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								                final String[] params = internUrl.split("&");
 								                for (final String param : params) {
-												Implement getTextFromObject() function

											
										
										
											2020-02-27 17:39:23 +01:00
+								                    if (param.split("=")[0].equals("q")) {
 								                        try {
-												Add Utils methods for URL encoding/decoding using UTF-8.

											
										
										
											2022-08-09 04:03:29 +02:00
+								                            return Utils.decodeUrlUtf8(param.split("=")[1]);
-												Use the youtubei API for YouTube playlists

											
										
										
											2021-04-08 16:17:59 +02:00
+								                        } catch (final UnsupportedEncodingException e) {
-												Implement getTextFromObject() function

											
										
										
											2020-02-27 17:39:23 +01:00
+								                            return null;
 								                        }
 								                    }
 								                }
 								            } else if (internUrl.startsWith("http")) {
 								                return internUrl;
-												Adress reviews and do some improvements

Adress changes requested in reviews.
Do some improvements, remove unused imports and format some code to be in the 100 characters line limit.

											
										
										
											2021-04-25 18:54:26 +02:00
+								            } else if (internUrl.startsWith("/channel") || internUrl.startsWith("/user")
 								                    || internUrl.startsWith("/watch")) {
-												fix tests

											
										
										
											2021-02-12 22:22:11 +01:00
+								                return "https://www.youtube.com" + internUrl;
-												Implement getTextFromObject() function

											
										
										
											2020-02-27 17:39:23 +01:00
+								            }
-												[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links

webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.

The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).

As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.

URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.

As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).

											
										
										
											2023-02-20 13:21:55 +01:00
+								        }
 								        if (navigationEndpoint.has("browseEndpoint")) {
-												[YouTube] Fix bug when url isn't present in the browseEndpoint object

											
										
										
											2020-02-29 22:57:25 +01:00
+								            final JsonObject browseEndpoint = navigationEndpoint.getObject("browseEndpoint");
 								            final String canonicalBaseUrl = browseEndpoint.getString("canonicalBaseUrl");
 								            final String browseId = browseEndpoint.getString("browseId");
 								            // All channel ids are prefixed with UC
 								            if (browseId != null && browseId.startsWith("UC")) {
 								                return "https://www.youtube.com/channel/" + browseId;
 								            }
-												refactor: add Utils.isNullOrEmpty()

											
										
										
											2020-04-15 18:49:58 +02:00
+								            if (!isNullOrEmpty(canonicalBaseUrl)) {
-												[YouTube] Fix bug when url isn't present in the browseEndpoint object

											
										
										
											2020-02-29 22:57:25 +01:00
+								                return "https://www.youtube.com" + canonicalBaseUrl;
 								            }
-												[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links

webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.

The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).

As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.

URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.

As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).

											
										
										
											2023-02-20 13:21:55 +01:00
+								        }
-												[YouTube] Fix bug when url isn't present in the browseEndpoint object

											
										
										
											2020-02-29 22:57:25 +01:00
-												[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links

webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.

The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).

As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.

URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.

As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).

											
										
										
											2023-02-20 13:21:55 +01:00
+								        if (navigationEndpoint.has("watchEndpoint")) {
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								            final StringBuilder url = new StringBuilder();
-												[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links

webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.

The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).

As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.

URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.

As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).

											
										
										
											2023-02-20 13:21:55 +01:00
+								            url.append("https://www.youtube.com/watch?v=")
 								                    .append(navigationEndpoint.getObject("watchEndpoint")
 								                            .getString(VIDEO_ID));
-												[Youtube] Add cookies to youtube mix request

This way youtube wont return duplicates when getting more items of the mix (but youtube can also track us)

											
										
										
											2020-04-16 19:28:27 +02:00
+								            if (navigationEndpoint.getObject("watchEndpoint").has("playlistId")) {
-												[YouTube] Don't escape & in getUrlFromNavigationEndpoint for playlists

											
										
										
											2020-12-12 20:40:13 +01:00
+								                url.append("&list=").append(navigationEndpoint.getObject("watchEndpoint")
-												[Youtube] Add cookies to youtube mix request

This way youtube wont return duplicates when getting more items of the mix (but youtube can also track us)

											
										
										
											2020-04-16 19:28:27 +02:00
+								                        .getString("playlistId"));
 								            }
 								            if (navigationEndpoint.getObject("watchEndpoint").has("startTimeSeconds")) {
-												[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links

webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.

The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).

As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.

URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.

As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).

											
										
										
											2023-02-20 13:21:55 +01:00
+								                url.append("&t=")
 								                        .append(navigationEndpoint.getObject("watchEndpoint")
-												[Youtube] Add cookies to youtube mix request

This way youtube wont return duplicates when getting more items of the mix (but youtube can also track us)

											
										
										
											2020-04-16 19:28:27 +02:00
+								                        .getInt("startTimeSeconds"));
 								            }
-												Implement getTextFromObject() function

											
										
										
											2020-02-27 17:39:23 +01:00
+								            return url.toString();
-												[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links

webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.

The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).

As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.

URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.

As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).

											
										
										
											2023-02-20 13:21:55 +01:00
+								        }
 								        if (navigationEndpoint.has("watchPlaylistEndpoint")) {
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								            return "https://www.youtube.com/playlist?list="
-												[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links

webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.

The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).

As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.

URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.

As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).

											
										
										
											2023-02-20 13:21:55 +01:00
+								                    + navigationEndpoint.getObject("watchPlaylistEndpoint")
 								                    .getString("playlistId");
-												Implement getTextFromObject() function

											
										
										
											2020-02-27 17:39:23 +01:00
+								        }
-												[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links

webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.

The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).

As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.

URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.

As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).

											
										
										
											2023-02-20 13:21:55 +01:00
 								        if (navigationEndpoint.has("commandMetadata")) {
 								            final JsonObject metadata = navigationEndpoint.getObject("commandMetadata")
 								                    .getObject("webCommandMetadata");
 								            if (metadata.has("url")) {
 								                return "https://www.youtube.com" + metadata.getString("url");
 								            }
 								        }
-												Implement getTextFromObject() function

											
										
										
											2020-02-27 17:39:23 +01:00
+								        return null;
 								    }
-												Implement @TobiGr's suggestions

											
										
										
											2020-04-20 14:27:33 +02:00
+								    /**
-												Address requested changes

											
										
										
											2022-02-05 22:05:07 +01:00
+								     * Get the text from a JSON object that has either a {@code simpleText} or a {@code runs}
 								     * array.
-												use only one constant UTF-8

											
										
										
											2021-02-07 22:12:22 +01:00
+								     *
-												Implement @TobiGr's suggestions

											
										
										
											2020-04-20 14:27:33 +02:00
+								     * @param textObject JSON object to get the text from
-												Address requested changes

											
										
										
											2022-02-05 22:05:07 +01:00
+								     * @param html       whether to return HTML, by parsing the {@code navigationEndpoint}
-												Return null instead of "" in getTextFromObject()

											
										
										
											2020-05-01 13:55:15 +02:00
+								     * @return text in the JSON object or {@code null}
-												Implement @TobiGr's suggestions

											
										
										
											2020-04-20 14:27:33 +02:00
+								     */
-												Make some vars final and add annotations to methods


											
										
										
											2020-12-15 17:21:21 +01:00
+								    @Nullable
-												[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links

webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.

The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have been changed, such as
uploader URLs (from channel IDs to handles).

As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.

URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.

As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).

											
										
										
											2023-02-20 13:21:55 +01:00
+								    public static String getTextFromObject(final JsonObject textObject, final boolean html) {
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								        if (isNullOrEmpty(textObject)) {
 								            return null;
 								        }
-												Return null instead of "" in getTextFromObject()

											
										
										
											2020-05-01 13:55:15 +02:00
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								        if (textObject.has("simpleText")) {
 								            return textObject.getString("simpleText");
 								        }
-												Implement getTextFromObject() function

											
										
										
											2020-02-27 17:39:23 +01:00
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								        if (textObject.getArray("runs").isEmpty()) {
 								            return null;
 								        }
-												Return null instead of "" in getTextFromObject()

											
										
										
											2020-05-01 13:55:15 +02:00
-												Make some vars final and add annotations to methods


											
										
										
											2020-12-15 17:21:21 +01:00
+								        final StringBuilder textBuilder = new StringBuilder();
-												Implement bold/italic/strike-through support.


											
										
										
											2022-11-28 17:58:10 +01:00
+								        for (final Object o : textObject.getArray("runs")) {
 								            final JsonObject run = (JsonObject) o;
 								            String text = run.getString("text");
 								            if (html) {
 								                if (run.has("navigationEndpoint")) {
-												[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links

webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.

The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).

As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.

URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.

As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).

											
										
										
											2023-02-20 13:21:55 +01:00
+								                    final String url = getUrlFromNavigationEndpoint(
 								                            run.getObject("navigationEndpoint"));
-												Implement bold/italic/strike-through support.


											
										
										
											2022-11-28 17:58:10 +01:00
+								                    if (!isNullOrEmpty(url)) {
-												[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links

webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.

The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have been changed, such as
uploader URLs (from channel IDs to handles).

As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.

URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.

As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).

											
										
										
											2023-02-20 13:21:55 +01:00
+								                        text = "<a href=\"" + Entities.escape(url) + "\">" + Entities.escape(text)
 								                                + "</a>";
-												Implement bold/italic/strike-through support.


											
										
										
											2022-11-28 17:58:10 +01:00
+								                    }
 								                }
 								                final boolean bold = run.has("bold")
 								                        && run.getBoolean("bold");
 								                final boolean italic = run.has("italics")
 								                        && run.getBoolean("italics");
 								                final boolean strikethrough = run.has("strikethrough")
 								                        && run.getBoolean("strikethrough");
 								                if (bold) {
 								                    textBuilder.append("<b>");
 								                }
 								                if (italic) {
 								                    textBuilder.append("<i>");
 								                }
 								                if (strikethrough) {
 								                    textBuilder.append("<s>");
 								                }
-												[YouTube] Fix escaping links in YouTubeParsingHelper.getTextFromObject


											
										
										
											2023-01-05 00:28:12 +01:00
+								                textBuilder.append(text);
-												Implement bold/italic/strike-through support.


											
										
										
											2022-11-28 17:58:10 +01:00
 								                if (strikethrough) {
 								                    textBuilder.append("</s>");
 								                }
 								                if (italic) {
 								                    textBuilder.append("</i>");
 								                }
 								                if (bold) {
 								                    textBuilder.append("</b>");
-												Implement getTextFromObject() function

											
										
										
											2020-02-27 17:39:23 +01:00
+								                }
-												Implement bold/italic/strike-through support.


											
										
										
											2022-11-28 17:58:10 +01:00
+								            } else {
 								                textBuilder.append(text);
-												Implement getTextFromObject() function

											
										
										
											2020-02-27 17:39:23 +01:00
+								            }
 								        }
 								        String text = textBuilder.toString();
 								        if (html) {
 								            text = text.replaceAll("\\n", "<br>");
-												[YouTube] Fix regex warning: use ' {2}' instead of '  '

											
										
										
											2022-03-18 17:21:10 +01:00
+								            text = text.replaceAll(" {2}", " &nbsp;");
-												Implement getTextFromObject() function

											
										
										
											2020-02-27 17:39:23 +01:00
+								        }
 								        return text;
 								    }
-												[YouTube] Implement emergency meta info

YouTube provides that meta info panel when users search for really sensitive content like suicide (e.g. "blue whale").

It contains:
- an encouragement as title (e.g. "We are with you")
- a phone number as action
- details about how to call the phone number (e.g. availability)
- an url pointing to the website of an association

Also add a test that just checks if a meta info is properly extracted

											
										
										
											2023-12-07 20:47:02 +01:00
+								    @Nonnull
 								    public static String getTextFromObjectOrThrow(final JsonObject textObject, final String error)
 								            throws ParsingException {
 								        // The single-argument getTextFromObject is nullable: a null result means that
 								        // no text could be extracted, which is reported as a ParsingException here
 								        final String extractedText = getTextFromObject(textObject);
 								        if (extractedText != null) {
 								            return extractedText;
 								        }
 								        throw new ParsingException("Could not extract text: " + error);
 								    }
-												Make some vars final and add annotations to methods


											
										
										
											2020-12-15 17:21:21 +01:00
+								    @Nullable
-												[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links

webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.

The corresponding statement has been moved to the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere; otherwise, URLs of some endpoints would have been changed, such as
uploader URLs (from channel IDs to handles).

As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.

URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.

As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).

											
										
										
											2023-02-20 13:21:55 +01:00
+								    public static String getTextFromObject(final JsonObject textObject) {
-												Implement getTextFromObject() function

											
										
										
											2020-02-27 17:39:23 +01:00
+								        return getTextFromObject(textObject, false);
 								    }
-												Actually fix thumbnail URLs starting with //

											
										
										
											2020-02-28 09:36:33 +01:00
-												Add uploader url and verified to PlaylistInfoItem.


											
										
										
											2022-10-29 23:43:04 +02:00
+								    @Nullable
-												[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links

webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.

The corresponding statement has been moved to the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere; otherwise, URLs of some endpoints would have been changed, such as
uploader URLs (from channel IDs to handles).

As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.

URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.

As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).

											
										
										
											2023-02-20 13:21:55 +01:00
+								    public static String getUrlFromObject(final JsonObject textObject) {
-												Add uploader url and verified to PlaylistInfoItem.


											
										
										
											2022-10-29 23:43:04 +02:00
+								        if (isNullOrEmpty(textObject)) {
 								            return null;
 								        }
 								        if (textObject.getArray("runs").isEmpty()) {
 								            return null;
 								        }
 								        for (final Object textPart : textObject.getArray("runs")) {
 								            final String url = getUrlFromNavigationEndpoint(((JsonObject) textPart)
 								                    .getObject("navigationEndpoint"));
 								            if (!isNullOrEmpty(url)) {
 								                return url;
 								            }
 								        }
 								        return null;
 								    }
-												[YouTube] Fix trending getName()

											
										
										
											2021-03-24 09:04:43 +01:00
+								    @Nullable
-												[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links

webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.

The corresponding statement has been moved to the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere; otherwise, URLs of some endpoints would have been changed, such as
uploader URLs (from channel IDs to handles).

As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.

URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.

As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).

											
										
										
											2023-02-20 13:21:55 +01:00
+								    public static String getTextAtKey(@Nonnull final JsonObject jsonObject, final String theKey) {
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								        if (jsonObject.isString(theKey)) {
 								            return jsonObject.getString(theKey);
-												[YouTube] Fix trending getName()

											
										
										
											2021-03-24 09:04:43 +01:00
+								        } else {
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								            return getTextFromObject(jsonObject.getObject(theKey));
-												[YouTube] Fix trending getName()

											
										
										
											2021-03-24 09:04:43 +01:00
+								        }
 								    }
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								    public static String fixThumbnailUrl(@Nonnull final String thumbnailUrl) {
 								        String result = thumbnailUrl;
 								        if (result.startsWith("//")) {
 								            result = result.substring(2);
-												Actually fix thumbnail URLs starting with //

											
										
										
											2020-02-28 09:36:33 +01:00
+								        }
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								        if (result.startsWith(HTTP)) {
 								            result = Utils.replaceHttpWithHttps(result);
 								        } else if (!result.startsWith(HTTPS)) {
 								            result = "https://" + result;
-												Actually fix thumbnail URLs starting with //

											
										
										
											2020-02-28 09:36:33 +01:00
+								        }
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								        return result;
-												Actually fix thumbnail URLs starting with //

											
										
										
											2020-02-28 09:36:33 +01:00
+								    }
-												Create getJsonResponse() function

											
										
										
											2020-02-29 16:42:04 +01:00
-												[YouTube] Add utility methods to get images from InfoItems and thumbnails arrays

Unmodifiable lists of Images are returned, parsed from a given YouTube
"thumbnails" JSON array.

These methods will be used in all YouTube extractors and InfoItems, as the
structures between content types (videos, channels, playlists, ...) are common.

											
										
										
											2022-07-22 17:28:39 +02:00
+								    /**
 								     * Get thumbnails from a {@link JsonObject} representing a YouTube
 								     * {@link org.schabi.newpipe.extractor.InfoItem InfoItem}.
 								     *
 								     * <p>
 								     * Thumbnails are got from the {@code thumbnails} {@link JsonArray} inside the {@code thumbnail}
 								     * {@link JsonObject} of the YouTube {@link org.schabi.newpipe.extractor.InfoItem InfoItem},
 								     * using {@link #getImagesFromThumbnailsArray(JsonArray)}.
 								     * </p>
 								     *
 								     * @param infoItem a YouTube {@link org.schabi.newpipe.extractor.InfoItem InfoItem}
 								     * @return an unmodifiable list of {@link Image}s found in the {@code thumbnails}
 								     * {@link JsonArray}
 								     * @throws ParsingException if an exception occurs when
 								     *                          {@link #getImagesFromThumbnailsArray(JsonArray)} is executed
 								     */
 								    @Nonnull
 								    public static List<Image> getThumbnailsFromInfoItem(@Nonnull final JsonObject infoItem)
-												[YouTube] Extract mixes from streams related items

											
										
										
											2022-02-02 20:23:11 +01:00
+								            throws ParsingException {
 								        try {
-												[YouTube] Add utility methods to get images from InfoItems and thumbnails arrays

Unmodifiable lists of Images are returned, parsed from a given YouTube
"thumbnails" JSON array.

These methods will be used in all YouTube extractors and InfoItems, as the
structures between content types (videos, channels, playlists, ...) are common.

											
										
										
											2022-07-22 17:28:39 +02:00
+								            return getImagesFromThumbnailsArray(infoItem.getObject("thumbnail")
 								                    .getArray("thumbnails"));
-												[YouTube] Extract mixes from streams related items

											
										
										
											2022-02-02 20:23:11 +01:00
+								        } catch (final Exception e) {
-												[YouTube] Add utility methods to get images from InfoItems and thumbnails arrays

Unmodifiable lists of Images are returned, parsed from a given YouTube
"thumbnails" JSON array.

These methods will be used in all YouTube extractors and InfoItems, as the
structures between content types (videos, channels, playlists, ...) are common.

											
										
										
											2022-07-22 17:28:39 +02:00
+								            throw new ParsingException("Could not get thumbnails from InfoItem", e);
-												[YouTube] Extract mixes from streams related items

											
										
										
											2022-02-02 20:23:11 +01:00
+								        }
 								    }
-												[YouTube] Add utility methods to get images from InfoItems and thumbnails arrays

Unmodifiable lists of Images are returned, parsed from a given YouTube
"thumbnails" JSON array.

These methods will be used in all YouTube extractors and InfoItems, as the
structures between content types (videos, channels, playlists, ...) are common.

											
										
										
											2022-07-22 17:28:39 +02:00
+								    /**
 								     * Get images from a YouTube {@code thumbnails} {@link JsonArray}.
 								     *
 								     * <p>
 								     * The properties of the {@link Image}s created will be set using the corresponding ones of
 								     * thumbnail items.
 								     * </p>
 								     *
 								     * @param thumbnails a YouTube {@code thumbnails} {@link JsonArray}
 								     * @return an unmodifiable list of {@link Image}s extracted from the given {@link JsonArray}
 								     */
 								    @Nonnull
 								    public static List<Image> getImagesFromThumbnailsArray(
 								            @Nonnull final JsonArray thumbnails) {
 								        return thumbnails.stream()
 								                // Only keep JSON objects which have a non-empty url property
 								                .filter(item -> item instanceof JsonObject
 								                        && !isNullOrEmpty(((JsonObject) item).getString("url")))
 								                .map(item -> {
 								                    final JsonObject entry = (JsonObject) item;
 								                    final String url = fixThumbnailUrl(entry.getString("url"));
 								                    final int width = entry.getInt("width", Image.WIDTH_UNKNOWN);
 								                    final int height = entry.getInt("height", Image.HEIGHT_UNKNOWN);
 								                    // The resolution level is derived from the height only
 								                    return new Image(url, height, width,
 								                            ResolutionLevel.fromHeight(height));
 								                })
 								                .collect(Collectors.toUnmodifiableList());
 								    }
-												Annotate YoutubeParsingHelper methods with Nonnull when needed

											
										
										
											2021-06-11 13:34:23 +02:00
+								    @Nonnull
 								    public static String getValidJsonResponseBody(@Nonnull final Response response)
-												Extract some code to getValidResponseBody()

											
										
										
											2020-04-01 16:01:21 +02:00
+								            throws ParsingException, MalformedURLException {
-												[YouTube] Detect simple 404s in the standard fetch method

											
										
										
											2020-03-01 01:50:31 +01:00
+								        if (response.responseCode() == 404) {
-												[Youtube] Add cookies to youtube mix request

This way YouTube won't return duplicates when getting more items of the mix (but YouTube can also track us)

											
										
										
											2020-04-16 19:28:27 +02:00
+								            throw new ContentNotAvailableException("Not found"
 								                    + " (\"" + response.responseCode() + " " + response.responseMessage() + "\")");
-												[YouTube] Detect simple 404s in the standard fetch method

											
										
										
											2020-03-01 01:50:31 +01:00
+								        }
 								        final String responseBody = response.responseBody();
-												Use lightweight requests when getting and checking YouTube API key and client version

											
										
										
											2021-04-12 18:24:32 +02:00
+								        if (responseBody.length() < 50) { // Ensure to have a valid response
-												Create getJsonResponse() function

											
										
										
											2020-02-29 16:42:04 +01:00
+								            throw new ParsingException("JSON response is too short");
 								        }
-												[YouTube] Detect deleted/nonexistent/invalid channels and playlists

- Added tests for these cases.

											
										
										
											2020-03-01 01:52:25 +01:00
+								        // Check if the request was redirected to the error page.
 								        final URL latestUrl = new URL(response.latestUrl());
 								        if (latestUrl.getHost().equalsIgnoreCase("www.youtube.com")) {
 								            final String path = latestUrl.getPath();
 								            if (path.equalsIgnoreCase("/oops") || path.equalsIgnoreCase("/error")) {
 								                throw new ContentNotAvailableException("Content unavailable");
 								            }
 								        }
 								        final String responseContentType = response.getHeader("Content-Type");
-												Extract some code to getValidResponseBody()

											
										
										
											2020-04-01 16:01:21 +02:00
+								        if (responseContentType != null
 								                && responseContentType.toLowerCase().contains("text/html")) {
-												[Youtube] Add cookies to youtube mix request

This way YouTube won't return duplicates when getting more items of the mix (but YouTube can also track us)

											
										
										
											2020-04-16 19:28:27 +02:00
+								            throw new ParsingException("Got HTML document, expected JSON response"
 								                    + " (latest url was: \"" + response.latestUrl() + "\")");
-												[YouTube] Detect deleted/nonexistent/invalid channels and playlists

- Added tests for these cases.

											
										
										
											2020-03-01 01:52:25 +01:00
+								        }
-												Extract some code to getValidResponseBody()

											
										
										
											2020-04-01 16:01:21 +02:00
+								        return responseBody;
 								    }
-												Use the youtubei API for YouTube playlists

											
										
										
											2021-04-08 16:17:59 +02:00
+								    public static JsonObject getJsonPostResponse(final String endpoint,
-												Use the youtubei API for YouTube trends

											
										
										
											2021-04-11 17:01:43 +02:00
+								                                                 final byte[] body,
 								                                                 final Localization localization)
-												Use the youtubei API for YouTube playlists

											
										
										
											2021-04-08 16:17:59 +02:00
+								            throws IOException, ExtractionException {
-												Use immutable Map factory methods.

											
										
										
											2022-11-12 05:01:05 +01:00
+								        final var headers = getYouTubeHeaders();
-												Use the youtubei API for YouTube playlists

											
										
										
											2021-04-08 16:17:59 +02:00
-												[YouTube] Send Content-Type header in all POST requests

This header was not sent partially before and was added and guessed by OkHttp. This can create issues when using other HTTP clients than OkHttp, such as Cronet.

Some code in the modified classes has been improved and / or deduplicated, and usages of the UTF_8 constant of the Utils class has been replaced by StandardCharsets.UTF_8 where possible.

Note that this header has not been added in YoutubeDashManifestCreatorsUtils, as an empty body is sent in the POST requests made by this class.

											
										
										
											2022-06-18 16:07:32 +02:00
+								        return JsonUtils.toJsonObject(getValidJsonResponseBody(
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								                getDownloader().postWithContentTypeJson(YOUTUBEI_V1_URL + endpoint + "?"
 								                        + DISABLE_PRETTY_PRINT_PARAMETER, headers, body, localization)));
-												Use the youtubei API for YouTube playlists

											
										
										
											2021-04-08 16:17:59 +02:00
+								    }
-												[YouTube] Add the cpn param to playback requests and try to spoof better the Android client

The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.

For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.

Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).

For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.

This commit also fixes a small bug with the Android User-Agent string.

Some code improvements have been also made.

											
										
										
											2021-12-22 17:55:41 +01:00
+								    public static JsonObject getJsonAndroidPostResponse(
 								            final String endpoint,
 								            final byte[] body,
-												[YouTube] Extract streams of livestreams from the iOS client and disabled the Android client for livestreams

The iOS client is only enabled for livestreams and the Android client is now only enabled for videos, both by default.

A way to force, or not, the fetch of both clients has been added with two new static methods in YoutubeStreamExtractor.

											
										
										
											2022-01-15 17:25:00 +01:00
+								            @Nonnull final Localization localization,
-												[YouTube] Add the cpn param to playback requests and try to spoof better the Android client

The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.

For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.

Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).

For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.

This commit also fixes a small bug with the Android User-Agent string.

Some code improvements have been also made.

											
										
										
											2021-12-22 17:55:41 +01:00
+								            @Nullable final String endPartOfUrlRequest) throws IOException, ExtractionException {
-												[YouTube] Update again hardcoded client versions and update mobile user agents

Also provide ability to get mobile user-agents used for mobile InnerTube requests and deduplicate related code.

											
										
										
											2022-03-15 19:01:24 +01:00
+								        return getMobilePostResponse(endpoint, body, localization,
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								                getAndroidUserAgent(localization), endPartOfUrlRequest);
-												Use new youtube continuations api for playlists

Requires sending a POST request instead of GET.
clientName and clientVersion, which were required as headers previously now need to be part of the request payload.
continuation id also needs to be part of request body.

quick and dirty solution.

											
										
										
											2021-03-03 19:49:26 +01:00
+								    }
-												[YouTube] Extract streams of livestreams from the iOS client and disabled the Android client for livestreams

The iOS client is only enabled for livestreams and the Android client is now only enabled for videos, both by default.

A way to force, or not, the fetch of both clients has been added with two new static methods in YoutubeStreamExtractor.

											
										
										
											2022-01-15 17:25:00 +01:00
+								    public static JsonObject getJsonIosPostResponse(
 								            final String endpoint,
 								            final byte[] body,
 								            @Nonnull final Localization localization,
 								            @Nullable final String endPartOfUrlRequest) throws IOException, ExtractionException {
-												[YouTube] Update again hardcoded client versions and update mobile user agents

Also provide ability to get mobile user-agents used for mobile InnerTube requests and deduplicate related code.

											
										
										
											2022-03-15 19:01:24 +01:00
+								        return getMobilePostResponse(endpoint, body, localization, getIosUserAgent(localization),
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								                endPartOfUrlRequest);
-												[YouTube] Update again hardcoded client versions and update mobile user agents

Also provide ability to get mobile user-agents used for mobile InnerTube requests and deduplicate related code.

											
										
										
											2022-03-15 19:01:24 +01:00
+								    }
 								    private static JsonObject getMobilePostResponse(
 								            final String endpoint,
 								            final byte[] body,
 								            @Nonnull final Localization localization,
 								            @Nonnull final String userAgent,
 								            @Nullable final String endPartOfUrlRequest) throws IOException, ExtractionException {
-												Use immutable Map factory methods.

											
										
										
											2022-11-12 05:01:05 +01:00
+								        final var headers = Map.of("User-Agent", List.of(userAgent),
 								                "X-Goog-Api-Format-Version", List.of("2"));
-												[YouTube] Extract streams of livestreams from the iOS client and disabled the Android client for livestreams

The iOS client is only enabled for livestreams and the Android client is now only enabled for videos, both by default.

A way to force, or not, the fetch of both clients has been added with two new static methods in YoutubeStreamExtractor.

											
										
										
											2022-01-15 17:25:00 +01:00
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								        final String baseEndpointUrl = YOUTUBEI_V1_GAPIS_URL + endpoint + "?"
-												[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more

Use the TV embedded client technique to get streams of embeddable age-restricted videos.

This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, category, the uploadDate and the publishDate properties.

The outdated code for these contents has been removed.

Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.

Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.

											
										
										
											2022-04-02 19:06:36 +02:00
+								                + DISABLE_PRETTY_PRINT_PARAMETER;
-												[YouTube] Extract streams of livestreams from the iOS client and disabled the Android client for livestreams

The iOS client is only enabled for livestreams and the Android client is now only enabled for videos, both by default.

A way to force, or not, the fetch of both clients have been added with two new static methods in YoutubeStreamExtractor.

											
										
										
											2022-01-15 17:25:00 +01:00
-												[YouTube] Send Content-Type header in all POST requests

This header was not sent partially before and was added and guessed by OkHttp. This can create issues when using other HTTP clients than OkHttp, such as Cronet.

Some code in the modified classes has been improved and / or deduplicated, and usages of the UTF_8 constant of the Utils class has been replaced by StandardCharsets.UTF_8 where possible.

Note that this header has been not added in except in YoutubeDashManifestCreatorsUtils, as an empty body is sent in the POST requests made by this class.

											
										
										
											2022-06-18 16:07:32 +02:00
+								        return JsonUtils.toJsonObject(getValidJsonResponseBody(
-												Use Downloader's postWithContentType and postWithContentTypeJson methods in services and extractors

											
										
										
											2022-07-15 20:56:37 +02:00
+								                getDownloader().postWithContentTypeJson(isNullOrEmpty(endPartOfUrlRequest)
-												[YouTube] Send Content-Type header in all POST requests

This header was not sent partially before and was added and guessed by OkHttp. This can create issues when using other HTTP clients than OkHttp, such as Cronet.

Some code in the modified classes has been improved and / or deduplicated, and usages of the UTF_8 constant of the Utils class has been replaced by StandardCharsets.UTF_8 where possible.

Note that this header has been not added in except in YoutubeDashManifestCreatorsUtils, as an empty body is sent in the POST requests made by this class.

											
										
										
											2022-06-18 16:07:32 +02:00
+								                                ? baseEndpointUrl
 								                                : baseEndpointUrl + endPartOfUrlRequest,
 								                        headers, body, localization)));
-												[YouTube] Extract streams of livestreams from the iOS client and disabled the Android client for livestreams

The iOS client is only enabled for livestreams and the Android client is now only enabled for videos, both by default.

A way to force, or not, the fetch of both clients have been added with two new static methods in YoutubeStreamExtractor.

											
										
										
											2022-01-15 17:25:00 +01:00
+								    }
-												Annotate YoutubeParsingHelper methods with Nonnull when needed

											
										
										
											2021-06-11 13:34:23 +02:00
 								    /**
 								     * Prepares the JSON payload builder of the {@code WEB} client without visitor data.
 								     *
 								     * <p>
 								     * This overload only delegates to
 								     * {@link #prepareDesktopJsonBuilder(Localization, ContentCountry, String)} with a
 								     * {@code null} visitor data.
 								     * </p>
 								     *
 								     * @param localization   the localization forwarded to the three-argument overload
 								     * @param contentCountry the content country forwarded to the three-argument overload
 								     * @return the builder returned by the three-argument overload
 								     * @throws IOException         propagated from the three-argument overload
 								     * @throws ExtractionException propagated from the three-argument overload
 								     */
+								    @Nonnull
-												Rebase + some code improvements + fix extraction of age-restricted videos + update clients version

Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:

- the desktop API is fetched.

If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status

If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.

If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.

We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.

Other code changes:

- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed

Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>

											
										
										
											2021-07-28 23:55:09 +02:00
+								    public static JsonBuilder<JsonObject> prepareDesktopJsonBuilder(
 								            @Nonnull final Localization localization,
 								            @Nonnull final ContentCountry contentCountry)
-												Use Page.body for YoutubePlaylistExtractor

											
										
										
											2021-04-02 21:34:47 +02:00
+								            throws IOException, ExtractionException {
-												[YouTube] Add support for channel tabs and tags and age-restricted channels

Support of tags and videos, shorts, live, playlists and channels tabs has been
added for non-age restricted channels.

Age-restricted channels are now also supported and always returned the videos,
shorts and live tabs, accessible using system playlists. These tabs are the
only ones which can be accessed using YouTube's desktop website without being
logged-in.

The videos channel tab parameter has been updated to the one used by the
desktop website and when a channel extraction is fetched, this tab is returned
in the list of tabs as a cached one in the corresponding link handler.

Visitor data support per request has been added, as a valid visitor data is
required to fetch continuations with contents on the shorts tab. It is only
used in this case to enhance privacy.

A dedicated shorts UI elements (reelItemRenderers) extractor has been added,
YoutubeReelInfoItemExtractor. These elements do not provide the exact view
count, any uploader info (name, URL, avatar, verified status) and the upload
date.

All service's LinkHandlers are now using the singleton pattern and some code
has been also improved on the files changed.

Co-authored-by: ThetaDev <t.testboy@gmail.com>
Co-authored-by: Stypox <stypox@pm.me>

											
										
										
											2023-07-14 23:46:48 +02:00
+								        return prepareDesktopJsonBuilder(localization, contentCountry, null);
 								    }
 								    /**
 								     * Prepares the JSON payload builder of the {@code WEB} client.
 								     *
 								     * <p>
 								     * The returned builder contains a closed {@code context} object made of:
 								     * </p>
 								     * <ul>
 								     *     <li>a {@code client} object with {@code hl} and {@code gl} taken from the given
 								     *     localization and content country, {@code clientName} set to {@code WEB},
 								     *     {@code clientVersion} set to the value of {@code getClientVersion()},
 								     *     {@code originalUrl}, {@code platform} set to {@code DESKTOP},
 								     *     {@code utcOffsetMinutes} set to {@code 0} and, when provided,
 								     *     {@code visitorData};</li>
 								     *     <li>a {@code request} object with an empty {@code internalExperimentFlags} array
 								     *     and {@code useSsl} set to {@code true};</li>
 								     *     <li>a {@code user} object with {@code lockedSafetyMode} set to
 								     *     {@code false}.</li>
 								     * </ul>
 								     *
 								     * <p>
 								     * The root object is left open, so more properties can be added to the returned builder.
 								     * </p>
 								     *
 								     * @param localization   the localization whose code is used as the {@code hl} value
 								     * @param contentCountry the content country whose code is used as the {@code gl} value
 								     * @param visitorData    the visitor data to add to the {@code client} object, or
 								     *                       {@code null} to omit it
 								     * @return the builder holding the {@code context} object described above
 								     * @throws IOException         declared here; presumably raised while getting the client
 								     *                             version (not visible in this part of the file)
 								     * @throws ExtractionException declared here; presumably raised while getting the client
 								     *                             version (not visible in this part of the file)
 								     */
 								    @Nonnull
 								    public static JsonBuilder<JsonObject> prepareDesktopJsonBuilder(
 								            @Nonnull final Localization localization,
 								            @Nonnull final ContentCountry contentCountry,
 								            @Nullable final String visitorData)
 								            throws IOException, ExtractionException {
-												Use Page.body for YoutubePlaylistExtractor

											
										
										
											2021-04-02 21:34:47 +02:00
+								        // @formatter:off
-												[YouTube] Add support for channel tabs and tags and age-restricted channels

Support of tags and videos, shorts, live, playlists and channels tabs has been
added for non-age restricted channels.

Age-restricted channels are now also supported and always returned the videos,
shorts and live tabs, accessible using system playlists. These tabs are the
only ones which can be accessed using YouTube's desktop website without being
logged-in.

The videos channel tab parameter has been updated to the one used by the
desktop website and when a channel extraction is fetched, this tab is returned
in the list of tabs as a cached one in the corresponding link handler.

Visitor data support per request has been added, as a valid visitor data is
required to fetch continuations with contents on the shorts tab. It is only
used in this case to enhance privacy.

A dedicated shorts UI elements (reelItemRenderers) extractor has been added,
YoutubeReelInfoItemExtractor. These elements do not provide the exact view
count, any uploader info (name, URL, avatar, verified status) and the upload
date.

All service's LinkHandlers are now using the singleton pattern and some code
has been also improved on the files changed.

Co-authored-by: ThetaDev <t.testboy@gmail.com>
Co-authored-by: Stypox <stypox@pm.me>

											
										
										
											2023-07-14 23:46:48 +02:00
+								        final JsonBuilder<JsonObject> builder = JsonObject.builder()
-												Use Page.body for YoutubePlaylistExtractor

											
										
										
											2021-04-02 21:34:47 +02:00
+								                .object("context")
 								                    .object("client")
-												Fix localization and update client version

											
										
										
											2021-04-30 19:06:56 +02:00
+								                        .value("hl", localization.getLocalizationCode())
 								                        .value("gl", contentCountry.getCountryCode())
-												Adress changes

											
										
										
											2021-06-24 18:39:16 +02:00
+								                        .value("clientName", "WEB")
 								                        .value("clientVersion", getClientVersion())
-												[YouTube] Add the cpn param to playback requests and try to spoof better the Android client

The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.

For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.

Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).

For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.

This commit also fixes a small bug with the Android User-Agent string.

Some code improvements have been also made.

											
										
										
											2021-12-22 17:55:41 +01:00
+								                        .value("originalUrl", "https://www.youtube.com")
-												[YouTube] Add utcOffsetMinutes to all InnerTube payloads

This should make returned dates consistent between timezones and countries on
which the extractor is ran.

It was previously only set on YouTube Music search continuations.

											
										
										
											2023-10-07 15:04:36 +02:00
+								                        .value("platform", "DESKTOP")
 								                        .value("utcOffsetMinutes", 0);
-												[YouTube] Add support for channel tabs and tags and age-restricted channels

Support of tags and videos, shorts, live, playlists and channels tabs has been
added for non-age restricted channels.

Age-restricted channels are now also supported and always returned the videos,
shorts and live tabs, accessible using system playlists. These tabs are the
only ones which can be accessed using YouTube's desktop website without being
logged-in.

The videos channel tab parameter has been updated to the one used by the
desktop website and when a channel extraction is fetched, this tab is returned
in the list of tabs as a cached one in the corresponding link handler.

Visitor data support per request has been added, as a valid visitor data is
required to fetch continuations with contents on the shorts tab. It is only
used in this case to enhance privacy.

A dedicated shorts UI elements (reelItemRenderers) extractor has been added,
YoutubeReelInfoItemExtractor. These elements do not provide the exact view
count, any uploader info (name, URL, avatar, verified status) and the upload
date.

All service's LinkHandlers are now using the singleton pattern and some code
has been also improved on the files changed.

Co-authored-by: ThetaDev <t.testboy@gmail.com>
Co-authored-by: Stypox <stypox@pm.me>

											
										
										
											2023-07-14 23:46:48 +02:00
 								        // The visitor data is optional: it is only written to the still open "client"
 								        // object when a value has been provided
 								        if (visitorData != null) {
 								            builder.value("visitorData", visitorData);
 								        }
 								        return builder.end()
-												[YouTube] Add the cpn param to playback requests and try to spoof better the Android client

The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.

For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.

Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).

For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.

This commit also fixes a small bug with the Android User-Agent string.

Some code improvements have been also made.

											
										
										
											2021-12-22 17:55:41 +01:00
+								                    .object("request")
 								                        .array("internalExperimentFlags")
 								                        .end()
 								                        .value("useSsl", true)
-												Adress changes

											
										
										
											2021-06-24 18:39:16 +02:00
+								                    .end()
 								                    .object("user")
-												[YouTube] Improve payloads and URLs of InnerTube requests

For every InnerTube request:
- Always add a `request` object with the following properties:
  - "internalExperimentFlags" set to an empty array;
  - "useSsl" set to "true";
  - "lockedSafetyMode" set to "false".
- Use proper TODO comment to provide a way to enable restricted mode on every
request and add it on requests on which it wasn't present.

For YouTube Music:
- Remove alt query parameter, as it is not used anymore by the website;
- Add prettyPrint query parameter with false value on YouTube Music search
continuations.

											
										
										
											2023-10-07 15:00:40 +02:00
+								                        // TODO: provide a way to enable restricted mode with:
 								                        //  .value("enableSafetyMode", boolean)
-												Adress changes

											
										
										
											2021-06-24 18:39:16 +02:00
+								                        .value("lockedSafetyMode", false)
-												Use Page.body for YoutubePlaylistExtractor

											
										
										
											2021-04-02 21:34:47 +02:00
+								                    .end()
 								                .end();
-												Use the Android mobile API when there are OTF streams or the content is protected by signatureCiphers

Use the Android mobile API to get the itag 22 (720p with audio), removed when the content is protected by signatureCiphers.
Also use this API when they are OTF streams, to get the itag 17 and 36, low 3GPP quality streams but also the itag 139.
Update the web client version.

											
										
										
											2021-05-29 14:43:26 +02:00
+								        // @formatter:on
 								    }
-												Annotate YoutubeParsingHelper methods with Nonnull when needed

											
										
										
											2021-06-11 13:34:23 +02:00
 								    /**
 								     * Prepares the JSON payload builder of the {@code ANDROID} client.
 								     *
 								     * <p>
 								     * The returned builder contains a closed {@code context} object made of:
 								     * </p>
 								     * <ul>
 								     *     <li>a {@code client} object with {@code clientName} set to {@code ANDROID},
 								     *     {@code clientVersion} set to {@code ANDROID_YOUTUBE_CLIENT_VERSION},
 								     *     {@code platform} set to {@code MOBILE}, {@code osName} set to {@code Android},
 								     *     {@code osVersion} set to {@code 14}, {@code androidSdkVersion} set to
 								     *     {@code 34}, {@code hl} and {@code gl} taken from the given localization and
 								     *     content country, and {@code utcOffsetMinutes} set to {@code 0};</li>
 								     *     <li>a {@code request} object with an empty {@code internalExperimentFlags} array
 								     *     and {@code useSsl} set to {@code true};</li>
 								     *     <li>a {@code user} object with {@code lockedSafetyMode} set to
 								     *     {@code false}.</li>
 								     * </ul>
 								     *
 								     * <p>
 								     * The root object is left open, so more properties can be added to the returned builder.
 								     * </p>
 								     *
 								     * @param localization   the localization whose code is used as the {@code hl} value
 								     * @param contentCountry the content country whose code is used as the {@code gl} value
 								     * @return the builder holding the {@code context} object described above
 								     */
+								    @Nonnull
-												Rebase + some code improvements + fix extraction of age-restricted videos + update clients version

Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:

- the desktop API is fetched.

If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status

If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.

If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.

We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.

Other code changes:

- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed

Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>

											
										
										
											2021-07-28 23:55:09 +02:00
+								    public static JsonBuilder<JsonObject> prepareAndroidMobileJsonBuilder(
 								            @Nonnull final Localization localization,
 								            @Nonnull final ContentCountry contentCountry) {
-												Use the Android mobile API when there are OTF streams or the content is protected by signatureCiphers

Use the Android mobile API to get the itag 22 (720p with audio), removed when the content is protected by signatureCiphers.
Also use this API when they are OTF streams, to get the itag 17 and 36, low 3GPP quality streams but also the itag 139.
Update the web client version.

											
										
										
											2021-05-29 14:43:26 +02:00
+								        // @formatter:off
 								        return JsonObject.builder()
 								                .object("context")
 								                    .object("client")
 								                        .value("clientName", "ANDROID")
-												[YouTube] Update client versions and use a real version for the iOS client

The iOS version can be got easily in fact, by looking at the What's New section of the App Store' app page.

											
										
										
											2022-08-08 20:12:32 +02:00
+								                        .value("clientVersion", ANDROID_YOUTUBE_CLIENT_VERSION)
-												[YouTube] Extract streams of livestreams from the iOS client and disabled the Android client for livestreams

The iOS client is only enabled for livestreams and the Android client is now only enabled for videos, both by default.

A way to force, or not, the fetch of both clients have been added with two new static methods in YoutubeStreamExtractor.

											
										
										
											2022-01-15 17:25:00 +01:00
+								                        .value("platform", "MOBILE")
-												[YouTube] Spoof more mobile clients

Additional parameters have been added to the player requests of ANDROID and IOS
clients:

- for both clients: osName and osVersion: their respective values are:
  - for the ANDROID one: Android and 12;
  - for the IOS one: iOS and 15.6.0.19G71.
- for the ANDROID client: androidTargetSdkVersion, with the Android SDK version
  corresponding to the Android version used in the player requests of this
  client. This parameter is now required with this client to be sure to get a
  correct player response, otherwise, the one of a video saying that this
  content is not available in this app and to watch it with the latest version
  of YouTube can be returned instead;
- for the IOS client: deviceMake, with Apple as its value.

The iOS version sent in the IOS client player requests has been also updated to
the version 15.6 of the OS.

Finally, a comment about the requirement to use the signature timestamp from
the player JavaScript base file for HTML5 player requests on videos with
obfuscated URLs has been added and replaces a previous one which may be not
true.

											
										
										
											2022-08-08 22:06:10 +02:00
+								                        .value("osName", "Android")
-												[YouTube] Update InnerTube clients' version and devices' OS version and model

											
										
										
											2023-10-07 14:14:34 +02:00
+								                        .value("osVersion", "14")
-												[YouTube] Spoof more mobile clients

Additional parameters have been added to the player requests of ANDROID and IOS
clients:

- for both clients: osName and osVersion: their respective values are:
  - for the ANDROID one: Android and 12;
  - for the IOS one: iOS and 15.6.0.19G71.
- for the ANDROID client: androidTargetSdkVersion, with the Android SDK version
  corresponding to the Android version used in the player requests of this
  client. This parameter is now required with this client to be sure to get a
  correct player response, otherwise, the one of a video saying that this
  content is not available in this app and to watch it with the latest version
  of YouTube can be returned instead;
- for the IOS client: deviceMake, with Apple as its value.

The iOS version sent in the IOS client player requests has been also updated to
the version 15.6 of the OS.

Finally, a comment about the requirement to use the signature timestamp from
the player JavaScript base file for HTML5 player requests on videos with
obfuscated URLs has been added and replaces a previous one which may be not
true.

											
										
										
											2022-08-08 22:06:10 +02:00
+								                        /*
 								                        A valid Android SDK version is required to be sure to get a valid player
 								                        response
-												[YouTube] Update InnerTube clients' version and devices' OS version and model

											
										
										
											2023-10-07 14:14:34 +02:00
+								                        If this parameter is not provided, the player response is replaced by an
 								                        error saying the message "The following content is not available on this
 								                        app. Watch this content on the latest version on YouTube" (it was
 								                        previously a 5-minute video with this message)
-												[YouTube] Spoof more mobile clients

Additional parameters have been added to the player requests of ANDROID and IOS
clients:

- for both clients: osName and osVersion: their respective values are:
  - for the ANDROID one: Android and 12;
  - for the IOS one: iOS and 15.6.0.19G71.
- for the ANDROID client: androidTargetSdkVersion, with the Android SDK version
  corresponding to the Android version used in the player requests of this
  client. This parameter is now required with this client to be sure to get a
  correct player response, otherwise, the one of a video saying that this
  content is not available in this app and to watch it with the latest version
  of YouTube can be returned instead;
- for the IOS client: deviceMake, with Apple as its value.

The iOS version sent in the IOS client player requests has been also updated to
the version 15.6 of the OS.

Finally, a comment about the requirement to use the signature timestamp from
the player JavaScript base file for HTML5 player requests on videos with
obfuscated URLs has been added and replaces a previous one which may be not
true.

											
										
										
											2022-08-08 22:06:10 +02:00
+								                        See https://github.com/TeamNewPipe/NewPipe/issues/8713
 								                        The Android SDK version corresponding to the Android version used in
 								                        requests is sent
 								                        */
-												[YouTube] Update InnerTube clients' version and devices' OS version and model

											
										
										
											2023-10-07 14:14:34 +02:00
+								                        .value("androidSdkVersion", 34)
-												[YouTube] Extract streams of livestreams from the iOS client and disabled the Android client for livestreams

The iOS client is only enabled for livestreams and the Android client is now only enabled for videos, both by default.

A way to force, or not, the fetch of both clients have been added with two new static methods in YoutubeStreamExtractor.

											
										
										
											2022-01-15 17:25:00 +01:00
+								                        .value("hl", localization.getLocalizationCode())
 								                        .value("gl", contentCountry.getCountryCode())
-												[YouTube] Add utcOffsetMinutes to all InnerTube payloads

This should make returned dates consistent between timezones and countries on
which the extractor is ran.

It was previously only set on YouTube Music search continuations.

											
										
										
											2023-10-07 15:04:36 +02:00
+								                        .value("utcOffsetMinutes", 0)
-												[YouTube] Extract streams of livestreams from the iOS client and disabled the Android client for livestreams

The iOS client is only enabled for livestreams and the Android client is now only enabled for videos, both by default.

A way to force, or not, the fetch of both clients have been added with two new static methods in YoutubeStreamExtractor.

											
										
										
											2022-01-15 17:25:00 +01:00
+								                    .end()
-												[YouTube] Improve payloads and URLs of InnerTube requests

For every InnerTube request:
- Always add a `request` object with the following properties:
  - "internalExperimentFlags" set to an empty array;
  - "useSsl" set to "true";
  - "lockedSafetyMode" set to "false".
- Use proper TODO comment to provide a way to enable restricted mode on every
request and add it on requests on which it wasn't present.

For YouTube Music:
- Remove alt query parameter, as it is not used anymore by the website;
- Add prettyPrint query parameter with false value on YouTube Music search
continuations.

											
										
										
											2023-10-07 15:00:40 +02:00
+								                    .object("request")
 								                        .array("internalExperimentFlags")
 								                        .end()
 								                        .value("useSsl", true)
 								                    .end()
-												[YouTube] Extract streams of livestreams from the iOS client and disabled the Android client for livestreams

The iOS client is only enabled for livestreams and the Android client is now only enabled for videos, both by default.

A way to force, or not, the fetch of both clients have been added with two new static methods in YoutubeStreamExtractor.

											
										
										
											2022-01-15 17:25:00 +01:00
+								                    .object("user")
-												[YouTube] Improve payloads and URLs of InnerTube requests

For every InnerTube request:
- Always add a `request` object with the following properties:
  - "internalExperimentFlags" set to an empty array;
  - "useSsl" set to "true";
  - "lockedSafetyMode" set to "false".
- Use proper TODO comment to provide a way to enable restricted mode on every
request and add it on requests on which it wasn't present.

For YouTube Music:
- Remove alt query parameter, as it is not used anymore by the website;
- Add prettyPrint query parameter with false value on YouTube Music search
continuations.

											
										
										
											2023-10-07 15:00:40 +02:00
+								                        // TODO: provide a way to enable restricted mode with:
 								                        //  .value("enableSafetyMode", boolean)
-												[YouTube] Extract streams of livestreams from the iOS client and disabled the Android client for livestreams

The iOS client is only enabled for livestreams and the Android client is now only enabled for videos, both by default.

A way to force, or not, the fetch of both clients have been added with two new static methods in YoutubeStreamExtractor.

											
										
										
											2022-01-15 17:25:00 +01:00
+								                        .value("lockedSafetyMode", false)
 								                    .end()
 								                .end();
 								        // @formatter:on
 								    }
 								    @Nonnull
 								    public static JsonBuilder<JsonObject> prepareIosMobileJsonBuilder(
 								            @Nonnull final Localization localization,
 								            @Nonnull final ContentCountry contentCountry) {
 								        // @formatter:off
 								        return JsonObject.builder()
 								                .object("context")
 								                    .object("client")
 								                        .value("clientName", "IOS")
-												[YouTube] Update client versions and use a real version for the iOS client

The iOS version can in fact be obtained easily, by looking at the What's New section of the app's App Store page.

											
										
										
											2022-08-08 20:12:32 +02:00
+								                        .value("clientVersion", IOS_YOUTUBE_CLIENT_VERSION)
-												[YouTube] Spoof more mobile clients

Additional parameters have been added to the player requests of ANDROID and IOS
clients:

- for both clients: osName and osVersion: their respective values are:
  - for the ANDROID one: Android and 12;
  - for the IOS one: iOS and 15.6.0.19G71.
- for the ANDROID client: androidTargetSdkVersion, with the Android SDK version
  corresponding to the Android version used in the player requests of this
  client. This parameter is now required with this client to be sure to get a
  correct player response, otherwise, the one of a video saying that this
  content is not available in this app and to watch it with the latest version
  of YouTube can be returned instead;
- for the IOS client: deviceMake, with Apple as its value.

The iOS version sent in the IOS client player requests has been also updated to
the version 15.6 of the OS.

Finally, a comment about the requirement to use the signature timestamp from
the player JavaScript base file for HTML5 player requests on videos with
obfuscated URLs has been added and replaces a previous one which may not be
true.

											
										
										
											2022-08-08 22:06:10 +02:00
+								                        .value("deviceMake",  "Apple")
-												[YouTube] Extract streams of livestreams from the iOS client and disabled the Android client for livestreams

The iOS client is only enabled for livestreams and the Android client is now only enabled for videos, both by default.

A way to force, or not, the fetch of both clients has been added with two new static methods in YoutubeStreamExtractor.

											
										
										
											2022-01-15 17:25:00 +01:00
+								                        // Device model is required to get 60fps streams
-												[YouTube] Add documentation about parameters added and clients versions and key

Also move the iPhone device machine id to a constant, explain how it is used and move the licence in the header of the file, and fix missing imports in YoutubeStreamExtractor (due to a rebase issue).

											
										
										
											2022-03-26 20:02:35 +01:00
+								                        .value("deviceModel", IOS_DEVICE_MODEL)
-												[YouTube] Extract streams of livestreams from the iOS client and disabled the Android client for livestreams

The iOS client is only enabled for livestreams and the Android client is now only enabled for videos, both by default.

A way to force, or not, the fetch of both clients has been added with two new static methods in YoutubeStreamExtractor.

											
										
										
											2022-01-15 17:25:00 +01:00
+								                        .value("platform", "MOBILE")
-												[YouTube] Spoof more mobile clients

Additional parameters have been added to the player requests of ANDROID and IOS
clients:

- for both clients: osName and osVersion: their respective values are:
  - for the ANDROID one: Android and 12;
  - for the IOS one: iOS and 15.6.0.19G71.
- for the ANDROID client: androidTargetSdkVersion, with the Android SDK version
  corresponding to the Android version used in the player requests of this
  client. This parameter is now required with this client to be sure to get a
  correct player response, otherwise, the one of a video saying that this
  content is not available in this app and to watch it with the latest version
  of YouTube can be returned instead;
- for the IOS client: deviceMake, with Apple as its value.

The iOS version sent in the IOS client player requests has been also updated to
the version 15.6 of the OS.

Finally, a comment about the requirement to use the signature timestamp from
the player JavaScript base file for HTML5 player requests on videos with
obfuscated URLs has been added and replaces a previous one which may not be
true.

											
										
										
											2022-08-08 22:06:10 +02:00
+								                        .value("osName", "iOS")
-												[YouTube] Create constants for client names/versions

											
										
										
											2024-04-20 11:43:54 +02:00
+								                        .value("osVersion", IOS_OS_VERSION)
-												Use the Android mobile API when there are OTF streams or the content is protected by signatureCiphers

Use the Android mobile API to get the itag 22 (720p with audio), removed when the content is protected by signatureCiphers.
Also use this API when there are OTF streams, to get the itags 17 and 36, low-quality 3GPP streams, but also the itag 139.
Update the web client version.

											
										
										
											2021-05-29 14:43:26 +02:00
+								                        .value("hl", localization.getLocalizationCode())
 								                        .value("gl", contentCountry.getCountryCode())
-												[YouTube] Add utcOffsetMinutes to all InnerTube payloads

This should make returned dates consistent between timezones and countries on
which the extractor is run.

It was previously only set on YouTube Music search continuations.

											
										
										
											2023-10-07 15:04:36 +02:00
+								                        .value("utcOffsetMinutes", 0)
-												Use the Android mobile API when there are OTF streams or the content is protected by signatureCiphers

Use the Android mobile API to get the itag 22 (720p with audio), removed when the content is protected by signatureCiphers.
Also use this API when there are OTF streams, to get the itags 17 and 36, low-quality 3GPP streams, but also the itag 139.
Update the web client version.

											
										
										
											2021-05-29 14:43:26 +02:00
+								                    .end()
-												[YouTube] Improve payloads and URLs of InnerTube requests

For every InnerTube request:
- Always add a `request` object with the following properties:
  - "internalExperimentFlags" set to an empty array;
  - "useSsl" set to "true";
  - "lockedSafetyMode" set to "false".
- Use proper TODO comment to provide a way to enable restricted mode on every
request and add it on requests on which it wasn't present.

For YouTube Music:
- Remove alt query parameter, as it is not used anymore by the website;
- Add prettyPrint query parameter with false value on YouTube Music search
continuations.

											
										
										
											2023-10-07 15:00:40 +02:00
+								                    .object("request")
 								                        .array("internalExperimentFlags")
 								                        .end()
 								                        .value("useSsl", true)
 								                    .end()
-												Address changes

											
										
										
											2021-06-24 18:39:16 +02:00
+								                    .object("user")
-												[YouTube] Improve payloads and URLs of InnerTube requests

For every InnerTube request:
- Always add a `request` object with the following properties:
  - "internalExperimentFlags" set to an empty array;
  - "useSsl" set to "true";
  - "lockedSafetyMode" set to "false".
- Use proper TODO comment to provide a way to enable restricted mode on every
request and add it on requests on which it wasn't present.

For YouTube Music:
- Remove alt query parameter, as it is not used anymore by the website;
- Add prettyPrint query parameter with false value on YouTube Music search
continuations.

											
										
										
											2023-10-07 15:00:40 +02:00
+								                        // TODO: provide a way to enable restricted mode with:
 								                        //  .value("enableSafetyMode", boolean)
-												Address changes

											
										
										
											2021-06-24 18:39:16 +02:00
+								                        .value("lockedSafetyMode", false)
 								                    .end()
-												Use the Android mobile API when there are OTF streams or the content is protected by signatureCiphers

Use the Android mobile API to get the itag 22 (720p with audio), removed when the content is protected by signatureCiphers.
Also use this API when there are OTF streams, to get the itags 17 and 36, low-quality 3GPP streams, but also the itag 139.
Update the web client version.

											
										
										
											2021-05-29 14:43:26 +02:00
+								                .end();
-												Use Page.body for YoutubePlaylistExtractor

											
										
										
											2021-04-02 21:34:47 +02:00
+								        // @formatter:on
 								    }
-												Rebase + some code improvements + fix extraction of age-restricted videos + update clients version

Here are the requests which will now be made by the `onFetchPage` method of `YoutubeStreamExtractor`:

- the desktop API is fetched.

If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status

If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.

If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.

We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.

Other code changes:

- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed

Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>

											
										
										
											2021-07-28 23:55:09 +02:00
+								    @Nonnull
-												[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more

Use the TV embedded client technique to get streams of embeddable age-restricted videos.

This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, the category, and the uploadDate and publishDate properties would not be available.

The outdated code for these contents has been removed.

Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.

Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.

											
										
										
											2022-04-02 19:06:36 +02:00
+								    public static JsonBuilder<JsonObject> prepareTvHtml5EmbedJsonBuilder(
-												[YouTube] Extract streams of livestreams from the iOS client and disabled the Android client for livestreams

The iOS client is only enabled for livestreams and the Android client is now only enabled for videos, both by default.

A way to force, or not, the fetch of both clients has been added with two new static methods in YoutubeStreamExtractor.

											
										
										
											2022-01-15 17:25:00 +01:00
+								            @Nonnull final Localization localization,
 								            @Nonnull final ContentCountry contentCountry,
-												[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more

Use the TV embedded client technique to get streams of embeddable age-restricted videos.

This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, the category, and the uploadDate and publishDate properties would not be available.

The outdated code for these contents has been removed.

Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.

Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.

											
										
										
											2022-04-02 19:06:36 +02:00
+								            @Nonnull final String videoId) {
-												Apply changes in YoutubeStreamExtractor

Extract post live DVR streams as post live streams instead of live streams.

A new class has been added in order to improve code: ItagInfo, which stores an itag, the extracted content (URL) and whether it is a URL or not.
A functional interface has been added in order to abstract the stream building: StreamBuilderHelper.
Also add the cver parameter added by the desktop web client on the corresponding streams (a new method has been added in YoutubeParsingHelper to check this and another for Android streams).

Some code in these classes has been also refactored/improved/optimized.

											
										
										
											2022-03-06 20:10:11 +01:00
+								        // @formatter:off
-												[YouTube] Extract streams of livestreams from the iOS client and disabled the Android client for livestreams

The iOS client is only enabled for livestreams and the Android client is now only enabled for videos, both by default.

A way to force, or not, the fetch of both clients has been added with two new static methods in YoutubeStreamExtractor.

											
										
										
											2022-01-15 17:25:00 +01:00
+								        return JsonObject.builder()
 								                .object("context")
 								                    .object("client")
-												[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more

Use the TV embedded client technique to get streams of embeddable age-restricted videos.

This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, the category, and the uploadDate and publishDate properties would not be available.

The outdated code for these contents has been removed.

Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.

Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.

											
										
										
											2022-04-02 19:06:36 +02:00
+								                        .value("clientName", "TVHTML5_SIMPLY_EMBEDDED_PLAYER")
 								                        .value("clientVersion", TVHTML5_SIMPLY_EMBED_CLIENT_VERSION)
-												[YouTube] Extract streams of livestreams from the iOS client and disabled the Android client for livestreams

The iOS client is only enabled for livestreams and the Android client is now only enabled for videos, both by default.

A way to force, or not, the fetch of both clients has been added with two new static methods in YoutubeStreamExtractor.

											
										
										
											2022-01-15 17:25:00 +01:00
+								                        .value("clientScreen", "EMBED")
-												[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more

Use the TV embedded client technique to get streams of embeddable age-restricted videos.

This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, the category, and the uploadDate and publishDate properties would not be available.

The outdated code for these contents has been removed.

Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.

Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.

											
										
										
											2022-04-02 19:06:36 +02:00
+								                        .value("platform", "TV")
-												Rebase + some code improvements + fix extraction of age-restricted videos + update clients version

Here are the requests which will now be made by the `onFetchPage` method of `YoutubeStreamExtractor`:

- the desktop API is fetched.

If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status

If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.

If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.

We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.

Other code changes:

- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed

Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>

											
										
										
											2021-07-28 23:55:09 +02:00
+								                        .value("hl", localization.getLocalizationCode())
 								                        .value("gl", contentCountry.getCountryCode())
-												[YouTube] Add utcOffsetMinutes to all InnerTube payloads

This should make returned dates consistent between timezones and countries on
which the extractor is run.

It was previously only set on YouTube Music search continuations.

											
										
										
											2023-10-07 15:04:36 +02:00
+								                        .value("utcOffsetMinutes", 0)
-												Rebase + some code improvements + fix extraction of age-restricted videos + update clients version

Here are the requests which will now be made by the `onFetchPage` method of `YoutubeStreamExtractor`:

- the desktop API is fetched.

If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status

If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.

If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.

We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.

Other code changes:

- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed

Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>

											
										
										
											2021-07-28 23:55:09 +02:00
+								                    .end()
 								                    .object("thirdParty")
 								                        .value("embedUrl", "https://www.youtube.com/watch?v=" + videoId)
 								                    .end()
-												[YouTube] Improve payloads and URLs of InnerTube requests

For every InnerTube request:
- Always add a `request` object with the following properties:
  - "internalExperimentFlags" set to an empty array;
  - "useSsl" set to "true";
  - "lockedSafetyMode" set to "false".
- Use proper TODO comment to provide a way to enable restricted mode on every
request and add it on requests on which it wasn't present.

For YouTube Music:
- Remove alt query parameter, as it is not used anymore by the website;
- Add prettyPrint query parameter with false value on YouTube Music search
continuations.

											
										
										
											2023-10-07 15:00:40 +02:00
+								                    .object("request")
 								                        .array("internalExperimentFlags")
 								                        .end()
 								                        .value("useSsl", true)
 								                    .end()
-												Rebase + some code improvements + fix extraction of age-restricted videos + update clients version

Here are the requests which will now be made by the `onFetchPage` method of `YoutubeStreamExtractor`:

- the desktop API is fetched.

If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status

If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.

If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.

We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.

Other code changes:

- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed

Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>

											
										
										
											2021-07-28 23:55:09 +02:00
+								                    .object("user")
-												[YouTube] Improve payloads and URLs of InnerTube requests

For every InnerTube request:
- Always add a `request` object with the following properties:
  - "internalExperimentFlags" set to an empty array;
  - "useSsl" set to "true";
  - "lockedSafetyMode" set to "false".
- Use proper TODO comment to provide a way to enable restricted mode on every
request and add it on requests on which it wasn't present.

For YouTube Music:
- Remove alt query parameter, as it is not used anymore by the website;
- Add prettyPrint query parameter with false value on YouTube Music search
continuations.

											
										
										
											2023-10-07 15:00:40 +02:00
+								                        // TODO: provide a way to enable restricted mode with:
 								                        //  .value("enableSafetyMode", boolean)
-												Rebase + some code improvements + fix extraction of age-restricted videos + update clients version

Here are the requests which will now be made by the `onFetchPage` method of `YoutubeStreamExtractor`:

- the desktop API is fetched.

If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status

If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.

If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.

We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.

Other code changes:

- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed

Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>

											
										
										
											2021-07-28 23:55:09 +02:00
+								                        .value("lockedSafetyMode", false)
 								                    .end()
-												[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more

Use the TV embedded client technique to get streams of embeddable age-restricted videos.

This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, the category, and the uploadDate and publishDate properties would not be available.

The outdated code for these contents has been removed.

Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.

Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.

											
										
										
											2022-04-02 19:06:36 +02:00
+								                .end();
-												Rebase + some code improvements + fix extraction of age-restricted videos + update clients version

Here are the requests which will now be made by the `onFetchPage` method of `YoutubeStreamExtractor`:

- the desktop API is fetched.

If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status

If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.

If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.

We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.

Other code changes:

- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed

Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>

											
										
										
											2021-07-28 23:55:09 +02:00
+								        // @formatter:on
 								    }
 								    @Nonnull
-												[YouTube] Add the cpn param to playback requests and try to spoof better the Android client

The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.

For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.

Fingerprinting on the WEB client based on the client version used is not possible anymore, because the latest client version is extracted on the first YouTube request of a session, which requires the extractor to fetch the website again (unfortunately, this may bring back the reCaptcha issues, but there seems to be no other way to get it).

For the Android client, the video id is now also sent as a query parameter, along with a 12-character string in the t query parameter, in order to better spoof this client. Research needs to be done on this parameter, which is unique to each request, and on how clients generate it.

This commit also fixes a small bug with the Android User-Agent string.

Some code improvements have been also made.

											
										
										
											2021-12-22 17:55:41 +01:00
+								    public static byte[] createDesktopPlayerBody(
 								            @Nonnull final Localization localization,
 								            @Nonnull final ContentCountry contentCountry,
 								            @Nonnull final String videoId,
-												[YouTube] Convert signature timestamp to integer

The signature timestamp is used as a number by HTML5 clients, so it should be
used in the same way by the extractor too instead of being a string.

As the timestamp doesn't seem to exceed 5 digits, an integer is used to store
its value.

											
										
										
											2023-09-16 22:22:09 +02:00
+								            @Nonnull final Integer sts,
-												[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more

Use the TV embedded client technique to get streams of embeddable age-restricted videos.

This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, the category, and the uploadDate and publishDate properties would no longer be available.

The outdated code for these contents has been removed.

Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.

Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.

											
										
										
											2022-04-02 19:06:36 +02:00
+								            final boolean isTvHtml5DesktopJsonBuilder,
-												[YouTube] Add the cpn param to playback requests and try to spoof better the Android client

The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.

For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.

Fingerprinting on the WEB client based on the client version used is not possible anymore, because the latest client version is extracted on the first YouTube request of a session, which requires the extractor to fetch the website again (unfortunately, this may bring back the reCaptcha issues, but there seems to be no other way to get it).

For the Android client, the video id is now also sent as a query parameter, along with a 12-character string in the t query parameter, in order to better spoof this client. Research needs to be done on this parameter, which is unique to each request, and on how clients generate it.

This commit also fixes a small bug with the Android User-Agent string.

Some code improvements have been also made.

											
										
										
											2021-12-22 17:55:41 +01:00
+								            @Nonnull final String contentPlaybackNonce) throws IOException, ExtractionException {
 								        // @formatter:off
-												[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more

Use the TV embedded client technique to get streams of embeddable age-restricted videos.

This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, the category, and the uploadDate and publishDate properties would no longer be available.

The outdated code for these contents has been removed.

Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.

Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.

											
										
										
											2022-04-02 19:06:36 +02:00
+								        return JsonWriter.string((isTvHtml5DesktopJsonBuilder
 								                        ? prepareTvHtml5EmbedJsonBuilder(localization, contentCountry, videoId)
-												[YouTube] Add the cpn param to playback requests and try to spoof better the Android client

The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.

For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.

Fingerprinting on the WEB client based on the client version used is not possible anymore, because the latest client version is extracted on the first YouTube request of a session, which requires the extractor to fetch the website again (unfortunately, this may bring back the reCaptcha issues, but there seems to be no other way to get it).

For the Android client, the video id is now also sent as a query parameter, along with a 12-character string in the t query parameter, in order to better spoof this client. Research needs to be done on this parameter, which is unique to each request, and on how clients generate it.

This commit also fixes a small bug with the Android User-Agent string.

Some code improvements have been also made.

											
										
										
											2021-12-22 17:55:41 +01:00
+								                        : prepareDesktopJsonBuilder(localization, contentCountry))
 								                .object("playbackContext")
 								                    .object("contentPlaybackContext")
-												[YouTube] Spoof more mobile clients

Additional parameters have been added to the player requests of ANDROID and IOS
clients:

- for both clients: osName and osVersion: their respective values are:
  - for the ANDROID one: Android and 12;
  - for the IOS one: iOS and 15.6.0.19G71.
- for the ANDROID client: androidTargetSdkVersion, with the Android SDK version
  corresponding to the Android version used in the player requests of this
  client. This parameter is now required with this client to be sure to get a
  correct player response, otherwise, the one of a video saying that this
  content is not available in this app and to watch it with the latest version
  of YouTube can be returned instead;
- for the IOS client: deviceMake, with Apple as its value.

The iOS version sent in the IOS client player requests has been also updated to
the version 15.6 of the OS.

Finally, a comment about the requirement to use the signature timestamp from
the player JavaScript base file for HTML5 player requests on videos with
obfuscated URLs has been added and replaces a previous one which may be not
true.

											
										
										
											2022-08-08 22:06:10 +02:00
+								                        // Signature timestamp from the JavaScript base player is needed to get
 								                        // working obfuscated URLs
-												[YouTube] Add the cpn param to playback requests and try to spoof better the Android client

The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.

For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.

Fingerprinting on the WEB client based on the client version used is not possible anymore, because the latest client version is extracted on the first YouTube request of a session, which requires the extractor to fetch the website again (unfortunately, this may bring back the reCaptcha issues, but there seems to be no other way to get it).

For the Android client, the video id is now also sent as a query parameter, along with a 12-character string in the t query parameter, in order to better spoof this client. Research needs to be done on this parameter, which is unique to each request, and on how clients generate it.

This commit also fixes a small bug with the Android User-Agent string.

Some code improvements have been also made.

											
										
										
											2021-12-22 17:55:41 +01:00
+								                        .value("signatureTimestamp", sts)
 								                        .value("referer", "https://www.youtube.com/watch?v=" + videoId)
-												Rebase + some code improvements + fix extraction of age-restricted videos + update clients version

Here are the requests which will now be made by the `onFetchPage` method of `YoutubeStreamExtractor`:

- the desktop API is fetched.

If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status

If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.

If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.

We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.

Other code changes:

- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed

Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>

											
										
										
											2021-07-28 23:55:09 +02:00
+								                    .end()
-												[YouTube] Add the cpn param to playback requests and try to spoof better the Android client

The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.

For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.

Fingerprinting on the WEB client based on the client version used is not possible anymore, because the latest client version is extracted on the first YouTube request of a session, which requires the extractor to fetch the website again (unfortunately, this may bring back the reCaptcha issues, but there seems to be no other way to get it).

For the Android client, the video id is now also sent as a query parameter, along with a 12-character string in the t query parameter, in order to better spoof this client. Research needs to be done on this parameter, which is unique to each request, and on how clients generate it.

This commit also fixes a small bug with the Android User-Agent string.

Some code improvements have been also made.

											
										
										
											2021-12-22 17:55:41 +01:00
+								                .end()
 								                .value(CPN, contentPlaybackNonce)
 								                .value(VIDEO_ID, videoId)
-												[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more

Use the TV embedded client technique to get streams of embeddable age-restricted videos.

This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, the category, and the uploadDate and publishDate properties would no longer be available.

The outdated code for these contents has been removed.

Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.

Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.

											
										
										
											2022-04-02 19:06:36 +02:00
+								                .value(CONTENT_CHECK_OK, true)
 								                .value(RACY_CHECK_OK, true)
-												[YouTube] Add the cpn param to playback requests and try to spoof better the Android client

The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.

For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.

Fingerprinting on the WEB client based on the client version used is not possible anymore, because the latest client version is extracted on the first YouTube request of a session, which requires the extractor to fetch the website again (unfortunately, this may bring back the reCaptcha issues, but there seems to be no other way to get it).

For the Android client, the video id is now also sent as a query parameter, along with a 12-character string in the t query parameter, in order to better spoof this client. Research needs to be done on this parameter, which is unique to each request, and on how clients generate it.

This commit also fixes a small bug with the Android User-Agent string.

Some code improvements have been also made.

											
										
										
											2021-12-22 17:55:41 +01:00
+								                .done())
 								                .getBytes(StandardCharsets.UTF_8);
 								        // @formatter:on
-												Rebase + some code improvements + fix extraction of age-restricted videos + update clients version

Here are the requests which will now be made by the `onFetchPage` method of `YoutubeStreamExtractor`:

- the desktop API is fetched.

If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status

If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.

If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.

We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.

Other code changes:

- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed

Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>

											
										
										
											2021-07-28 23:55:09 +02:00
+								    }
-												[YouTube] Update again hardcoded client versions and update mobile user agents

Also provide ability to get mobile user-agents used for mobile InnerTube requests and deduplicate related code.

											
										
										
											2022-03-15 19:01:24 +01:00
+								    /**
 								     * Get the user-agent string used as the user-agent for InnerTube requests with the Android
 								     * client.
 								     *
-												[YouTube] Update client versions and use a real version for the iOS client

The iOS version can in fact be obtained easily, by looking at the What's New section of the app's App Store page.

											
										
										
											2022-08-08 20:12:32 +02:00
+								     * <p>
-												[YouTube] Update again hardcoded client versions and update mobile user agents

Also provide ability to get mobile user-agents used for mobile InnerTube requests and deduplicate related code.

											
										
										
											2022-03-15 19:01:24 +01:00
+								     * If the {@link Localization} provided is {@code null}, fallbacks to
 								     * {@link Localization#DEFAULT the default one}.
-												[YouTube] Update client versions and use a real version for the iOS client

The iOS version can in fact be obtained easily, by looking at the What's New section of the app's App Store page.

											
										
										
											2022-08-08 20:12:32 +02:00
+								     * </p>
-												[YouTube] Update again hardcoded client versions and update mobile user agents

Also provide ability to get mobile user-agents used for mobile InnerTube requests and deduplicate related code.

											
										
										
											2022-03-15 19:01:24 +01:00
+								     *
 								     * @param localization the {@link Localization} to set in the user-agent
 								     * @return the Android user-agent used for InnerTube requests with the Android client,
 								     * depending on the {@link Localization} provided
 								     */
 								    @Nonnull
 								    public static String getAndroidUserAgent(@Nullable final Localization localization) {
-												[YouTube] Update InnerTube clients' version and devices' OS version and model

											
										
										
											2023-10-07 14:14:34 +02:00
+								        // Spoofing an Android 14 device with the hardcoded version of the Android app
-												[YouTube] Update client versions and use a real version for the iOS client

The iOS version can in fact be obtained easily, by looking at the What's New section of the app's App Store page.

											
										
										
											2022-08-08 20:12:32 +02:00
+								        return "com.google.android.youtube/" + ANDROID_YOUTUBE_CLIENT_VERSION
-												[YouTube] Update InnerTube clients' version and devices' OS version and model

											
										
										
											2023-10-07 14:14:34 +02:00
+								                + " (Linux; U; Android 14; "
-												Apply requested changes in YoutubeParsingHelper

											
										
										
											2022-05-10 21:38:15 +02:00
+								                + (localization != null ? localization : Localization.DEFAULT).getCountryCode()
-												[YouTube] Update again hardcoded client versions and update mobile user agents

Also provide ability to get mobile user-agents used for mobile InnerTube requests and deduplicate related code.

											
										
										
											2022-03-15 19:01:24 +01:00
+								                + ") gzip";
 								    }
 								    /**
 								     * Get the user-agent string used as the user-agent for InnerTube requests with the iOS
 								     * client.
 								     *
-												[YouTube] Update client versions and use a real version for the iOS client

The iOS version can in fact be obtained easily, by looking at the What's New section of the app's App Store page.

											
										
										
											2022-08-08 20:12:32 +02:00
+								     * <p>
-												[YouTube] Update again hardcoded client versions and update mobile user agents

Also provide ability to get mobile user-agents used for mobile InnerTube requests and deduplicate related code.

											
										
										
											2022-03-15 19:01:24 +01:00
+								     * If the {@link Localization} provided is {@code null}, fallbacks to
 								     * {@link Localization#DEFAULT the default one}.
-												[YouTube] Update client versions and use a real version for the iOS client

The iOS version can in fact be obtained easily, by looking at the What's New section of the app's App Store page.

											
										
										
											2022-08-08 20:12:32 +02:00
+								     * </p>
-												[YouTube] Update again hardcoded client versions and update mobile user agents

Also provide ability to get mobile user-agents used for mobile InnerTube requests and deduplicate related code.

											
										
										
											2022-03-15 19:01:24 +01:00
+								     *
 								     * @param localization the {@link Localization} to set in the user-agent
 								     * @return the iOS user-agent used for InnerTube requests with the iOS client, depending on the
 								     * {@link Localization} provided
 								     */
 								    @Nonnull
 								    public static String getIosUserAgent(@Nullable final Localization localization) {
-												[YouTube] Remove usage of API keys for InnerTube requests, bump versions

The API keys are not used anymore by official clients in almost all cases
(still used by the Android app until it gets a configuration) for all requests
we made.

Clients and device OS versions have been bumped to their latest stable version
known.

Methods and fields related to API keys have been renamed or deleted if they're
no longer relevant.

											
										
										
											2024-04-04 23:33:41 +02:00
+								        // Spoofing an iPhone 15 running iOS 17.4.1 with the hardcoded version of the iOS app
-												[YouTube] Update client versions and use a real version for the iOS client

The iOS version can in fact be obtained easily, by looking at the What's New section of the app's App Store page.

											
										
										
											2022-08-08 20:12:32 +02:00
+								        return "com.google.ios.youtube/" + IOS_YOUTUBE_CLIENT_VERSION
-												[YouTube] Create constants for client names/versions

											
										
										
											2024-04-20 11:43:54 +02:00
+								                + "(" + IOS_DEVICE_MODEL + "; U; CPU iOS "
 								                + IOS_USER_AGENT_VERSION + " like Mac OS X; "
-												Apply requested changes in YoutubeParsingHelper

											
										
										
											2022-05-10 21:38:15 +02:00
+								                + (localization != null ? localization : Localization.DEFAULT).getCountryCode()
-												[YouTube] Update again hardcoded client versions and update mobile user agents

Also provide ability to get mobile user-agents used for mobile InnerTube requests and deduplicate related code.

											
										
										
											2022-03-15 19:01:24 +01:00
+								                + ")";
 								    }
-												Use immutable Map factory methods.

											
										
										
											2022-11-12 05:01:05 +01:00
+								    /**
 								     * Returns a {@link Map} containing the required YouTube Music headers.
 								     */
-												[YouTube] Send Content-Type header in all POST requests

This header was not sent partially before and was added and guessed by OkHttp. This can create issues when using other HTTP clients than OkHttp, such as Cronet.

Some code in the modified classes has been improved and / or deduplicated, and usages of the UTF_8 constant of the Utils class has been replaced by StandardCharsets.UTF_8 where possible.

Note that this header has been added everywhere except in YoutubeDashManifestCreatorsUtils, as an empty body is sent in the POST requests made by this class.

											
										
										
											2022-06-18 16:07:32 +02:00
+								    @Nonnull
 								    public static Map<String, List<String>> getYoutubeMusicHeaders() {
-												Use immutable Map factory methods.

											
										
										
											2022-11-12 05:01:05 +01:00
+								        final var headers = new HashMap<>(getOriginReferrerHeaders(YOUTUBE_MUSIC_URL));
-												[YouTube] Create constants for client names/versions

											
										
										
											2024-04-20 11:43:54 +02:00
+								        headers.putAll(getClientHeaders(YOUTUBE_MUSIC_CLIENT_ID,
 								                youtubeMusicClientVersion));
-												[YouTube] Send Content-Type header in all POST requests

This header was not sent partially before and was added and guessed by OkHttp. This can create issues when using other HTTP clients than OkHttp, such as Cronet.

Some code in the modified classes has been improved and / or deduplicated, and usages of the UTF_8 constant of the Utils class has been replaced by StandardCharsets.UTF_8 where possible.

Note that this header has been added everywhere except in YoutubeDashManifestCreatorsUtils, as an empty body is sent in the POST requests made by this class.

											
										
										
											2022-06-18 16:07:32 +02:00
+								        return headers;
 								    }
-												[YouTube] Set CONSENT cookie


											
										
										
											2021-04-07 12:25:59 +02:00
+								    /**
-												Update extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java
											
										
										
											2023-01-02 18:11:03 +01:00
+								     * Returns a {@link Map} containing the required YouTube headers, including the
 								     * <code>CONSENT</code> cookie to prevent redirects to <code>consent.youtube.com</code>
-												[YouTube] Set CONSENT cookie


											
										
										
											2021-04-07 12:25:59 +02:00
+								     */
-												Use immutable Map factory methods.

											
										
										
											2022-11-12 05:01:05 +01:00
+								    public static Map<String, List<String>> getYouTubeHeaders()
 								            throws ExtractionException, IOException {
 								        final var headers = getClientInfoHeaders();
 								        headers.put("Cookie", List.of(generateConsentCookie()));
 								        return headers;
-												[YouTube] Set CONSENT cookie


											
										
										
											2021-04-07 12:25:59 +02:00
+								    }
 								    /**
-												Use immutable Map factory methods.

											
										
										
											2022-11-12 05:01:05 +01:00
+								     * Returns a {@link Map} containing the {@code X-YouTube-Client-Name},
 								     * {@code X-YouTube-Client-Version}, {@code Origin}, and {@code Referer} headers.
-												[YouTube] Set CONSENT cookie


											
										
										
											2021-04-07 12:25:59 +02:00
+								     */
-												Use immutable Map factory methods.

											
										
										
											2022-11-12 05:01:05 +01:00
+								    public static Map<String, List<String>> getClientInfoHeaders()
 								            throws ExtractionException, IOException {
 								        final var headers = new HashMap<>(getOriginReferrerHeaders("https://www.youtube.com"));
-												[YouTube] Create constants for client names/versions

											
										
										
											2024-04-20 11:43:54 +02:00
+								        headers.putAll(getClientHeaders(WEB_CLIENT_ID, getClientVersion()));
-												Use immutable Map factory methods.

											
										
										
											2022-11-12 05:01:05 +01:00
+								        return headers;
-												[YouTube] Set CONSENT cookie


											
										
										
											2021-04-07 12:25:59 +02:00
+								    }
-												Use Collections.singletonMap().

											
										
										
											2022-07-27 03:26:02 +02:00
+								    /**
-												Use immutable Map factory methods.

											
										
										
											2022-11-12 05:01:05 +01:00
+								     * Returns an unmodifiable {@link Map} containing the {@code Origin} and {@code Referer}
 								     * headers set to the given URL.
 								     *
 								     * @param url The URL to be set as the origin and referrer.
-												Use Collections.singletonMap().

											
										
										
											2022-07-27 03:26:02 +02:00
+								     */
-												Use immutable Map factory methods.

											
										
										
											2022-11-12 05:01:05 +01:00
+								    private static Map<String, List<String>> getOriginReferrerHeaders(@Nonnull final String url) {
 								        final var urlList = List.of(url);
 								        return Map.of("Origin", urlList, "Referer", urlList);
-												Use Collections.singletonMap().

											
										
										
											2022-07-27 03:26:02 +02:00
+								    }
-												[YouTube] Set CONSENT cookie


											
										
										
											2021-04-07 12:25:59 +02:00
+								    /**
-												Use immutable Map factory methods.

											
										
										
											2022-11-12 05:01:05 +01:00
+								     * Returns an unmodifiable {@link Map} containing the {@code X-YouTube-Client-Name} and
 								     * {@code X-YouTube-Client-Version} headers.
 								     *
 								     * @param name The X-YouTube-Client-Name value.
 								     * @param version X-YouTube-Client-Version value.
-												[YouTube] Set CONSENT cookie


											
										
										
											2021-04-07 12:25:59 +02:00
+								     */
-												Use immutable Map factory methods.

											
										
										
											2022-11-12 05:01:05 +01:00
+								    private static Map<String, List<String>> getClientHeaders(@Nonnull final String name,
 								                                                              @Nonnull final String version) {
 								        return Map.of("X-YouTube-Client-Name", List.of(name),
 								                "X-YouTube-Client-Version", List.of(version));
 								    }
 								    /**
 								     * Create a map with the required cookie header.
 								     * @return A singleton map containing the header.
 								     */
 								    public static Map<String, List<String>> getCookieHeader() {
 								        return Map.of("Cookie", List.of(generateConsentCookie()));
-												[YouTube] Set CONSENT cookie


											
										
										
											2021-04-07 12:25:59 +02:00
+								    }
-												Annotate YoutubeParsingHelper methods with Nonnull when needed

											
										
										
											2021-06-11 13:34:23 +02:00
+								    @Nonnull
-												Add method to inject Random into YoutubeParsingHelper and use in tests

											
										
										
											2021-04-08 16:36:55 +02:00
+								    public static String generateConsentCookie() {
-												[YouTube] Switch to new consent cookie

Also move the documentation of the consent in its setter method in order to be
accessible publicly and improve it.

											
										
										
											2023-10-07 19:36:02 +02:00
+								        return "SOCS=" + (isConsentAccepted()
 								                // CAISAiAD means that the user configured manually cookies YouTube, regardless of
 								                // the consent values
 								                // This value surprisingly allows to extract mixes and some YouTube Music playlists
 								                // in the same way when a user allows all cookies
 								                ? "CAISAiAD"
 								                // CAE= means that the user rejected all non-necessary cookies with the "Reject
 								                // all" button on the consent page
 								                : "CAE=");
-												Add method to inject Random into YoutubeParsingHelper and use in tests

											
										
										
											2021-04-08 16:36:55 +02:00
+								    }
-												Annotate YoutubeParsingHelper methods with Nonnull when needed

											
										
										
											2021-06-11 13:34:23 +02:00
+								    public static String extractCookieValue(final String cookieName,
 								                                            @Nonnull final Response response) {
-												[YouTube] Set CONSENT cookie


											
										
										
											2021-04-07 12:25:59 +02:00
+								        final List<String> cookies = response.responseHeaders().get("set-cookie");
-												[YouTube] Fix music mixes in some countries

											
										
										
											2022-03-01 23:02:56 +01:00
+								        if (cookies == null) {
-												Remove EMPTY_STRING.

											
										
										
											2022-08-15 05:49:40 +02:00
+								            return "";
-												[YouTube] Fix music mixes in some countries

											
										
										
											2022-03-01 23:02:56 +01:00
+								        }
-												Remove EMPTY_STRING.

											
										
										
											2022-08-15 05:49:40 +02:00
+								        String result = "";
-												[YouTube] Set CONSENT cookie


											
										
										
											2021-04-07 12:25:59 +02:00
+								        for (final String cookie : cookies) {
-												[YouTube] Fix music mixes in some countries

											
										
										
											2022-03-01 23:02:56 +01:00
+								            final int startIndex = cookie.indexOf(cookieName);
-												[YouTube] Set CONSENT cookie


											
										
										
											2021-04-07 12:25:59 +02:00
+								            if (startIndex != -1) {
 								                result = cookie.substring(startIndex + cookieName.length() + "=".length(),
 								                        cookie.indexOf(";", startIndex));
 								            }
 								        }
 								        return result;
 								    }
-												[YouTube] Detect deleted/nonexistent/invalid channels and playlists

- Added tests for these cases.

											
										
										
											2020-03-01 01:52:25 +01:00
+								    /**
 								     * Shared alert detection function, multiple endpoints return the error similarly structured.
 								     * <p>
 								     * Will check if the object has an alert of the type "ERROR".
-												Extract some code to getValidResponseBody()

											
										
										
											2020-04-01 16:01:21 +02:00
+								     * </p>
-												[YouTube] Detect deleted/nonexistent/invalid channels and playlists

- Added tests for these cases.

											
										
										
											2020-03-01 01:52:25 +01:00
+								     *
 								     * @param initialData the object which will be checked if an alert is present
 								     * @throws ContentNotAvailableException if an alert is detected
 								     */
-												Annotate YoutubeParsingHelper methods with Nonnull when needed

											
										
										
											2021-06-11 13:34:23 +02:00
+								    public static void defaultAlertsCheck(@Nonnull final JsonObject initialData)
 								            throws ParsingException {
-												[YouTube] Detect deleted/nonexistent/invalid channels and playlists

- Added tests for these cases.

											
										
										
											2020-03-01 01:52:25 +01:00
+								        final JsonArray alerts = initialData.getArray("alerts");
-												refactor: add Utils.isNullOrEmpty()

											
										
										
											2020-04-15 18:49:58 +02:00
+								        if (!isNullOrEmpty(alerts)) {
-												[YouTube] Detect deleted/nonexistent/invalid channels and playlists

- Added tests for these cases.

											
										
										
											2020-03-01 01:52:25 +01:00
+								            final JsonObject alertRenderer = alerts.getObject(0).getObject("alertRenderer");
-												Fix NPE in defaultAlertsCheck()

											
										
										
											2020-05-03 10:28:45 +02:00
+								            final String alertText = getTextFromObject(alertRenderer.getObject("text"));
-												Remove EMPTY_STRING.

											
										
										
											2022-08-15 05:49:40 +02:00
+								            final String alertType = alertRenderer.getString("type", "");
-												[YouTube] Detect deleted/nonexistent/invalid channels and playlists

- Added tests for these cases.

											
										
										
											2020-03-01 01:52:25 +01:00
+								            if (alertType.equalsIgnoreCase("ERROR")) {
-												[YouTube] Detect new account termination messages

											
										
										
											2024-03-20 14:49:19 +01:00
+								                if (alertText != null
 								                        && (alertText.contains("This account has been terminated")
 								                        || alertText.contains("This channel was removed"))) {
 								                    if (alertText.matches(".*violat(ed|ion|ing).*")
-												Detect channels which have been terminated due to copyright infringement


											
										
										
											2021-03-23 00:15:21 +01:00
+								                            || alertText.contains("infringement")) {
-												Annotate YoutubeParsingHelper methods with Nonnull when needed

											
										
										
											2021-06-11 13:34:23 +02:00
+								                        // Possible error messages:
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								                        // "This account has been terminated for a violation of YouTube's Terms of
 								                        //     Service."
 								                        // "This account has been terminated due to multiple or severe violations of
 								                        //     YouTube's policy prohibiting hate speech."
 								                        // "This account has been terminated due to multiple or severe violations of
 								                        //     YouTube's policy prohibiting content designed to harass, bully or
 								                        //     threaten."
 								                        // "This account has been terminated due to multiple or severe violations
 								                        //     of YouTube's policy against spam, deceptive practices and misleading
 								                        //     content or other Terms of Service violations."
 								                        // "This account has been terminated due to multiple or severe violations of
 								                        //     YouTube's policy on nudity or sexual content."
 								                        // "This account has been terminated for violating YouTube's Community
 								                        //     Guidelines."
 								                        // "This account has been terminated because we received multiple
 								                        //     third-party claims of copyright infringement regarding material that
 								                        //     the user posted."
 								                        // "This account has been terminated because it is linked to an account that
 								                        //     received multiple third-party claims of copyright infringement."
-												[YouTube] Detect new account termination messages

											
										
										
											2024-03-20 14:49:19 +01:00
+								                        // "This channel was removed because it violated our Community Guidelines."
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								                        throw new AccountTerminatedException(alertText,
 								                                AccountTerminatedException.Reason.VIOLATION);
-												Add AccountTerminatedException for better error handling of terminated channels


											
										
										
											2021-03-22 10:35:05 +01:00
+								                    } else {
 								                        throw new AccountTerminatedException(alertText);
 								                    }
 								                }
-												[YouTube] Detect deleted/nonexistent/invalid channels and playlists

- Added tests for these cases.

											
										
										
											2020-03-01 01:52:25 +01:00
+								                throw new ContentNotAvailableException("Got error: \"" + alertText + "\"");
 								            }
 								        }
 								    }
-												Extract stream and search meta info for YouTube

Add method to extract Google webcache URLs.

											
										
										
											2020-12-20 19:54:12 +01:00
 								    /**
 								     * Sometimes, YouTube provides URLs which use Google's cache. They look like
 								     * {@code https://webcache.googleusercontent.com/search?q=cache:CACHED_URL}
-												use only one constant UTF-8

											
										
										
											2021-02-07 22:12:22 +01:00
+								     *
-												Extract stream and search meta info for YouTube

Add method to extract Google webcache URLs.

											
										
										
											2020-12-20 19:54:12 +01:00
+								     * @param url the URL which might refer to the Google's webcache
 								     * @return the URL which is referring to the original site
 								     */
 								    public static String extractCachedUrlIfNeeded(final String url) {
 								        if (url == null) {
 								            return null;
 								        }
 								        if (url.contains("webcache.googleusercontent.com")) {
 								            return url.split("cache:")[1];
 								        }
 								        return url;
 								    }
-												Add uploader verified by service extraction

											
										
										
											2021-01-22 01:44:58 +01:00
 								    public static boolean isVerified(final JsonArray badges) {
 								        if (Utils.isNullOrEmpty(badges)) {
 								            return false;
 								        }
-												[YouTube] Fix checkstyle issues

											
										
										
											2022-03-18 15:09:06 +01:00
+								        for (final Object badge : badges) {
-												Add uploader verified by service extraction

											
										
										
											2021-01-22 01:44:58 +01:00
+								            final String style = ((JsonObject) badge).getObject("metadataBadgeRenderer")
 								                    .getString("style");
 								            if (style != null && (style.equals("BADGE_STYLE_TYPE_VERIFIED")
 								                    || style.equals("BADGE_STYLE_TYPE_VERIFIED_ARTIST"))) {
 								                return true;
 								            }
 								        }
 								        return false;
 								    }
-												[youtube] improve comments extraction performance

- do not parse responseBody twice for continuation
instead try to get commentsTokenInside with the new pattern ("sectionListRenderer")
and try again with the old pattern ("commentSectionRenderer") on failure
- do not unescape responseBody multiple times
   -> parse responseBody less times

											
										
										
											2021-03-05 13:33:25 +01:00
-												[YouTube] Add the cpn param to playback requests and try to spoof better the Android client

The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.

For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.

Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).

For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.

This commit also fixes a small bug with the Android User-Agent string.

Some code improvements have been also made.

											
										
										
											2021-12-22 17:55:41 +01:00
+								    /**
 								     * Generate a content playback nonce (also called {@code cpn}), sent by YouTube clients in
 								     * playback requests (and also for some clients, in the player request body).
 								     *
 								     * @return a content playback nonce string
 								     */
 								    @Nonnull
 								    public static String generateContentPlaybackNonce() {
-												Improve tests and randomness

- Use the existing RNG inside YoutubeParsingHelper
- Deduplicated test-setup for YouTube tests
- Minor improvements

											
										
										
											2022-02-07 21:23:38 +01:00
+								        return RandomStringFromAlphabetGenerator.generate(
 								                CONTENT_PLAYBACK_NONCE_ALPHABET, 16, numberGenerator);
-												[YouTube] Add the cpn param to playback requests and try to spoof better the Android client

The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.

For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.

Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).

For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.

This commit also fixes a small bug with the Android User-Agent string.

Some code improvements have been also made.

											
										
										
											2021-12-22 17:55:41 +01:00
+								    }
 								    /**
 								     * Try to generate a {@code t} parameter, sent by mobile clients as a query of the player
 								     * request.
 								     *
 								     * <p>
 								     * Some researches needs to be done to know how this parameter, unique at each request, is
 								     * generated.
 								     * </p>
 								     *
 								     * @return a 12 characters string to try to reproduce the {@code} parameter
 								     */
 								    @Nonnull
 								    public static String generateTParameter() {
-												Improve tests and randomness

- Use the existing RNG inside YoutubeParsingHelper
- Deduplicated test-setup for YouTube tests
- Minor improvements

											
										
										
											2022-02-07 21:23:38 +01:00
+								        return RandomStringFromAlphabetGenerator.generate(
 								                CONTENT_PLAYBACK_NONCE_ALPHABET, 12, numberGenerator);
-												[YouTube] Add the cpn param to playback requests and try to spoof better the Android client

The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.

For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.

Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).

For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.

This commit also fixes a small bug with the Android User-Agent string.

Some code improvements have been also made.

											
										
										
											2021-12-22 17:55:41 +01:00
+								    }
-												Apply changes in YoutubeStreamExtractor

Extract post live DVR streams as post live streams instead of live streams.

A new class has been in order to improve code: ItagInfo, which stores an itag, the content (URL) extracted and if its an URL or not.
A functional interface has been added in order to abstract the stream building: StreamBuilderHelper.
Also add the cver parameter added by the desktop web client on the corresponding streams (a new method has been added in YoutubeParsingHelper to check this and another for Android streams).

Some code in these classes has been also refactored/improved/optimized.

											
										
										
											2022-03-06 20:10:11 +01:00
 								    /**
-												Improve documentation and adress most of the requested changes

Also fix some issues in several places, in the code and the documentation.

											
										
										
											2022-03-15 11:19:13 +01:00
+								     * Check if the streaming URL is from the YouTube {@code WEB} client.
-												Apply changes in YoutubeStreamExtractor

Extract post live DVR streams as post live streams instead of live streams.

A new class has been in order to improve code: ItagInfo, which stores an itag, the content (URL) extracted and if its an URL or not.
A functional interface has been added in order to abstract the stream building: StreamBuilderHelper.
Also add the cver parameter added by the desktop web client on the corresponding streams (a new method has been added in YoutubeParsingHelper to check this and another for Android streams).

Some code in these classes has been also refactored/improved/optimized.

											
										
										
											2022-03-06 20:10:11 +01:00
+								     *
-												Improve documentation and adress most of the requested changes

Also fix some issues in several places, in the code and the documentation.

											
										
										
											2022-03-15 11:19:13 +01:00
+								     * @param url the streaming URL to be checked.
-												Apply changes in YoutubeStreamExtractor

Extract post live DVR streams as post live streams instead of live streams.

A new class has been in order to improve code: ItagInfo, which stores an itag, the content (URL) extracted and if its an URL or not.
A functional interface has been added in order to abstract the stream building: StreamBuilderHelper.
Also add the cver parameter added by the desktop web client on the corresponding streams (a new method has been added in YoutubeParsingHelper to check this and another for Android streams).

Some code in these classes has been also refactored/improved/optimized.

											
										
										
											2022-03-06 20:10:11 +01:00
+								     * @return true if it's a {@code WEB} streaming URL, false otherwise
 								     */
 								    public static boolean isWebStreamingUrl(@Nonnull final String url) {
 								        return Parser.isMatch(C_WEB_PATTERN, url);
 								    }
 								    /**
 								     * Check if the streaming URL is a URL from the YouTube {@code TVHTML5_SIMPLY_EMBEDDED_PLAYER}
 								     * client.
 								     *
 								     * @param url the streaming URL on which check if it's a {@code TVHTML5_SIMPLY_EMBEDDED_PLAYER}
 								     *            streaming URL.
 								     * @return true if it's a {@code TVHTML5_SIMPLY_EMBEDDED_PLAYER} streaming URL, false otherwise
 								     */
 								    public static boolean isTvHtml5SimplyEmbeddedPlayerStreamingUrl(@Nonnull final String url) {
 								        return Parser.isMatch(C_TVHTML5_SIMPLY_EMBEDDED_PLAYER_PATTERN, url);
 								    }
 								    /**
 								     * Check if the streaming URL is a URL from the YouTube {@code ANDROID} client.
 								     *
-												Improve documentation and adress most of the requested changes

Also fix some issues in several places, in the code and the documentation.

											
										
										
											2022-03-15 11:19:13 +01:00
+								     * @param url the streaming URL to be checked.
-												Apply changes in YoutubeStreamExtractor

Extract post live DVR streams as post live streams instead of live streams.

A new class has been in order to improve code: ItagInfo, which stores an itag, the content (URL) extracted and if its an URL or not.
A functional interface has been added in order to abstract the stream building: StreamBuilderHelper.
Also add the cver parameter added by the desktop web client on the corresponding streams (a new method has been added in YoutubeParsingHelper to check this and another for Android streams).

Some code in these classes has been also refactored/improved/optimized.

											
										
										
											2022-03-06 20:10:11 +01:00
+								     * @return true if it's a {@code ANDROID} streaming URL, false otherwise
 								     */
 								    public static boolean isAndroidStreamingUrl(@Nonnull final String url) {
 								        return Parser.isMatch(C_ANDROID_PATTERN, url);
 								    }
 								    /**
 								     * Check if the streaming URL is a URL from the YouTube {@code IOS} client.
 								     *
 								     * @param url the streaming URL on which check if it's a {@code IOS} streaming URL.
 								     * @return true if it's a {@code IOS} streaming URL, false otherwise
 								     */
 								    public static boolean isIosStreamingUrl(@Nonnull final String url) {
 								        return Parser.isMatch(C_IOS_PATTERN, url);
 								    }
-												Fixed all YTMixPlaylists

Added option to choose if you want to consent or not - currently this is done by a static variable in ``YoutubeParsingHelper`` - may not be the best long-term solution but for now the tests work again (in EU countries) 🥳

											
										
										
											2022-07-30 16:05:52 +02:00
-												Improved consent cookie related constants and documentation

											
										
										
											2022-08-21 18:27:31 +02:00
+								    /**
-												[YouTube] Switch to new consent cookie

Also move the documentation of the consent in its setter method in order to be
accessible publicly and improve it.

											
										
										
											2023-10-07 19:36:02 +02:00
+								     * Determines how the consent cookie that is required for YouTube, {@code SOCS}, will be
 								     * generated.
 								     *
 								     * <ul>
 								     *   <li>{@code false} (the default value) will use {@code CAE=};</li>
 								     *   <li>{@code true} will use {@code CAISAiAD}.</li>
 								     * </ul>
 								     *
 								     * <p>
 								     * Setting this value to {@code true} is needed to extract mixes and some YouTube Music
 								     * playlists in some countries such as the EU ones.
 								     * </p>
-												Improved consent cookie related constants and documentation

											
										
										
											2022-08-21 18:27:31 +02:00
+								     */
-												Fixed all YTMixPlaylists

Added option to choose if you want to consent or not - currently this is done by a static variable in ``YoutubeParsingHelper`` - may not be the best long-term solution but for now the tests work again (in EU countries) 🥳

											
										
										
											2022-07-30 16:05:52 +02:00
+								    public static void setConsentAccepted(final boolean accepted) {
 								        consentAccepted = accepted;
 								    }
-												Improved consent cookie related constants and documentation

											
										
										
											2022-08-21 18:27:31 +02:00
+								    /**
-												[YouTube] Switch to new consent cookie

Also move the documentation of the consent in its setter method in order to be
accessible publicly and improve it.

											
										
										
											2023-10-07 19:36:02 +02:00
+								     * Get the value of the consent's acceptance.
 								     *
 								     * @see #setConsentAccepted(boolean)
 								     * @return the consent's acceptance value
-												Improved consent cookie related constants and documentation

											
										
										
											2022-08-21 18:27:31 +02:00
+								     */
-												Fixed all YTMixPlaylists

Added option to choose if you want to consent or not - currently this is done by a static variable in ``YoutubeParsingHelper`` - may not be the best long-term solution but for now the tests work again (in EU countries) 🥳

											
										
										
											2022-07-30 16:05:52 +02:00
+								    public static boolean isConsentAccepted() {
 								        return consentAccepted;
 								    }
-												Add track types to audio streams (#1041)


											
										
										
											2023-03-28 00:02:20 +02:00
 								    /**
 								     * Extract the audio track type from a YouTube stream URL.
 								     * <p>
 								     * The track type is parsed from the {@code xtags} URL parameter
 								     * (Example: {@code acont=original:lang=en}).
 								     * </p>
 								     * @param streamUrl YouTube stream URL
 								     * @return {@link AudioTrackType} or {@code null} if no track type was found
 								     */
 								    @Nullable
 								    public static AudioTrackType extractAudioTrackType(final String streamUrl) {
 								        final String xtags;
 								        try {
 								            xtags = Utils.getQueryValue(new URL(streamUrl), "xtags");
 								        } catch (final MalformedURLException e) {
 								            return null;
 								        }
 								        if (xtags == null) {
 								            return null;
 								        }
 								        String atype = null;
 								        for (final String param : xtags.split(":")) {
 								            final String[] kv = param.split("=", 2);
 								            if (kv.length > 1 && kv[0].equals("acont")) {
 								                atype = kv[1];
 								                break;
 								            }
 								        }
 								        if (atype == null) {
 								            return null;
 								        }
 								        switch (atype) {
 								            case "original":
 								                return AudioTrackType.ORIGINAL;
 								            case "dubbed":
 								                return AudioTrackType.DUBBED;
 								            case "descriptive":
 								                return AudioTrackType.DESCRIPTIVE;
 								            default:
 								                return null;
 								        }
 								    }
-												initial commit

											
										
										
											2017-03-01 18:47:52 +01:00
+								}