2022-02-07 21:23:38 +01:00
|
|
|
|
/*
|
|
|
|
|
* Created by Christian Schabesberger on 02.03.16.
|
|
|
|
|
*
|
2023-09-23 00:10:15 +02:00
|
|
|
|
* Copyright (C) 2016 Christian Schabesberger <chris.schabesberger@mailbox.org>
|
2022-02-07 21:23:38 +01:00
|
|
|
|
* YoutubeParsingHelper.java is part of NewPipe Extractor.
|
|
|
|
|
*
|
|
|
|
|
* NewPipe Extractor is free software: you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* NewPipe Extractor is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
|
* along with NewPipe Extractor. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
|
*/
|
|
|
|
|
|
2020-04-10 10:51:05 +02:00
|
|
|
|
package org.schabi.newpipe.extractor.services.youtube;
|
2017-03-01 18:47:52 +01:00
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
|
import static org.schabi.newpipe.extractor.NewPipe.getDownloader;
|
|
|
|
|
import static org.schabi.newpipe.extractor.utils.Utils.HTTP;
|
|
|
|
|
import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
|
2022-01-09 22:49:37 +01:00
|
|
|
|
import static org.schabi.newpipe.extractor.utils.Utils.getStringResultFromRegexArray;
|
2022-03-18 15:09:06 +01:00
|
|
|
|
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
2022-07-27 03:26:02 +02:00
|
|
|
|
|
2021-03-03 19:49:26 +01:00
|
|
|
|
import com.grack.nanojson.JsonArray;
|
2021-04-02 21:34:47 +02:00
|
|
|
|
import com.grack.nanojson.JsonBuilder;
|
2021-03-03 19:49:26 +01:00
|
|
|
|
import com.grack.nanojson.JsonObject;
|
|
|
|
|
import com.grack.nanojson.JsonParser;
|
|
|
|
|
import com.grack.nanojson.JsonParserException;
|
|
|
|
|
import com.grack.nanojson.JsonWriter;
|
2022-07-22 17:28:39 +02:00
|
|
|
|
|
2023-12-07 20:47:02 +01:00
|
|
|
|
import org.jsoup.nodes.Entities;
|
2022-07-22 17:28:39 +02:00
|
|
|
|
import org.schabi.newpipe.extractor.Image;
|
|
|
|
|
import org.schabi.newpipe.extractor.Image.ResolutionLevel;
|
2019-04-28 22:03:16 +02:00
|
|
|
|
import org.schabi.newpipe.extractor.downloader.Response;
|
2022-03-18 15:09:06 +01:00
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.AccountTerminatedException;
|
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
|
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
2021-04-30 19:06:56 +02:00
|
|
|
|
import org.schabi.newpipe.extractor.localization.ContentCountry;
|
2020-02-29 16:55:07 +01:00
|
|
|
|
import org.schabi.newpipe.extractor.localization.Localization;
|
2022-02-17 17:19:54 +01:00
|
|
|
|
import org.schabi.newpipe.extractor.playlist.PlaylistInfo;
|
2023-03-28 00:02:20 +02:00
|
|
|
|
import org.schabi.newpipe.extractor.stream.AudioTrackType;
|
2021-03-04 18:58:51 +01:00
|
|
|
|
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
2020-02-22 23:51:02 +01:00
|
|
|
|
import org.schabi.newpipe.extractor.utils.Parser;
|
2022-02-07 21:23:38 +01:00
|
|
|
|
import org.schabi.newpipe.extractor.utils.RandomStringFromAlphabetGenerator;
|
2020-02-28 09:36:33 +01:00
|
|
|
|
import org.schabi.newpipe.extractor.utils.Utils;
|
2017-03-01 18:47:52 +01:00
|
|
|
|
|
2020-02-29 16:42:04 +01:00
|
|
|
|
import java.io.IOException;
|
2020-02-27 17:39:23 +01:00
|
|
|
|
import java.io.UnsupportedEncodingException;
|
2020-04-01 16:01:21 +02:00
|
|
|
|
import java.net.MalformedURLException;
|
2019-01-13 12:52:07 +01:00
|
|
|
|
import java.net.URL;
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
import java.nio.charset.StandardCharsets;
|
2020-11-03 11:54:46 +01:00
|
|
|
|
import java.time.LocalDate;
|
2020-10-18 05:48:14 +02:00
|
|
|
|
import java.time.OffsetDateTime;
|
2020-11-03 11:54:46 +01:00
|
|
|
|
import java.time.ZoneOffset;
|
2020-10-18 05:48:14 +02:00
|
|
|
|
import java.time.format.DateTimeParseException;
|
2022-03-18 15:09:06 +01:00
|
|
|
|
import java.util.HashMap;
|
|
|
|
|
import java.util.List;
|
2022-08-09 05:44:34 +02:00
|
|
|
|
import java.util.Locale;
|
2022-03-18 15:09:06 +01:00
|
|
|
|
import java.util.Map;
|
|
|
|
|
import java.util.Optional;
|
|
|
|
|
import java.util.Random;
|
2022-08-09 05:44:34 +02:00
|
|
|
|
import java.util.Set;
|
2022-03-06 20:10:11 +01:00
|
|
|
|
import java.util.regex.Pattern;
|
2022-07-22 17:28:39 +02:00
|
|
|
|
import java.util.stream.Collectors;
|
2022-08-08 19:36:19 +02:00
|
|
|
|
import java.util.stream.Stream;
|
2021-01-17 18:48:16 +01:00
|
|
|
|
|
|
|
|
|
import javax.annotation.Nonnull;
|
|
|
|
|
import javax.annotation.Nullable;
|
2020-02-26 15:22:59 +01:00
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
|
public final class YoutubeParsingHelper {
|
2017-03-01 18:47:52 +01:00
|
|
|
|
|
|
|
|
|
private YoutubeParsingHelper() {
    // Utility class exposing only static members: no instances should be created.
}
|
|
|
|
|
|
2022-03-26 20:02:35 +01:00
|
|
|
|
/**
|
[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more
Use the TV embedded client technique to get streams of embeddable age-restricted videos.
This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, category, the uploadDate and the publishDate properties.
The outdated code for these contents has been removed.
Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.
Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.
2022-04-02 19:06:36 +02:00
|
|
|
|
* The base URL of requests of the {@code WEB} clients to the InnerTube internal API.
|
2022-03-26 20:02:35 +01:00
|
|
|
|
*/
|
2021-05-30 17:23:51 +02:00
|
|
|
|
public static final String YOUTUBEI_V1_URL = "https://www.youtube.com/youtubei/v1/";
|
2022-03-15 11:46:12 +01:00
|
|
|
|
|
[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more
Use the TV embedded client technique to get streams of embeddable age-restricted videos.
This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, category, the uploadDate and the publishDate properties.
The outdated code for these contents has been removed.
Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.
Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.
2022-04-02 19:06:36 +02:00
|
|
|
|
/**
|
|
|
|
|
* The base URL of requests of non-web clients to the InnerTube internal API.
|
|
|
|
|
*/
|
|
|
|
|
public static final String YOUTUBEI_V1_GAPIS_URL =
|
|
|
|
|
"https://youtubei.googleapis.com/youtubei/v1/";
|
|
|
|
|
|
2022-11-12 05:01:05 +01:00
|
|
|
|
/**
|
|
|
|
|
* The base URL of YouTube Music.
|
|
|
|
|
*/
|
|
|
|
|
private static final String YOUTUBE_MUSIC_URL = "https://music.youtube.com";
|
|
|
|
|
|
2022-03-15 11:46:12 +01:00
|
|
|
|
/**
|
|
|
|
|
* A parameter to disable pretty-printed response of InnerTube requests, to reduce response
|
|
|
|
|
* sizes.
|
|
|
|
|
*
|
|
|
|
|
* <p>
|
2024-04-04 23:33:41 +02:00
|
|
|
|
* Sent in query parameters of the requests.
|
2022-03-15 11:46:12 +01:00
|
|
|
|
* </p>
|
|
|
|
|
**/
|
2024-04-04 23:33:41 +02:00
|
|
|
|
public static final String DISABLE_PRETTY_PRINT_PARAMETER = "prettyPrint=false";
|
2022-03-26 20:02:35 +01:00
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* A parameter sent by official clients named {@code contentPlaybackNonce}.
|
|
|
|
|
*
|
|
|
|
|
* <p>
|
2024-04-04 23:33:41 +02:00
|
|
|
|
* It is sent by official clients on videoplayback requests and InnerTube player requests in
|
|
|
|
|
* most cases.
|
2022-03-26 20:02:35 +01:00
|
|
|
|
* </p>
|
|
|
|
|
*
|
|
|
|
|
* <p>
|
|
|
|
|
* It is composed of 16 characters which are generated from
|
|
|
|
|
* {@link #CONTENT_PLAYBACK_NONCE_ALPHABET this alphabet}, with the use of strong random
|
|
|
|
|
* values.
|
|
|
|
|
* </p>
|
|
|
|
|
*
|
|
|
|
|
* @see #generateContentPlaybackNonce()
|
|
|
|
|
*/
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
public static final String CPN = "cpn";
|
|
|
|
|
public static final String VIDEO_ID = "videoId";
|
2021-05-30 17:23:51 +02:00
|
|
|
|
|
[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more
Use the TV embedded client technique to get streams of embeddable age-restricted videos.
This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, category, the uploadDate and the publishDate properties.
The outdated code for these contents has been removed.
Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.
Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.
2022-04-02 19:06:36 +02:00
|
|
|
|
/**
|
|
|
|
|
* A parameter sent by official clients named {@code contentCheckOk}.
|
|
|
|
|
*
|
|
|
|
|
* <p>
|
|
|
|
|
* Setting it to {@code true} allows us to get streaming data on videos with a warning about
|
|
|
|
|
* what the sensible content they contain.
|
|
|
|
|
* </p>
|
|
|
|
|
*/
|
|
|
|
|
public static final String CONTENT_CHECK_OK = "contentCheckOk";
|
|
|
|
|
|
|
|
|
|
/**
|
2022-08-08 20:12:32 +02:00
|
|
|
|
* A parameter which may be sent by official clients named {@code racyCheckOk}.
|
[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more
Use the TV embedded client technique to get streams of embeddable age-restricted videos.
This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, category, the uploadDate and the publishDate properties.
The outdated code for these contents has been removed.
Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.
Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.
2022-04-02 19:06:36 +02:00
|
|
|
|
*
|
|
|
|
|
* <p>
|
|
|
|
|
* What this parameter does is not really known, but it seems to be linked to sensitive
|
|
|
|
|
* contents such as age-restricted content.
|
|
|
|
|
* </p>
|
|
|
|
|
*/
|
|
|
|
|
public static final String RACY_CHECK_OK = "racyCheckOk";
|
|
|
|
|
|
2024-04-20 11:43:54 +02:00
|
|
|
|
/**
|
|
|
|
|
* The hardcoded client ID used for InnerTube requests with the {@code WEB} client.
|
|
|
|
|
*/
|
|
|
|
|
private static final String WEB_CLIENT_ID = "1";
|
|
|
|
|
|
2022-03-26 20:02:35 +01:00
|
|
|
|
/**
|
|
|
|
|
* The client version for InnerTube requests with the {@code WEB} client, used as the last
|
|
|
|
|
* fallback if the extraction of the real one failed.
|
|
|
|
|
*/
|
2024-04-04 23:33:41 +02:00
|
|
|
|
private static final String HARDCODED_CLIENT_VERSION = "2.20240410.01.00";
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
|
2022-03-26 20:02:35 +01:00
|
|
|
|
/**
|
2022-08-08 20:12:32 +02:00
|
|
|
|
* The hardcoded client version of the Android app used for InnerTube requests with this
|
|
|
|
|
* client.
|
|
|
|
|
*
|
|
|
|
|
* <p>
|
|
|
|
|
* It can be extracted by getting the latest release version of the app in an APK repository
|
|
|
|
|
* such as <a href="https://www.apkmirror.com/apk/google-inc/youtube/">APKMirror</a>.
|
|
|
|
|
* </p>
|
2022-03-26 20:02:35 +01:00
|
|
|
|
*/
|
2024-04-04 23:33:41 +02:00
|
|
|
|
private static final String ANDROID_YOUTUBE_CLIENT_VERSION = "19.13.36";
|
2022-03-26 20:02:35 +01:00
|
|
|
|
|
|
|
|
|
/**
|
2024-04-04 23:33:41 +02:00
|
|
|
|
* The hardcoded client version of the iOS app used for InnerTube requests with this client.
|
2022-03-26 20:02:35 +01:00
|
|
|
|
*
|
|
|
|
|
* <p>
|
2022-08-08 20:12:32 +02:00
|
|
|
|
* It can be extracted by getting the latest release version of the app on
|
|
|
|
|
* <a href="https://apps.apple.com/us/app/youtube-watch-listen-stream/id544007664/">the App
|
|
|
|
|
* Store page of the YouTube app</a>, in the {@code What’s New} section.
|
2022-03-26 20:02:35 +01:00
|
|
|
|
* </p>
|
|
|
|
|
*/
|
2024-04-04 23:33:41 +02:00
|
|
|
|
private static final String IOS_YOUTUBE_CLIENT_VERSION = "19.14.3";
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
|
[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more
Use the TV embedded client technique to get streams of embeddable age-restricted videos.
This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, category, the uploadDate and the publishDate properties.
The outdated code for these contents has been removed.
Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.
Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.
2022-04-02 19:06:36 +02:00
|
|
|
|
/**
|
2024-04-04 23:33:41 +02:00
|
|
|
|
* The hardcoded client version used for InnerTube requests with the TV HTML5 embed client.
|
2022-08-08 20:12:32 +02:00
|
|
|
|
*/
|
2024-04-04 23:33:41 +02:00
|
|
|
|
private static final String TVHTML5_SIMPLY_EMBED_CLIENT_VERSION = "2.0";
|
2022-08-08 20:12:32 +02:00
|
|
|
|
|
2024-04-20 11:43:54 +02:00
|
|
|
|
/**
|
|
|
|
|
* The hardcoded client ID used for InnerTube requests with the YouTube Music desktop client.
|
|
|
|
|
*/
|
|
|
|
|
private static final String YOUTUBE_MUSIC_CLIENT_ID = "67";
|
|
|
|
|
|
2022-08-08 20:12:32 +02:00
|
|
|
|
/**
|
2024-04-04 23:33:41 +02:00
|
|
|
|
* The hardcoded client version used for InnerTube requests with the YouTube Music desktop
|
|
|
|
|
* client.
|
[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more
Use the TV embedded client technique to get streams of embeddable age-restricted videos.
This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, category, the uploadDate and the publishDate properties.
The outdated code for these contents has been removed.
Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.
Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.
2022-04-02 19:06:36 +02:00
|
|
|
|
*/
|
2024-04-04 23:33:41 +02:00
|
|
|
|
private static final String HARDCODED_YOUTUBE_MUSIC_CLIENT_VERSION = "1.20240403.01.00";
|
[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more
Use the TV embedded client technique to get streams of embeddable age-restricted videos.
This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, category, the uploadDate and the publishDate properties.
The outdated code for these contents has been removed.
Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.
Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.
2022-04-02 19:06:36 +02:00
|
|
|
|
|
2020-02-26 15:22:59 +01:00
|
|
|
|
private static String clientVersion;
|
2020-07-26 12:00:56 +02:00
|
|
|
|
|
2024-04-04 23:33:41 +02:00
|
|
|
|
private static String youtubeMusicClientVersion;
|
2020-03-17 11:33:39 +01:00
|
|
|
|
|
2024-04-04 23:33:41 +02:00
|
|
|
|
private static boolean clientVersionExtracted = false;
|
2021-07-09 18:23:46 +02:00
|
|
|
|
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
|
2024-04-04 23:33:41 +02:00
|
|
|
|
private static Optional<Boolean> hardcodedClientVersionValid = Optional.empty();
|
2022-02-05 22:05:07 +01:00
|
|
|
|
|
2022-01-09 22:49:37 +01:00
|
|
|
|
private static final String[] INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES =
|
|
|
|
|
{"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
|
|
|
|
|
"innertube_context_client_version\":\"([0-9\\.]+?)\"",
|
|
|
|
|
"client.version=([0-9\\.]+)"};
|
2022-02-05 22:05:07 +01:00
|
|
|
|
private static final String[] INITIAL_DATA_REGEXES =
|
|
|
|
|
{"window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});",
|
|
|
|
|
"var\\s*ytInitialData\\s*=\\s*(\\{.*?\\});"};
|
2021-04-12 18:24:32 +02:00
|
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
private static final String CONTENT_PLAYBACK_NONCE_ALPHABET =
|
|
|
|
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
|
|
|
|
|
|
2022-03-26 20:02:35 +01:00
|
|
|
|
/**
|
2023-10-07 14:14:34 +02:00
|
|
|
|
* The device machine id for the iPhone 15, used to get 60fps with the {@code iOS} client.
|
2022-03-26 20:02:35 +01:00
|
|
|
|
*
|
|
|
|
|
* <p>
|
|
|
|
|
* See <a href="https://gist.github.com/adamawolf/3048717">this GitHub Gist</a> for more
|
|
|
|
|
* information.
|
|
|
|
|
* </p>
|
|
|
|
|
*/
|
2023-10-07 14:14:34 +02:00
|
|
|
|
private static final String IOS_DEVICE_MODEL = "iPhone15,4";
|
2022-03-26 20:02:35 +01:00
|
|
|
|
|
2024-04-20 11:43:54 +02:00
|
|
|
|
/**
|
|
|
|
|
* Spoofing an iPhone 15 running iOS 17.4.1 with the hardcoded version of the iOS app. To be
|
|
|
|
|
* used for the {@code "osVersion"} field in JSON POST requests.
|
|
|
|
|
* <p>
|
|
|
|
|
* The value of this field seems to use the following structure:
|
|
|
|
|
* "iOS major version.minor version.patch version.build version", where
|
|
|
|
|
* "patch version" is equal to 0 if it isn't set
|
|
|
|
|
* The build version corresponding to the iOS version used can be found on
|
|
|
|
|
* <a href="https://theapplewiki.com/wiki/Firmware/iPhone/17.x#iPhone_15">
|
|
|
|
|
* https://theapplewiki.com/wiki/Firmware/iPhone/17.x#iPhone_15</a>
|
|
|
|
|
* </p>
|
|
|
|
|
*
|
|
|
|
|
* @see #IOS_USER_AGENT_VERSION
|
|
|
|
|
*/
|
|
|
|
|
private static final String IOS_OS_VERSION = "17.4.1.21E237";
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Spoofing an iPhone 15 running iOS 17.4.1 with the hardcoded version of the iOS app. To be
|
|
|
|
|
* used in the user agent for requests.
|
|
|
|
|
*
|
|
|
|
|
* @see #IOS_OS_VERSION
|
|
|
|
|
*/
|
|
|
|
|
private static final String IOS_USER_AGENT_VERSION = "17_4_1";
|
|
|
|
|
|
2023-08-04 00:00:02 +02:00
|
|
|
|
private static Random numberGenerator = new Random();
|
2021-04-08 16:36:55 +02:00
|
|
|
|
|
2021-04-25 18:54:26 +02:00
|
|
|
|
private static final String FEED_BASE_CHANNEL_ID =
|
|
|
|
|
"https://www.youtube.com/feeds/videos.xml?channel_id=";
|
2019-12-16 08:35:44 +01:00
|
|
|
|
private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user=";
|
2022-03-06 20:10:11 +01:00
|
|
|
|
private static final Pattern C_WEB_PATTERN = Pattern.compile("&c=WEB");
|
|
|
|
|
private static final Pattern C_TVHTML5_SIMPLY_EMBEDDED_PLAYER_PATTERN =
|
|
|
|
|
Pattern.compile("&c=TVHTML5_SIMPLY_EMBEDDED_PLAYER");
|
|
|
|
|
private static final Pattern C_ANDROID_PATTERN = Pattern.compile("&c=ANDROID");
|
|
|
|
|
private static final Pattern C_IOS_PATTERN = Pattern.compile("&c=IOS");
|
2019-12-16 08:35:44 +01:00
|
|
|
|
|
2022-08-09 05:44:34 +02:00
|
|
|
|
private static final Set<String> GOOGLE_URLS = Set.of("google.", "m.google.", "www.google.");
|
|
|
|
|
private static final Set<String> INVIDIOUS_URLS = Set.of("invidio.us", "dev.invidio.us",
|
|
|
|
|
"www.invidio.us", "redirect.invidious.io", "invidious.snopyta.org", "yewtu.be",
|
|
|
|
|
"tube.connect.cafe", "tubus.eduvid.org", "invidious.kavin.rocks", "invidious.site",
|
|
|
|
|
"invidious-us.kavin.rocks", "piped.kavin.rocks", "vid.mint.lgbt", "invidiou.site",
|
|
|
|
|
"invidious.fdn.fr", "invidious.048596.xyz", "invidious.zee.li", "vid.puffyan.us",
|
|
|
|
|
"ytprivate.com", "invidious.namazso.eu", "invidious.silkky.cloud", "ytb.trom.tf",
|
|
|
|
|
"invidious.exonip.de", "inv.riverside.rocks", "invidious.blamefran.net", "y.com.cm",
|
|
|
|
|
"invidious.moomoo.me", "yt.cyberhost.uk");
|
|
|
|
|
private static final Set<String> YOUTUBE_URLS = Set.of("youtube.com", "www.youtube.com",
|
|
|
|
|
"m.youtube.com", "music.youtube.com");
|
|
|
|
|
|
2022-07-30 16:05:52 +02:00
|
|
|
|
private static boolean consentAccepted = false;
|
|
|
|
|
|
2023-12-07 20:57:44 +01:00
|
|
|
|
public static boolean isGoogleURL(final String url) {
|
2022-03-18 15:09:06 +01:00
|
|
|
|
final String cachedUrl = extractCachedUrlIfNeeded(url);
|
2020-12-20 19:54:12 +01:00
|
|
|
|
try {
|
2022-03-18 15:09:06 +01:00
|
|
|
|
final URL u = new URL(cachedUrl);
|
2022-08-09 05:44:34 +02:00
|
|
|
|
return GOOGLE_URLS.stream().anyMatch(item -> u.getHost().startsWith(item));
|
2021-04-08 16:17:59 +02:00
|
|
|
|
} catch (final MalformedURLException e) {
|
2020-12-20 19:54:12 +01:00
|
|
|
|
return false;
|
2019-10-29 06:00:29 +01:00
|
|
|
|
}
|
2020-12-20 19:54:12 +01:00
|
|
|
|
}
|
2019-10-29 06:00:29 +01:00
|
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
|
public static boolean isYoutubeURL(@Nonnull final URL url) {
|
2022-08-09 05:44:34 +02:00
|
|
|
|
return YOUTUBE_URLS.contains(url.getHost().toLowerCase(Locale.ROOT));
|
2019-01-13 12:52:07 +01:00
|
|
|
|
}
|
|
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
|
public static boolean isYoutubeServiceURL(@Nonnull final URL url) {
|
2020-12-15 17:21:21 +01:00
|
|
|
|
final String host = url.getHost();
|
2021-04-25 18:54:26 +02:00
|
|
|
|
return host.equalsIgnoreCase("www.youtube-nocookie.com")
|
|
|
|
|
|| host.equalsIgnoreCase("youtu.be");
|
2019-01-27 01:28:51 +01:00
|
|
|
|
}
|
2019-01-13 12:52:07 +01:00
|
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
|
public static boolean isHooktubeURL(@Nonnull final URL url) {
|
2020-12-15 17:21:21 +01:00
|
|
|
|
final String host = url.getHost();
|
2019-01-27 01:28:51 +01:00
|
|
|
|
return host.equalsIgnoreCase("hooktube.com");
|
|
|
|
|
}
|
|
|
|
|
|
2022-08-09 05:44:34 +02:00
|
|
|
|
public static boolean isInvidiousURL(@Nonnull final URL url) {
|
|
|
|
|
return INVIDIOUS_URLS.contains(url.getHost().toLowerCase(Locale.ROOT));
|
2019-01-13 12:52:07 +01:00
|
|
|
|
}
|
|
|
|
|
|
2021-10-22 21:48:18 +02:00
|
|
|
|
public static boolean isY2ubeURL(@Nonnull final URL url) {
|
|
|
|
|
return url.getHost().equalsIgnoreCase("y2u.be");
|
|
|
|
|
}
|
|
|
|
|
|
2020-06-15 11:27:44 +02:00
|
|
|
|
/**
|
2020-07-02 21:31:05 +02:00
|
|
|
|
* Parses the duration string of the video expecting ":" or "." as separators
|
2021-02-07 22:12:22 +01:00
|
|
|
|
*
|
2020-06-15 11:27:44 +02:00
|
|
|
|
* @return the duration in seconds
|
2020-07-02 21:31:05 +02:00
|
|
|
|
* @throws ParsingException when more than 3 separators are found
|
2020-06-15 11:27:44 +02:00
|
|
|
|
*/
|
2021-06-11 13:34:23 +02:00
|
|
|
|
public static int parseDurationString(@Nonnull final String input)
|
2017-03-01 18:47:52 +01:00
|
|
|
|
throws ParsingException, NumberFormatException {
|
2018-09-09 11:53:10 +02:00
|
|
|
|
// If time separator : is not detected, try . instead
|
2018-09-09 14:01:39 +02:00
|
|
|
|
final String[] splitInput = input.contains(":")
|
|
|
|
|
? input.split(":")
|
|
|
|
|
: input.split("\\.");
|
|
|
|
|
|
2022-11-09 09:41:29 +01:00
|
|
|
|
final int[] units = {24, 60, 60, 1};
|
|
|
|
|
final int offset = units.length - splitInput.length;
|
|
|
|
|
if (offset < 0) {
|
|
|
|
|
throw new ParsingException("Error duration string with unknown format: " + input);
|
2017-03-01 18:47:52 +01:00
|
|
|
|
}
|
2022-11-09 09:41:29 +01:00
|
|
|
|
int duration = 0;
|
|
|
|
|
for (int i = 0; i < splitInput.length; i++) {
|
|
|
|
|
duration = units[i + offset] * (duration + convertDurationToInt(splitInput[i]));
|
|
|
|
|
}
|
|
|
|
|
return duration;
|
2022-03-17 14:50:12 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Tries to convert a duration string to an integer without throwing an exception.
|
|
|
|
|
* <br/>
|
|
|
|
|
* Helper method for {@link #parseDurationString(String)}.
|
|
|
|
|
* <br/>
|
|
|
|
|
* Note: This method is also used as a workaround for NewPipe#8034 (YT shorts no longer
|
|
|
|
|
* display any duration in channels).
|
|
|
|
|
*
|
|
|
|
|
* @param input The string to process
|
|
|
|
|
* @return The converted integer or 0 if the conversion failed.
|
|
|
|
|
*/
|
|
|
|
|
private static int convertDurationToInt(final String input) {
|
|
|
|
|
if (input == null || input.isEmpty()) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
final String clearedInput = Utils.removeNonDigitCharacters(input);
|
|
|
|
|
try {
|
|
|
|
|
return Integer.parseInt(clearedInput);
|
|
|
|
|
} catch (final NumberFormatException ex) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2017-03-01 18:47:52 +01:00
|
|
|
|
}
|
2019-04-28 22:03:16 +02:00
|
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
|
@Nonnull
|
|
|
|
|
public static String getFeedUrlFrom(@Nonnull final String channelIdOrUser) {
|
2019-12-16 08:35:44 +01:00
|
|
|
|
if (channelIdOrUser.startsWith("user/")) {
|
|
|
|
|
return FEED_BASE_USER + channelIdOrUser.replace("user/", "");
|
|
|
|
|
} else if (channelIdOrUser.startsWith("channel/")) {
|
|
|
|
|
return FEED_BASE_CHANNEL_ID + channelIdOrUser.replace("channel/", "");
|
|
|
|
|
} else {
|
|
|
|
|
return FEED_BASE_CHANNEL_ID + channelIdOrUser;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-04-25 18:54:26 +02:00
|
|
|
|
public static OffsetDateTime parseDateFrom(final String textualUploadDate)
|
|
|
|
|
throws ParsingException {
|
2019-04-28 22:03:16 +02:00
|
|
|
|
try {
|
2020-10-18 05:48:14 +02:00
|
|
|
|
return OffsetDateTime.parse(textualUploadDate);
|
2021-04-08 16:17:59 +02:00
|
|
|
|
} catch (final DateTimeParseException e) {
|
2020-11-03 11:54:46 +01:00
|
|
|
|
try {
|
|
|
|
|
return LocalDate.parse(textualUploadDate).atStartOfDay().atOffset(ZoneOffset.UTC);
|
2021-04-08 16:17:59 +02:00
|
|
|
|
} catch (final DateTimeParseException e1) {
|
2021-04-25 18:54:26 +02:00
|
|
|
|
throw new ParsingException("Could not parse date: \"" + textualUploadDate + "\"",
|
|
|
|
|
e1);
|
2020-11-03 11:54:46 +01:00
|
|
|
|
}
|
2019-04-28 22:03:16 +02:00
|
|
|
|
}
|
|
|
|
|
}
|
2020-02-22 23:51:02 +01:00
|
|
|
|
|
2020-02-02 18:15:47 +01:00
|
|
|
|
/**
|
2020-04-16 19:28:27 +02:00
|
|
|
|
* Checks if the given playlist id is a YouTube Mix (auto-generated playlist)
|
|
|
|
|
* Ids from a YouTube Mix start with "RD"
|
2021-02-07 22:12:22 +01:00
|
|
|
|
*
|
2021-06-24 18:39:16 +02:00
|
|
|
|
* @param playlistId the playlist id
|
2020-04-16 19:28:27 +02:00
|
|
|
|
* @return Whether given id belongs to a YouTube Mix
|
2020-02-02 18:15:47 +01:00
|
|
|
|
*/
|
2021-06-11 13:34:23 +02:00
|
|
|
|
public static boolean isYoutubeMixId(@Nonnull final String playlistId) {
|
2023-01-15 23:28:59 +01:00
|
|
|
|
return playlistId.startsWith("RD");
|
2020-03-21 18:48:12 +01:00
|
|
|
|
}
|
|
|
|
|
|
2022-02-02 20:23:11 +01:00
|
|
|
|
/**
|
|
|
|
|
* Checks if the given playlist id is a YouTube My Mix (auto-generated playlist)
|
|
|
|
|
* Ids from a YouTube My Mix start with "RDMM"
|
|
|
|
|
*
|
|
|
|
|
* @param playlistId the playlist id
|
|
|
|
|
* @return Whether given id belongs to a YouTube My Mix
|
|
|
|
|
*/
|
|
|
|
|
public static boolean isYoutubeMyMixId(@Nonnull final String playlistId) {
|
|
|
|
|
return playlistId.startsWith("RDMM");
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-21 18:48:12 +01:00
|
|
|
|
/**
|
2020-04-16 19:28:27 +02:00
|
|
|
|
* Checks if the given playlist id is a YouTube Music Mix (auto-generated playlist)
|
2020-12-23 21:07:30 +01:00
|
|
|
|
* Ids from a YouTube Music Mix start with "RDAMVM" or "RDCLAK"
|
2021-02-07 22:12:22 +01:00
|
|
|
|
*
|
2021-04-12 18:24:32 +02:00
|
|
|
|
* @param playlistId the playlist id
|
2020-04-16 19:28:27 +02:00
|
|
|
|
* @return Whether given id belongs to a YouTube Music Mix
|
2020-03-21 18:48:12 +01:00
|
|
|
|
*/
|
2021-06-11 13:34:23 +02:00
|
|
|
|
public static boolean isYoutubeMusicMixId(@Nonnull final String playlistId) {
|
2020-12-23 21:07:30 +01:00
|
|
|
|
return playlistId.startsWith("RDAMVM") || playlistId.startsWith("RDCLAK");
|
2020-02-02 14:19:48 +01:00
|
|
|
|
}
|
2021-02-07 22:12:22 +01:00
|
|
|
|
|
2020-09-26 11:22:24 +02:00
|
|
|
|
/**
|
|
|
|
|
* Checks if the given playlist id is a YouTube Channel Mix (auto-generated playlist)
|
|
|
|
|
* Ids from a YouTube channel Mix start with "RDCM"
|
2021-02-07 22:12:22 +01:00
|
|
|
|
*
|
2020-09-26 11:22:24 +02:00
|
|
|
|
* @return Whether given id belongs to a YouTube Channel Mix
|
|
|
|
|
*/
|
2021-06-11 13:34:23 +02:00
|
|
|
|
public static boolean isYoutubeChannelMixId(@Nonnull final String playlistId) {
|
2020-09-26 11:22:24 +02:00
|
|
|
|
return playlistId.startsWith("RDCM");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
2022-02-17 17:19:54 +01:00
|
|
|
|
* Checks if the given playlist id is a YouTube Genre Mix (auto-generated playlist)
|
|
|
|
|
* Ids from a YouTube Genre Mix start with "RDGMEM"
|
|
|
|
|
*
|
|
|
|
|
* @return Whether given id belongs to a YouTube Genre Mix
|
|
|
|
|
*/
|
|
|
|
|
public static boolean isYoutubeGenreMixId(@Nonnull final String playlistId) {
|
|
|
|
|
return playlistId.startsWith("RDGMEM");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @param playlistId the playlist id to parse
|
|
|
|
|
* @return the {@link PlaylistInfo.PlaylistType} extracted from the playlistId (mix playlist
|
|
|
|
|
* types included)
|
|
|
|
|
* @throws ParsingException if the playlistId is null or empty, if the playlistId is not a mix,
|
|
|
|
|
* if it is a mix but it's not based on a specific stream (this is the
|
|
|
|
|
* case for channel or genre mixes)
|
2020-09-26 11:22:24 +02:00
|
|
|
|
*/
|
2021-06-11 13:34:23 +02:00
|
|
|
|
@Nonnull
|
2022-02-17 17:19:54 +01:00
|
|
|
|
public static String extractVideoIdFromMixId(final String playlistId)
|
2021-06-11 13:34:23 +02:00
|
|
|
|
throws ParsingException {
|
2022-02-17 17:19:54 +01:00
|
|
|
|
if (isNullOrEmpty(playlistId)) {
|
|
|
|
|
throw new ParsingException("Video id could not be determined from empty playlist id");
|
|
|
|
|
|
|
|
|
|
} else if (isYoutubeMyMixId(playlistId)) {
|
2020-09-26 11:22:24 +02:00
|
|
|
|
return playlistId.substring(4);
|
|
|
|
|
|
2022-02-17 15:25:06 +01:00
|
|
|
|
} else if (isYoutubeMusicMixId(playlistId)) {
|
2020-09-26 11:22:24 +02:00
|
|
|
|
return playlistId.substring(6);
|
|
|
|
|
|
2022-02-17 15:25:06 +01:00
|
|
|
|
} else if (isYoutubeChannelMixId(playlistId)) {
|
2022-02-17 17:19:54 +01:00
|
|
|
|
// Channel mixes are of the form RMCM{channelId}, so videoId can't be determined
|
|
|
|
|
throw new ParsingException("Video id could not be determined from channel mix id: "
|
|
|
|
|
+ playlistId);
|
|
|
|
|
|
|
|
|
|
} else if (isYoutubeGenreMixId(playlistId)) {
|
|
|
|
|
// Genre mixes are of the form RDGMEM{garbage}, so videoId can't be determined
|
|
|
|
|
throw new ParsingException("Video id could not be determined from genre mix id: "
|
2021-04-25 18:54:26 +02:00
|
|
|
|
+ playlistId);
|
2020-09-26 11:22:24 +02:00
|
|
|
|
|
2022-02-17 15:25:06 +01:00
|
|
|
|
} else if (isYoutubeMixId(playlistId)) { // normal mix
|
2022-02-17 17:19:54 +01:00
|
|
|
|
if (playlistId.length() != 13) {
|
|
|
|
|
// Stream YouTube mixes are of the form RD{videoId}, but if videoId is not exactly
|
|
|
|
|
// 11 characters then it can't be a video id, hence we are dealing with a different
|
|
|
|
|
// type of mix (e.g. genre mixes handled above, of the form RDGMEM{garbage})
|
|
|
|
|
throw new ParsingException("Video id could not be determined from mix id: "
|
|
|
|
|
+ playlistId);
|
|
|
|
|
}
|
2020-09-26 11:22:24 +02:00
|
|
|
|
return playlistId.substring(2);
|
|
|
|
|
|
2020-12-25 15:00:31 +01:00
|
|
|
|
} else { // not a mix
|
2022-02-17 17:19:54 +01:00
|
|
|
|
throw new ParsingException("Video id could not be determined from playlist id: "
|
2021-04-25 18:54:26 +02:00
|
|
|
|
+ playlistId);
|
2020-09-26 11:22:24 +02:00
|
|
|
|
}
|
|
|
|
|
}
|
2020-02-02 14:19:48 +01:00
|
|
|
|
|
2022-02-17 17:39:49 +01:00
|
|
|
|
/**
|
|
|
|
|
* @param playlistId the playlist id to parse
|
|
|
|
|
* @return the {@link PlaylistInfo.PlaylistType} extracted from the playlistId (mix playlist
|
|
|
|
|
* types included)
|
|
|
|
|
* @throws ParsingException if the playlistId is null or empty
|
|
|
|
|
*/
|
|
|
|
|
@Nonnull
|
|
|
|
|
public static PlaylistInfo.PlaylistType extractPlaylistTypeFromPlaylistId(
|
|
|
|
|
final String playlistId) throws ParsingException {
|
|
|
|
|
if (isNullOrEmpty(playlistId)) {
|
|
|
|
|
throw new ParsingException("Could not extract playlist type from empty playlist id");
|
|
|
|
|
} else if (isYoutubeMusicMixId(playlistId)) {
|
|
|
|
|
return PlaylistInfo.PlaylistType.MIX_MUSIC;
|
|
|
|
|
} else if (isYoutubeChannelMixId(playlistId)) {
|
|
|
|
|
return PlaylistInfo.PlaylistType.MIX_CHANNEL;
|
|
|
|
|
} else if (isYoutubeGenreMixId(playlistId)) {
|
|
|
|
|
return PlaylistInfo.PlaylistType.MIX_GENRE;
|
|
|
|
|
} else if (isYoutubeMixId(playlistId)) { // normal mix
|
|
|
|
|
// Either a normal mix based on a stream, or a "my mix" (still based on a stream).
|
|
|
|
|
// NOTE: if YouTube introduces even more types of mixes that still start with RD,
|
|
|
|
|
// they will default to this, even though they might not be based on a stream.
|
|
|
|
|
return PlaylistInfo.PlaylistType.MIX_STREAM;
|
|
|
|
|
} else {
|
|
|
|
|
// not a known type of mix: just consider it a normal playlist
|
|
|
|
|
return PlaylistInfo.PlaylistType.NORMAL;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @param playlistUrl the playlist url to parse
|
|
|
|
|
* @return the {@link PlaylistInfo.PlaylistType} extracted from the playlistUrl's list param
|
|
|
|
|
* (mix playlist types included)
|
|
|
|
|
* @throws ParsingException if the playlistUrl is malformed, if has no list param or if the list
|
|
|
|
|
* param is empty
|
|
|
|
|
*/
|
|
|
|
|
public static PlaylistInfo.PlaylistType extractPlaylistTypeFromPlaylistUrl(
|
|
|
|
|
final String playlistUrl) throws ParsingException {
|
|
|
|
|
try {
|
|
|
|
|
return extractPlaylistTypeFromPlaylistId(
|
|
|
|
|
Utils.getQueryValue(Utils.stringToURL(playlistUrl), "list"));
|
|
|
|
|
} catch (final MalformedURLException e) {
|
|
|
|
|
throw new ParsingException("Could not extract playlist type from malformed url", e);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-02-05 22:05:07 +01:00
|
|
|
|
private static JsonObject getInitialData(final String html) throws ParsingException {
|
2020-02-22 23:51:02 +01:00
|
|
|
|
try {
|
2022-02-05 22:05:07 +01:00
|
|
|
|
return JsonParser.object().from(getStringResultFromRegexArray(html,
|
|
|
|
|
INITIAL_DATA_REGEXES, 1));
|
2021-04-08 16:17:59 +02:00
|
|
|
|
} catch (final JsonParserException | Parser.RegexException e) {
|
2020-02-22 23:51:02 +01:00
|
|
|
|
throw new ParsingException("Could not get ytInitialData", e);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-04 23:33:41 +02:00
|
|
|
|
public static boolean isHardcodedClientVersionValid()
|
2021-06-24 18:39:16 +02:00
|
|
|
|
throws IOException, ExtractionException {
|
2024-04-04 23:33:41 +02:00
|
|
|
|
if (hardcodedClientVersionValid.isPresent()) {
|
|
|
|
|
return hardcodedClientVersionValid.get();
|
2021-04-25 18:54:26 +02:00
|
|
|
|
}
|
2021-04-12 18:24:32 +02:00
|
|
|
|
// @formatter:off
|
|
|
|
|
final byte[] body = JsonWriter.string()
|
|
|
|
|
.object()
|
|
|
|
|
.object("context")
|
|
|
|
|
.object("client")
|
2021-06-24 18:39:16 +02:00
|
|
|
|
.value("hl", "en-GB")
|
2021-04-12 18:24:32 +02:00
|
|
|
|
.value("gl", "GB")
|
2021-06-24 18:39:16 +02:00
|
|
|
|
.value("clientName", "WEB")
|
2021-04-12 18:24:32 +02:00
|
|
|
|
.value("clientVersion", HARDCODED_CLIENT_VERSION)
|
2023-10-07 15:00:40 +02:00
|
|
|
|
.value("platform", "DESKTOP")
|
2023-10-07 15:04:36 +02:00
|
|
|
|
.value("utcOffsetMinutes", 0)
|
2023-10-07 15:00:40 +02:00
|
|
|
|
.end()
|
|
|
|
|
.object("request")
|
|
|
|
|
.array("internalExperimentFlags")
|
|
|
|
|
.end()
|
|
|
|
|
.value("useSsl", true)
|
|
|
|
|
.end()
|
|
|
|
|
.object("user")
|
|
|
|
|
// TODO: provide a way to enable restricted mode with:
|
|
|
|
|
// .value("enableSafetyMode", boolean)
|
|
|
|
|
.value("lockedSafetyMode", false)
|
2021-04-12 18:24:32 +02:00
|
|
|
|
.end()
|
2021-06-24 18:39:16 +02:00
|
|
|
|
.end()
|
|
|
|
|
.value("fetchLiveState", true)
|
2022-07-28 04:19:21 +02:00
|
|
|
|
.end().done().getBytes(StandardCharsets.UTF_8);
|
2021-04-12 18:24:32 +02:00
|
|
|
|
// @formatter:on
|
2020-02-29 22:42:43 +01:00
|
|
|
|
|
2024-04-20 11:43:54 +02:00
|
|
|
|
final var headers = getClientHeaders(WEB_CLIENT_ID, HARDCODED_CLIENT_VERSION);
|
2020-02-26 15:22:59 +01:00
|
|
|
|
|
2021-04-12 18:24:32 +02:00
|
|
|
|
// This endpoint is fetched by the YouTube website to get the items of its main menu and is
|
|
|
|
|
// pretty lightweight (around 30kB)
|
2022-07-15 20:56:37 +02:00
|
|
|
|
final Response response = getDownloader().postWithContentTypeJson(
|
2024-04-04 23:33:41 +02:00
|
|
|
|
YOUTUBEI_V1_URL + "guide?" + DISABLE_PRETTY_PRINT_PARAMETER,
|
2022-07-15 20:56:37 +02:00
|
|
|
|
headers, body);
|
2021-04-12 18:24:32 +02:00
|
|
|
|
final String responseBody = response.responseBody();
|
|
|
|
|
final int responseCode = response.responseCode();
|
|
|
|
|
|
2024-04-04 23:33:41 +02:00
|
|
|
|
hardcodedClientVersionValid = Optional.of(responseBody.length() > 5000
|
2021-07-09 18:23:46 +02:00
|
|
|
|
&& responseCode == 200); // Ensure to have a valid response
|
2024-04-04 23:33:41 +02:00
|
|
|
|
return hardcodedClientVersionValid.get();
|
2020-02-28 16:35:24 +01:00
|
|
|
|
}
|
|
|
|
|
|
2022-01-09 22:49:37 +01:00
|
|
|
|
|
2024-04-04 23:33:41 +02:00
|
|
|
|
private static void extractClientVersionFromSwJs()
|
2022-01-09 22:49:37 +01:00
|
|
|
|
throws IOException, ExtractionException {
|
2024-04-04 23:33:41 +02:00
|
|
|
|
if (clientVersionExtracted) {
|
2022-03-18 15:09:06 +01:00
|
|
|
|
return;
|
|
|
|
|
}
|
2022-01-09 22:49:37 +01:00
|
|
|
|
final String url = "https://www.youtube.com/sw.js";
|
2022-11-12 05:01:05 +01:00
|
|
|
|
final var headers = getOriginReferrerHeaders("https://www.youtube.com");
|
2022-01-09 22:49:37 +01:00
|
|
|
|
final String response = getDownloader().get(url, headers).responseBody();
|
|
|
|
|
try {
|
|
|
|
|
clientVersion = getStringResultFromRegexArray(response,
|
|
|
|
|
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
|
|
|
|
|
} catch (final Parser.RegexException e) {
|
2022-03-27 20:51:30 +02:00
|
|
|
|
throw new ParsingException("Could not extract YouTube WEB InnerTube client version "
|
2024-04-04 23:33:41 +02:00
|
|
|
|
+ "from sw.js", e);
|
2022-01-09 22:49:37 +01:00
|
|
|
|
}
|
2024-04-04 23:33:41 +02:00
|
|
|
|
clientVersionExtracted = true;
|
2022-01-09 22:49:37 +01:00
|
|
|
|
}
|
2022-03-18 15:09:06 +01:00
|
|
|
|
|
2024-04-04 23:33:41 +02:00
|
|
|
|
private static void extractClientVersionFromHtmlSearchResultsPage()
|
2022-01-09 22:49:37 +01:00
|
|
|
|
throws IOException, ExtractionException {
|
2024-04-04 23:33:41 +02:00
|
|
|
|
// Don't extract the InnerTube client version if it has been already extracted
|
|
|
|
|
if (clientVersionExtracted) {
|
2022-01-09 22:49:37 +01:00
|
|
|
|
return;
|
|
|
|
|
}
|
2022-08-08 19:36:19 +02:00
|
|
|
|
|
2021-04-12 18:24:32 +02:00
|
|
|
|
// Don't provide a search term in order to have a smaller response
|
2021-06-26 20:04:55 +02:00
|
|
|
|
final String url = "https://www.youtube.com/results?search_query=&ucbcb=1";
|
2022-07-27 03:26:02 +02:00
|
|
|
|
final String html = getDownloader().get(url, getCookieHeader()).responseBody();
|
2020-07-26 12:00:56 +02:00
|
|
|
|
final JsonObject initialData = getInitialData(html);
|
2021-04-12 18:24:32 +02:00
|
|
|
|
final JsonArray serviceTrackingParams = initialData.getObject("responseContext")
|
|
|
|
|
.getArray("serviceTrackingParams");
|
2020-02-29 22:42:43 +01:00
|
|
|
|
|
2021-04-12 18:24:32 +02:00
|
|
|
|
// Try to get version from initial data first
|
2022-08-08 19:36:19 +02:00
|
|
|
|
final Stream<JsonObject> serviceTrackingParamsStream = serviceTrackingParams.stream()
|
|
|
|
|
.filter(JsonObject.class::isInstance)
|
|
|
|
|
.map(JsonObject.class::cast);
|
|
|
|
|
|
|
|
|
|
clientVersion = getClientVersionFromServiceTrackingParam(
|
|
|
|
|
serviceTrackingParamsStream, "CSI", "cver");
|
|
|
|
|
|
|
|
|
|
if (clientVersion == null) {
|
|
|
|
|
try {
|
|
|
|
|
clientVersion = getStringResultFromRegexArray(html,
|
|
|
|
|
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
|
|
|
|
|
} catch (final Parser.RegexException ignored) {
|
2020-02-24 19:03:54 +01:00
|
|
|
|
}
|
2020-02-29 22:42:43 +01:00
|
|
|
|
}
|
2020-02-24 19:03:54 +01:00
|
|
|
|
|
2022-08-08 19:36:19 +02:00
|
|
|
|
// Fallback to get a shortened client version which does not contain the last two
|
|
|
|
|
// digits
|
|
|
|
|
if (isNullOrEmpty(clientVersion)) {
|
|
|
|
|
clientVersion = getClientVersionFromServiceTrackingParam(
|
|
|
|
|
serviceTrackingParamsStream, "ECATCHER", "client.version");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (clientVersion == null) {
|
|
|
|
|
throw new ParsingException(
|
|
|
|
|
// CHECKSTYLE:OFF
|
|
|
|
|
"Could not extract YouTube WEB InnerTube client version from HTML search results page");
|
|
|
|
|
// CHECKSTYLE:ON
|
2020-02-29 22:42:43 +01:00
|
|
|
|
}
|
2022-08-08 19:36:19 +02:00
|
|
|
|
|
2024-04-04 23:33:41 +02:00
|
|
|
|
clientVersionExtracted = true;
|
2020-07-26 12:00:56 +02:00
|
|
|
|
}
|
|
|
|
|
|
2022-08-08 19:36:19 +02:00
|
|
|
|
@Nullable
|
|
|
|
|
private static String getClientVersionFromServiceTrackingParam(
|
|
|
|
|
@Nonnull final Stream<JsonObject> serviceTrackingParamsStream,
|
|
|
|
|
@Nonnull final String serviceName,
|
|
|
|
|
@Nonnull final String clientVersionKey) {
|
|
|
|
|
return serviceTrackingParamsStream.filter(serviceTrackingParam ->
|
|
|
|
|
serviceTrackingParam.getString("service", "")
|
|
|
|
|
.equals(serviceName))
|
|
|
|
|
.flatMap(serviceTrackingParam -> serviceTrackingParam.getArray("params")
|
|
|
|
|
.stream())
|
|
|
|
|
.filter(JsonObject.class::isInstance)
|
|
|
|
|
.map(JsonObject.class::cast)
|
|
|
|
|
.filter(param -> param.getString("key", "")
|
|
|
|
|
.equals(clientVersionKey))
|
|
|
|
|
.map(param -> param.getString("value"))
|
|
|
|
|
.filter(paramValue -> !isNullOrEmpty(paramValue))
|
|
|
|
|
.findFirst()
|
|
|
|
|
.orElse(null);
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-26 12:00:56 +02:00
|
|
|
|
/**
|
2021-12-11 16:52:17 +01:00
|
|
|
|
* Get the client version used by YouTube website on InnerTube requests.
|
2020-07-26 12:00:56 +02:00
|
|
|
|
*/
|
|
|
|
|
public static String getClientVersion() throws IOException, ExtractionException {
|
2022-03-18 15:09:06 +01:00
|
|
|
|
if (!isNullOrEmpty(clientVersion)) {
|
|
|
|
|
return clientVersion;
|
|
|
|
|
}
|
2020-07-26 12:00:56 +02:00
|
|
|
|
|
2022-08-08 19:36:19 +02:00
|
|
|
|
// Always extract the latest client version, by trying first to extract it from the
|
|
|
|
|
// JavaScript service worker, then from HTML search results page as a fallback, to prevent
|
[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more
Use the TV embedded client technique to get streams of embeddable age-restricted videos.
This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, category, the uploadDate and the publishDate properties.
The outdated code for these contents has been removed.
Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.
Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.
2022-04-02 19:06:36 +02:00
|
|
|
|
// fingerprinting based on the client version used
|
2022-01-09 22:49:37 +01:00
|
|
|
|
try {
|
2024-04-04 23:33:41 +02:00
|
|
|
|
extractClientVersionFromSwJs();
|
2022-01-09 22:49:37 +01:00
|
|
|
|
} catch (final Exception e) {
|
2024-04-04 23:33:41 +02:00
|
|
|
|
extractClientVersionFromHtmlSearchResultsPage();
|
2022-01-09 22:49:37 +01:00
|
|
|
|
}
|
2021-12-11 16:52:17 +01:00
|
|
|
|
|
2024-04-04 23:33:41 +02:00
|
|
|
|
if (clientVersionExtracted) {
|
2021-12-11 16:52:17 +01:00
|
|
|
|
return clientVersion;
|
|
|
|
|
}
|
2022-02-07 21:23:38 +01:00
|
|
|
|
|
2022-08-08 19:36:19 +02:00
|
|
|
|
// Fallback to the hardcoded one if it is valid
|
2024-04-04 23:33:41 +02:00
|
|
|
|
if (isHardcodedClientVersionValid()) {
|
2022-02-07 21:23:38 +01:00
|
|
|
|
clientVersion = HARDCODED_CLIENT_VERSION;
|
|
|
|
|
return clientVersion;
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-11 16:52:17 +01:00
|
|
|
|
throw new ExtractionException("Could not get YouTube WEB client version");
|
2020-07-26 12:00:56 +02:00
|
|
|
|
}
|
|
|
|
|
|
2021-01-14 20:01:52 +01:00
|
|
|
|
/**
|
2021-02-17 19:21:39 +01:00
|
|
|
|
* <p>
|
2021-12-11 16:52:17 +01:00
|
|
|
|
* <b>Only used in tests.</b>
|
2021-02-17 19:21:39 +01:00
|
|
|
|
* </p>
|
2021-01-14 20:01:52 +01:00
|
|
|
|
*
|
2021-02-17 19:21:39 +01:00
|
|
|
|
* <p>
|
2021-01-14 20:01:52 +01:00
|
|
|
|
* Quick-and-dirty solution to reset global state in between test classes.
|
2021-02-17 19:21:39 +01:00
|
|
|
|
* </p>
|
|
|
|
|
* <p>
|
|
|
|
|
* This is needed for the mocks because in order to reach that state a network request has to
|
|
|
|
|
* be made. If the global state is not reset and the RecordingDownloader is used,
|
|
|
|
|
* then only the first test class has that request recorded. Meaning running the other
|
|
|
|
|
* tests with mocks will fail, because the mock is missing.
|
|
|
|
|
* </p>
|
2021-01-14 20:01:52 +01:00
|
|
|
|
*/
|
2024-04-04 23:33:41 +02:00
|
|
|
|
public static void resetClientVersion() {
|
2021-01-10 20:24:50 +01:00
|
|
|
|
clientVersion = null;
|
2024-04-04 23:33:41 +02:00
|
|
|
|
clientVersionExtracted = false;
|
2021-01-10 20:24:50 +01:00
|
|
|
|
}
|
|
|
|
|
|
2021-04-08 16:36:55 +02:00
|
|
|
|
/**
|
|
|
|
|
* <p>
|
2021-12-11 16:52:17 +01:00
|
|
|
|
* <b>Only used in tests.</b>
|
2021-04-08 16:36:55 +02:00
|
|
|
|
* </p>
|
|
|
|
|
*/
|
2021-06-26 20:04:55 +02:00
|
|
|
|
public static void setNumberGenerator(final Random random) {
|
2021-04-08 16:36:55 +02:00
|
|
|
|
numberGenerator = random;
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-04 23:33:41 +02:00
|
|
|
|
public static boolean isHardcodedYoutubeMusicClientVersionValid() throws IOException,
|
2021-04-25 18:54:26 +02:00
|
|
|
|
ReCaptchaException {
|
2021-05-09 16:14:37 +02:00
|
|
|
|
final String url =
|
2024-04-04 23:33:41 +02:00
|
|
|
|
"https://music.youtube.com/youtubei/v1/music/get_search_suggestions?"
|
|
|
|
|
+ DISABLE_PRETTY_PRINT_PARAMETER;
|
2020-03-20 11:05:19 +01:00
|
|
|
|
|
|
|
|
|
// @formatter:off
|
2022-03-18 15:09:06 +01:00
|
|
|
|
final byte[] json = JsonWriter.string()
|
2020-03-20 11:05:19 +01:00
|
|
|
|
.object()
|
|
|
|
|
.object("context")
|
|
|
|
|
.object("client")
|
|
|
|
|
.value("clientName", "WEB_REMIX")
|
2024-04-04 23:33:41 +02:00
|
|
|
|
.value("clientVersion", HARDCODED_YOUTUBE_MUSIC_CLIENT_VERSION)
|
2021-05-09 16:14:37 +02:00
|
|
|
|
.value("hl", "en-GB")
|
2020-03-20 11:05:19 +01:00
|
|
|
|
.value("gl", "GB")
|
2023-10-07 15:00:40 +02:00
|
|
|
|
.value("platform", "DESKTOP")
|
2023-10-07 15:04:36 +02:00
|
|
|
|
.value("utcOffsetMinutes", 0)
|
2020-03-20 11:05:19 +01:00
|
|
|
|
.end()
|
|
|
|
|
.object("request")
|
2023-10-07 15:00:40 +02:00
|
|
|
|
.array("internalExperimentFlags")
|
|
|
|
|
.end()
|
|
|
|
|
.value("useSsl", true)
|
2020-03-20 11:05:19 +01:00
|
|
|
|
.end()
|
|
|
|
|
.object("user")
|
2023-10-07 15:00:40 +02:00
|
|
|
|
// TODO: provide a way to enable restricted mode with:
|
|
|
|
|
// .value("enableSafetyMode", boolean)
|
|
|
|
|
.value("lockedSafetyMode", false)
|
2020-03-20 11:05:19 +01:00
|
|
|
|
.end()
|
|
|
|
|
.end()
|
2021-04-11 17:41:40 +02:00
|
|
|
|
.value("input", "")
|
2022-07-28 04:19:21 +02:00
|
|
|
|
.end().done().getBytes(StandardCharsets.UTF_8);
|
2020-03-20 11:05:19 +01:00
|
|
|
|
// @formatter:on
|
|
|
|
|
|
2022-11-12 05:01:05 +01:00
|
|
|
|
final var headers = new HashMap<>(getOriginReferrerHeaders(YOUTUBE_MUSIC_URL));
|
2024-04-20 11:43:54 +02:00
|
|
|
|
headers.putAll(getClientHeaders(YOUTUBE_MUSIC_CLIENT_ID,
|
|
|
|
|
HARDCODED_YOUTUBE_MUSIC_CLIENT_VERSION));
|
2020-03-20 11:05:19 +01:00
|
|
|
|
|
2022-07-15 20:56:37 +02:00
|
|
|
|
final Response response = getDownloader().postWithContentTypeJson(url, headers, json);
|
2021-04-25 18:54:26 +02:00
|
|
|
|
// Ensure to have a valid response
|
2021-06-06 15:39:45 +02:00
|
|
|
|
return response.responseBody().length() > 500 && response.responseCode() == 200;
|
2020-03-20 11:05:19 +01:00
|
|
|
|
}
|
|
|
|
|
|
2024-04-04 23:33:41 +02:00
|
|
|
|
public static String getYoutubeMusicClientVersion()
|
2022-01-09 22:49:37 +01:00
|
|
|
|
throws IOException, ReCaptchaException, Parser.RegexException {
|
2024-04-04 23:33:41 +02:00
|
|
|
|
if (!isNullOrEmpty(youtubeMusicClientVersion)) {
|
|
|
|
|
return youtubeMusicClientVersion;
|
2022-03-18 15:09:06 +01:00
|
|
|
|
}
|
2024-04-04 23:33:41 +02:00
|
|
|
|
if (isHardcodedYoutubeMusicClientVersionValid()) {
|
|
|
|
|
youtubeMusicClientVersion = HARDCODED_YOUTUBE_MUSIC_CLIENT_VERSION;
|
|
|
|
|
return youtubeMusicClientVersion;
|
2021-04-25 18:54:26 +02:00
|
|
|
|
}
|
2020-03-17 11:33:39 +01:00
|
|
|
|
|
2020-03-20 11:05:19 +01:00
|
|
|
|
try {
|
2022-01-09 22:49:37 +01:00
|
|
|
|
final String url = "https://music.youtube.com/sw.js";
|
[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links
webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.
The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).
As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.
URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.
As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).
2023-02-20 13:21:55 +01:00
|
|
|
|
final var headers = getOriginReferrerHeaders(YOUTUBE_MUSIC_URL);
|
2022-01-09 22:49:37 +01:00
|
|
|
|
final String response = getDownloader().get(url, headers).responseBody();
|
2024-04-04 23:33:41 +02:00
|
|
|
|
|
|
|
|
|
youtubeMusicClientVersion = getStringResultFromRegexArray(response,
|
2022-11-12 05:01:05 +01:00
|
|
|
|
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
|
2022-01-09 22:49:37 +01:00
|
|
|
|
} catch (final Exception e) {
|
2022-05-15 11:20:06 +02:00
|
|
|
|
final String url = "https://music.youtube.com/?ucbcb=1";
|
2022-07-27 03:26:02 +02:00
|
|
|
|
final String html = getDownloader().get(url, getCookieHeader()).responseBody();
|
2022-01-09 22:49:37 +01:00
|
|
|
|
|
2024-04-04 23:33:41 +02:00
|
|
|
|
youtubeMusicClientVersion = getStringResultFromRegexArray(html,
|
2023-04-16 19:25:05 +02:00
|
|
|
|
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
|
2020-03-20 11:05:19 +01:00
|
|
|
|
}
|
2020-03-17 11:33:39 +01:00
|
|
|
|
|
2024-04-04 23:33:41 +02:00
|
|
|
|
return youtubeMusicClientVersion;
|
2020-03-17 11:33:39 +01:00
|
|
|
|
}
|
|
|
|
|
|
2020-12-15 17:21:21 +01:00
|
|
|
|
@Nullable
|
[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links
webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.
The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).
As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.
URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.
As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).
2023-02-20 13:21:55 +01:00
|
|
|
|
public static String getUrlFromNavigationEndpoint(
|
|
|
|
|
@Nonnull final JsonObject navigationEndpoint) {
|
2020-04-16 16:08:14 +02:00
|
|
|
|
if (navigationEndpoint.has("urlEndpoint")) {
|
[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links
webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.
The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).
As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.
URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.
As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).
2023-02-20 13:21:55 +01:00
|
|
|
|
String internUrl = navigationEndpoint.getObject("urlEndpoint")
|
|
|
|
|
.getString("url");
|
2021-02-12 22:22:11 +01:00
|
|
|
|
if (internUrl.startsWith("https://www.youtube.com/redirect?")) {
|
2021-02-13 12:10:41 +01:00
|
|
|
|
// remove https://www.youtube.com part to fall in the next if block
|
2021-02-12 22:22:11 +01:00
|
|
|
|
internUrl = internUrl.substring(23);
|
|
|
|
|
}
|
|
|
|
|
|
2020-02-27 17:39:23 +01:00
|
|
|
|
if (internUrl.startsWith("/redirect?")) {
|
|
|
|
|
// q parameter can be the first parameter
|
|
|
|
|
internUrl = internUrl.substring(10);
|
2022-03-18 15:09:06 +01:00
|
|
|
|
final String[] params = internUrl.split("&");
|
|
|
|
|
for (final String param : params) {
|
2020-02-27 17:39:23 +01:00
|
|
|
|
if (param.split("=")[0].equals("q")) {
|
|
|
|
|
try {
|
2022-08-09 04:03:29 +02:00
|
|
|
|
return Utils.decodeUrlUtf8(param.split("=")[1]);
|
2021-04-08 16:17:59 +02:00
|
|
|
|
} catch (final UnsupportedEncodingException e) {
|
2020-02-27 17:39:23 +01:00
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else if (internUrl.startsWith("http")) {
|
|
|
|
|
return internUrl;
|
2021-04-25 18:54:26 +02:00
|
|
|
|
} else if (internUrl.startsWith("/channel") || internUrl.startsWith("/user")
|
|
|
|
|
|| internUrl.startsWith("/watch")) {
|
2021-02-12 22:22:11 +01:00
|
|
|
|
return "https://www.youtube.com" + internUrl;
|
2020-02-27 17:39:23 +01:00
|
|
|
|
}
|
[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links
webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.
The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).
As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.
URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.
As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).
2023-02-20 13:21:55 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (navigationEndpoint.has("browseEndpoint")) {
|
2020-02-29 22:57:25 +01:00
|
|
|
|
final JsonObject browseEndpoint = navigationEndpoint.getObject("browseEndpoint");
|
|
|
|
|
final String canonicalBaseUrl = browseEndpoint.getString("canonicalBaseUrl");
|
|
|
|
|
final String browseId = browseEndpoint.getString("browseId");
|
|
|
|
|
|
|
|
|
|
// All channel ids are prefixed with UC
|
|
|
|
|
if (browseId != null && browseId.startsWith("UC")) {
|
|
|
|
|
return "https://www.youtube.com/channel/" + browseId;
|
|
|
|
|
}
|
|
|
|
|
|
2020-04-15 18:49:58 +02:00
|
|
|
|
if (!isNullOrEmpty(canonicalBaseUrl)) {
|
2020-02-29 22:57:25 +01:00
|
|
|
|
return "https://www.youtube.com" + canonicalBaseUrl;
|
|
|
|
|
}
|
[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links
webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.
The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).
As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.
URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.
As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).
2023-02-20 13:21:55 +01:00
|
|
|
|
}
|
2020-02-29 22:57:25 +01:00
|
|
|
|
|
[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links
webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.
The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).
As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.
URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.
As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).
2023-02-20 13:21:55 +01:00
|
|
|
|
if (navigationEndpoint.has("watchEndpoint")) {
|
2022-03-18 15:09:06 +01:00
|
|
|
|
final StringBuilder url = new StringBuilder();
|
[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links
webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.
The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).
As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.
URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.
As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).
2023-02-20 13:21:55 +01:00
|
|
|
|
url.append("https://www.youtube.com/watch?v=")
|
|
|
|
|
.append(navigationEndpoint.getObject("watchEndpoint")
|
|
|
|
|
.getString(VIDEO_ID));
|
2020-04-16 19:28:27 +02:00
|
|
|
|
if (navigationEndpoint.getObject("watchEndpoint").has("playlistId")) {
|
2020-12-12 20:40:13 +01:00
|
|
|
|
url.append("&list=").append(navigationEndpoint.getObject("watchEndpoint")
|
2020-04-16 19:28:27 +02:00
|
|
|
|
.getString("playlistId"));
|
|
|
|
|
}
|
|
|
|
|
if (navigationEndpoint.getObject("watchEndpoint").has("startTimeSeconds")) {
|
[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links
webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.
The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).
As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.
URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.
As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).
2023-02-20 13:21:55 +01:00
|
|
|
|
url.append("&t=")
|
|
|
|
|
.append(navigationEndpoint.getObject("watchEndpoint")
|
2020-04-16 19:28:27 +02:00
|
|
|
|
.getInt("startTimeSeconds"));
|
|
|
|
|
}
|
2020-02-27 17:39:23 +01:00
|
|
|
|
return url.toString();
|
[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links
webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.
The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).
As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.
URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.
As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).
2023-02-20 13:21:55 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (navigationEndpoint.has("watchPlaylistEndpoint")) {
|
2022-03-18 15:09:06 +01:00
|
|
|
|
return "https://www.youtube.com/playlist?list="
|
[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links
webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.
The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).
As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.
URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.
As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).
2023-02-20 13:21:55 +01:00
|
|
|
|
+ navigationEndpoint.getObject("watchPlaylistEndpoint")
|
|
|
|
|
.getString("playlistId");
|
2020-02-27 17:39:23 +01:00
|
|
|
|
}
|
[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links
webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.
The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).
As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.
URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.
As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).
2023-02-20 13:21:55 +01:00
|
|
|
|
|
|
|
|
|
if (navigationEndpoint.has("commandMetadata")) {
|
|
|
|
|
final JsonObject metadata = navigationEndpoint.getObject("commandMetadata")
|
|
|
|
|
.getObject("webCommandMetadata");
|
|
|
|
|
if (metadata.has("url")) {
|
|
|
|
|
return "https://www.youtube.com" + metadata.getString("url");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-02-27 17:39:23 +01:00
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
|
2020-04-20 14:27:33 +02:00
|
|
|
|
/**
|
2022-02-05 22:05:07 +01:00
|
|
|
|
* Get the text from a JSON object that has either a {@code simpleText} or a {@code runs}
|
|
|
|
|
* array.
|
2021-02-07 22:12:22 +01:00
|
|
|
|
*
|
2020-04-20 14:27:33 +02:00
|
|
|
|
* @param textObject JSON object to get the text from
|
2022-02-05 22:05:07 +01:00
|
|
|
|
* @param html whether to return HTML, by parsing the {@code navigationEndpoint}
|
2020-05-01 13:55:15 +02:00
|
|
|
|
* @return text in the JSON object or {@code null}
|
2020-04-20 14:27:33 +02:00
|
|
|
|
*/
|
2020-12-15 17:21:21 +01:00
|
|
|
|
@Nullable
|
[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links
webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.
The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).
As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.
URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.
As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).
2023-02-20 13:21:55 +01:00
|
|
|
|
public static String getTextFromObject(final JsonObject textObject, final boolean html) {
|
2022-03-18 15:09:06 +01:00
|
|
|
|
if (isNullOrEmpty(textObject)) {
|
|
|
|
|
return null;
|
|
|
|
|
}
|
2020-05-01 13:55:15 +02:00
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
|
if (textObject.has("simpleText")) {
|
|
|
|
|
return textObject.getString("simpleText");
|
|
|
|
|
}
|
2020-02-27 17:39:23 +01:00
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
|
if (textObject.getArray("runs").isEmpty()) {
|
|
|
|
|
return null;
|
|
|
|
|
}
|
2020-05-01 13:55:15 +02:00
|
|
|
|
|
2020-12-15 17:21:21 +01:00
|
|
|
|
final StringBuilder textBuilder = new StringBuilder();
|
2022-11-28 17:58:10 +01:00
|
|
|
|
for (final Object o : textObject.getArray("runs")) {
|
|
|
|
|
final JsonObject run = (JsonObject) o;
|
|
|
|
|
String text = run.getString("text");
|
|
|
|
|
|
|
|
|
|
if (html) {
|
|
|
|
|
if (run.has("navigationEndpoint")) {
|
[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links
webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.
The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).
As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.
URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.
As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).
2023-02-20 13:21:55 +01:00
|
|
|
|
final String url = getUrlFromNavigationEndpoint(
|
|
|
|
|
run.getObject("navigationEndpoint"));
|
2022-11-28 17:58:10 +01:00
|
|
|
|
if (!isNullOrEmpty(url)) {
|
[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links
webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.
The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).
As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.
URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.
As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).
2023-02-20 13:21:55 +01:00
|
|
|
|
text = "<a href=\"" + Entities.escape(url) + "\">" + Entities.escape(text)
|
|
|
|
|
+ "</a>";
|
2022-11-28 17:58:10 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
final boolean bold = run.has("bold")
|
|
|
|
|
&& run.getBoolean("bold");
|
|
|
|
|
final boolean italic = run.has("italics")
|
|
|
|
|
&& run.getBoolean("italics");
|
|
|
|
|
final boolean strikethrough = run.has("strikethrough")
|
|
|
|
|
&& run.getBoolean("strikethrough");
|
|
|
|
|
|
|
|
|
|
if (bold) {
|
|
|
|
|
textBuilder.append("<b>");
|
|
|
|
|
}
|
|
|
|
|
if (italic) {
|
|
|
|
|
textBuilder.append("<i>");
|
|
|
|
|
}
|
|
|
|
|
if (strikethrough) {
|
|
|
|
|
textBuilder.append("<s>");
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-05 00:28:12 +01:00
|
|
|
|
textBuilder.append(text);
|
2022-11-28 17:58:10 +01:00
|
|
|
|
|
|
|
|
|
if (strikethrough) {
|
|
|
|
|
textBuilder.append("</s>");
|
|
|
|
|
}
|
|
|
|
|
if (italic) {
|
|
|
|
|
textBuilder.append("</i>");
|
|
|
|
|
}
|
|
|
|
|
if (bold) {
|
|
|
|
|
textBuilder.append("</b>");
|
2020-02-27 17:39:23 +01:00
|
|
|
|
}
|
2022-11-28 17:58:10 +01:00
|
|
|
|
} else {
|
|
|
|
|
textBuilder.append(text);
|
2020-02-27 17:39:23 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
String text = textBuilder.toString();
|
|
|
|
|
|
|
|
|
|
if (html) {
|
|
|
|
|
text = text.replaceAll("\\n", "<br>");
|
2022-03-18 17:21:10 +01:00
|
|
|
|
text = text.replaceAll(" {2}", " ");
|
2020-02-27 17:39:23 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return text;
|
|
|
|
|
}
|
|
|
|
|
|
2023-12-07 20:47:02 +01:00
|
|
|
|
@Nonnull
|
|
|
|
|
public static String getTextFromObjectOrThrow(final JsonObject textObject, final String error)
|
|
|
|
|
throws ParsingException {
|
|
|
|
|
final String result = getTextFromObject(textObject);
|
|
|
|
|
if (result == null) {
|
|
|
|
|
throw new ParsingException("Could not extract text: " + error);
|
|
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-15 17:21:21 +01:00
|
|
|
|
@Nullable
|
[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links
webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.
The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).
As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.
URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.
As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).
2023-02-20 13:21:55 +01:00
|
|
|
|
public static String getTextFromObject(final JsonObject textObject) {
|
2020-02-27 17:39:23 +01:00
|
|
|
|
return getTextFromObject(textObject, false);
|
|
|
|
|
}
|
2020-02-28 09:36:33 +01:00
|
|
|
|
|
2022-10-29 23:43:04 +02:00
|
|
|
|
@Nullable
|
[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links
webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.
The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).
As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.
URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.
As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).
2023-02-20 13:21:55 +01:00
|
|
|
|
public static String getUrlFromObject(final JsonObject textObject) {
|
2022-10-29 23:43:04 +02:00
|
|
|
|
if (isNullOrEmpty(textObject)) {
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (textObject.getArray("runs").isEmpty()) {
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (final Object textPart : textObject.getArray("runs")) {
|
|
|
|
|
final String url = getUrlFromNavigationEndpoint(((JsonObject) textPart)
|
|
|
|
|
.getObject("navigationEndpoint"));
|
|
|
|
|
if (!isNullOrEmpty(url)) {
|
|
|
|
|
return url;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
|
2021-03-24 09:04:43 +01:00
|
|
|
|
@Nullable
|
[YouTube] Fix hashtags links extraction and escape text in attribute descriptions + HTML links
webCommandMetadata object is contained inside a commandMetadata one, so it is
not accessible from the root of the navigationEndpoint object.
The corresponding statement has been moved at the bottom of the specific
endpoints parsing, as the webCommandMetadata object is present almost
everywhere, otherwise URLs of some endpoints would have be changed, such as
uploader URLs (from channel IDs to handles).
As no ParsingException is now thrown by getUrlFromNavigationEndpoint, and so by
getTextFromObject, getUrlFromObject and getTextAtKey, the methods which were
catching ParsingExceptions thrown by these methods had to be updated.
URLs got in the HTML version of getTextFromObject are now escaped properly to
provide valid HTML to clients. This has been also done for attribute
descriptions, with the description text for this type of descriptions.
As YouTube descriptions are in HTML format (except for the fallback on the JSON
player response, which is plain text and only happens when there is no visual
metadata or a breaking change), all URLs returned are escaped, so tests which
are testing presence of URLs with escaped characters had to be updated (it was
only the case for YoutubeStreamExtractorDefaultTest.DescriptionTestUnboxing).
2023-02-20 13:21:55 +01:00
|
|
|
|
public static String getTextAtKey(@Nonnull final JsonObject jsonObject, final String theKey) {
|
2022-03-18 15:09:06 +01:00
|
|
|
|
if (jsonObject.isString(theKey)) {
|
|
|
|
|
return jsonObject.getString(theKey);
|
2021-03-24 09:04:43 +01:00
|
|
|
|
} else {
|
2022-03-18 15:09:06 +01:00
|
|
|
|
return getTextFromObject(jsonObject.getObject(theKey));
|
2021-03-24 09:04:43 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
|
public static String fixThumbnailUrl(@Nonnull final String thumbnailUrl) {
|
|
|
|
|
String result = thumbnailUrl;
|
|
|
|
|
if (result.startsWith("//")) {
|
|
|
|
|
result = result.substring(2);
|
2020-02-28 09:36:33 +01:00
|
|
|
|
}
|
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
|
if (result.startsWith(HTTP)) {
|
|
|
|
|
result = Utils.replaceHttpWithHttps(result);
|
|
|
|
|
} else if (!result.startsWith(HTTPS)) {
|
|
|
|
|
result = "https://" + result;
|
2020-02-28 09:36:33 +01:00
|
|
|
|
}
|
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
|
return result;
|
2020-02-28 09:36:33 +01:00
|
|
|
|
}
|
2020-02-29 16:42:04 +01:00
|
|
|
|
|
2022-07-22 17:28:39 +02:00
|
|
|
|
/**
|
|
|
|
|
* Get thumbnails from a {@link JsonObject} representing a YouTube
|
|
|
|
|
* {@link org.schabi.newpipe.extractor.InfoItem InfoItem}.
|
|
|
|
|
*
|
|
|
|
|
* <p>
|
|
|
|
|
* Thumbnails are got from the {@code thumbnails} {@link JsonArray} inside the {@code thumbnail}
|
|
|
|
|
* {@link JsonObject} of the YouTube {@link org.schabi.newpipe.extractor.InfoItem InfoItem},
|
|
|
|
|
* using {@link #getImagesFromThumbnailsArray(JsonArray)}.
|
|
|
|
|
* </p>
|
|
|
|
|
*
|
|
|
|
|
* @param infoItem a YouTube {@link org.schabi.newpipe.extractor.InfoItem InfoItem}
|
|
|
|
|
* @return an unmodifiable list of {@link Image}s found in the {@code thumbnails}
|
|
|
|
|
* {@link JsonArray}
|
|
|
|
|
* @throws ParsingException if an exception occurs when
|
|
|
|
|
* {@link #getImagesFromThumbnailsArray(JsonArray)} is executed
|
|
|
|
|
*/
|
|
|
|
|
@Nonnull
|
|
|
|
|
public static List<Image> getThumbnailsFromInfoItem(@Nonnull final JsonObject infoItem)
|
2022-02-02 20:23:11 +01:00
|
|
|
|
throws ParsingException {
|
|
|
|
|
try {
|
2022-07-22 17:28:39 +02:00
|
|
|
|
return getImagesFromThumbnailsArray(infoItem.getObject("thumbnail")
|
|
|
|
|
.getArray("thumbnails"));
|
2022-02-02 20:23:11 +01:00
|
|
|
|
} catch (final Exception e) {
|
2022-07-22 17:28:39 +02:00
|
|
|
|
throw new ParsingException("Could not get thumbnails from InfoItem", e);
|
2022-02-02 20:23:11 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-22 17:28:39 +02:00
|
|
|
|
/**
|
|
|
|
|
* Get images from a YouTube {@code thumbnails} {@link JsonArray}.
|
|
|
|
|
*
|
|
|
|
|
* <p>
|
|
|
|
|
* The properties of the {@link Image}s created will be set using the corresponding ones of
|
|
|
|
|
* thumbnail items.
|
|
|
|
|
* </p>
|
|
|
|
|
*
|
|
|
|
|
* @param thumbnails a YouTube {@code thumbnails} {@link JsonArray}
|
|
|
|
|
* @return an unmodifiable list of {@link Image}s extracted from the given {@link JsonArray}
|
|
|
|
|
*/
|
|
|
|
|
@Nonnull
|
|
|
|
|
public static List<Image> getImagesFromThumbnailsArray(
|
|
|
|
|
@Nonnull final JsonArray thumbnails) {
|
|
|
|
|
return thumbnails.stream()
|
|
|
|
|
.filter(JsonObject.class::isInstance)
|
|
|
|
|
.map(JsonObject.class::cast)
|
|
|
|
|
.filter(thumbnail -> !isNullOrEmpty(thumbnail.getString("url")))
|
|
|
|
|
.map(thumbnail -> {
|
|
|
|
|
final int height = thumbnail.getInt("height", Image.HEIGHT_UNKNOWN);
|
|
|
|
|
return new Image(fixThumbnailUrl(thumbnail.getString("url")),
|
|
|
|
|
height,
|
|
|
|
|
thumbnail.getInt("width", Image.WIDTH_UNKNOWN),
|
|
|
|
|
ResolutionLevel.fromHeight(height));
|
|
|
|
|
})
|
|
|
|
|
.collect(Collectors.toUnmodifiableList());
|
|
|
|
|
}
|
|
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
|
@Nonnull
|
|
|
|
|
public static String getValidJsonResponseBody(@Nonnull final Response response)
|
2020-04-01 16:01:21 +02:00
|
|
|
|
throws ParsingException, MalformedURLException {
|
2020-03-01 01:50:31 +01:00
|
|
|
|
if (response.responseCode() == 404) {
|
2020-04-16 19:28:27 +02:00
|
|
|
|
throw new ContentNotAvailableException("Not found"
|
|
|
|
|
+ " (\"" + response.responseCode() + " " + response.responseMessage() + "\")");
|
2020-03-01 01:50:31 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
final String responseBody = response.responseBody();
|
2021-04-12 18:24:32 +02:00
|
|
|
|
if (responseBody.length() < 50) { // Ensure to have a valid response
|
2020-02-29 16:42:04 +01:00
|
|
|
|
throw new ParsingException("JSON response is too short");
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-01 01:52:25 +01:00
|
|
|
|
// Check if the request was redirected to the error page.
|
|
|
|
|
final URL latestUrl = new URL(response.latestUrl());
|
|
|
|
|
if (latestUrl.getHost().equalsIgnoreCase("www.youtube.com")) {
|
|
|
|
|
final String path = latestUrl.getPath();
|
|
|
|
|
if (path.equalsIgnoreCase("/oops") || path.equalsIgnoreCase("/error")) {
|
|
|
|
|
throw new ContentNotAvailableException("Content unavailable");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
final String responseContentType = response.getHeader("Content-Type");
|
2020-04-01 16:01:21 +02:00
|
|
|
|
if (responseContentType != null
|
|
|
|
|
&& responseContentType.toLowerCase().contains("text/html")) {
|
2020-04-16 19:28:27 +02:00
|
|
|
|
throw new ParsingException("Got HTML document, expected JSON response"
|
|
|
|
|
+ " (latest url was: \"" + response.latestUrl() + "\")");
|
2020-03-01 01:52:25 +01:00
|
|
|
|
}
|
|
|
|
|
|
2020-04-01 16:01:21 +02:00
|
|
|
|
return responseBody;
|
|
|
|
|
}
|
|
|
|
|
|
2021-04-08 16:17:59 +02:00
|
|
|
|
public static JsonObject getJsonPostResponse(final String endpoint,
|
2021-04-11 17:01:43 +02:00
|
|
|
|
final byte[] body,
|
|
|
|
|
final Localization localization)
|
2021-04-08 16:17:59 +02:00
|
|
|
|
throws IOException, ExtractionException {
|
2022-11-12 05:01:05 +01:00
|
|
|
|
final var headers = getYouTubeHeaders();
|
2021-04-08 16:17:59 +02:00
|
|
|
|
|
2022-06-18 16:07:32 +02:00
|
|
|
|
return JsonUtils.toJsonObject(getValidJsonResponseBody(
|
2024-04-04 23:33:41 +02:00
|
|
|
|
getDownloader().postWithContentTypeJson(YOUTUBEI_V1_URL + endpoint + "?"
|
|
|
|
|
+ DISABLE_PRETTY_PRINT_PARAMETER, headers, body, localization)));
|
2021-04-08 16:17:59 +02:00
|
|
|
|
}
|
|
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
public static JsonObject getJsonAndroidPostResponse(
|
|
|
|
|
final String endpoint,
|
|
|
|
|
final byte[] body,
|
2022-01-15 17:25:00 +01:00
|
|
|
|
@Nonnull final Localization localization,
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
@Nullable final String endPartOfUrlRequest) throws IOException, ExtractionException {
|
2022-03-15 19:01:24 +01:00
|
|
|
|
return getMobilePostResponse(endpoint, body, localization,
|
2024-04-04 23:33:41 +02:00
|
|
|
|
getAndroidUserAgent(localization), endPartOfUrlRequest);
|
2021-03-03 19:49:26 +01:00
|
|
|
|
}
|
|
|
|
|
|
2022-01-15 17:25:00 +01:00
|
|
|
|
public static JsonObject getJsonIosPostResponse(
|
|
|
|
|
final String endpoint,
|
|
|
|
|
final byte[] body,
|
|
|
|
|
@Nonnull final Localization localization,
|
|
|
|
|
@Nullable final String endPartOfUrlRequest) throws IOException, ExtractionException {
|
2022-03-15 19:01:24 +01:00
|
|
|
|
return getMobilePostResponse(endpoint, body, localization, getIosUserAgent(localization),
|
2024-04-04 23:33:41 +02:00
|
|
|
|
endPartOfUrlRequest);
|
2022-03-15 19:01:24 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static JsonObject getMobilePostResponse(
|
|
|
|
|
final String endpoint,
|
|
|
|
|
final byte[] body,
|
|
|
|
|
@Nonnull final Localization localization,
|
|
|
|
|
@Nonnull final String userAgent,
|
|
|
|
|
@Nullable final String endPartOfUrlRequest) throws IOException, ExtractionException {
|
2022-11-12 05:01:05 +01:00
|
|
|
|
final var headers = Map.of("User-Agent", List.of(userAgent),
|
|
|
|
|
"X-Goog-Api-Format-Version", List.of("2"));
|
2022-01-15 17:25:00 +01:00
|
|
|
|
|
2024-04-04 23:33:41 +02:00
|
|
|
|
final String baseEndpointUrl = YOUTUBEI_V1_GAPIS_URL + endpoint + "?"
|
[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more
Use the TV embedded client technique to get streams of embeddable age-restricted videos.
This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, category, the uploadDate and the publishDate properties.
The outdated code for these contents has been removed.
Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.
Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.
2022-04-02 19:06:36 +02:00
|
|
|
|
+ DISABLE_PRETTY_PRINT_PARAMETER;
|
2022-01-15 17:25:00 +01:00
|
|
|
|
|
2022-06-18 16:07:32 +02:00
|
|
|
|
return JsonUtils.toJsonObject(getValidJsonResponseBody(
|
2022-07-15 20:56:37 +02:00
|
|
|
|
getDownloader().postWithContentTypeJson(isNullOrEmpty(endPartOfUrlRequest)
|
2022-06-18 16:07:32 +02:00
|
|
|
|
? baseEndpointUrl
|
|
|
|
|
: baseEndpointUrl + endPartOfUrlRequest,
|
|
|
|
|
headers, body, localization)));
|
2022-01-15 17:25:00 +01:00
|
|
|
|
}
|
|
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
|
@Nonnull
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
|
public static JsonBuilder<JsonObject> prepareDesktopJsonBuilder(
|
|
|
|
|
@Nonnull final Localization localization,
|
|
|
|
|
@Nonnull final ContentCountry contentCountry)
|
2021-04-02 21:34:47 +02:00
|
|
|
|
throws IOException, ExtractionException {
|
[YouTube] Add support for channel tabs and tags and age-restricted channels
Support of tags and videos, shorts, live, playlists and channels tabs has been
added for non-age restricted channels.
Age-restricted channels are now also supported and always returned the videos,
shorts and live tabs, accessible using system playlists. These tabs are the
only ones which can be accessed using YouTube's desktop website without being
logged-in.
The videos channel tab parameter has been updated to the one used by the
desktop website and when a channel extraction is fetched, this tab is returned
in the list of tabs as a cached one in the corresponding link handler.
Visitor data support per request has been added, as a valid visitor data is
required to fetch continuations with contents on the shorts tab. It is only
used in this case to enhance privacy.
A dedicated shorts UI elements (reelItemRenderers) extractor has been added,
YoutubeReelInfoItemExtractor. These elements do not provide the exact view
count, any uploader info (name, URL, avatar, verified status) and the upload
date.
All service's LinkHandlers are now using the singleton pattern and some code
has been also improved on the files changed.
Co-authored-by: ThetaDev <t.testboy@gmail.com>
Co-authored-by: Stypox <stypox@pm.me>
2023-07-14 23:46:48 +02:00
|
|
|
|
return prepareDesktopJsonBuilder(localization, contentCountry, null);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Nonnull
|
|
|
|
|
public static JsonBuilder<JsonObject> prepareDesktopJsonBuilder(
|
|
|
|
|
@Nonnull final Localization localization,
|
|
|
|
|
@Nonnull final ContentCountry contentCountry,
|
|
|
|
|
@Nullable final String visitorData)
|
|
|
|
|
throws IOException, ExtractionException {
|
2021-04-02 21:34:47 +02:00
|
|
|
|
// @formatter:off
|
[YouTube] Add support for channel tabs and tags and age-restricted channels
Support of tags and videos, shorts, live, playlists and channels tabs has been
added for non-age restricted channels.
Age-restricted channels are now also supported and always returned the videos,
shorts and live tabs, accessible using system playlists. These tabs are the
only ones which can be accessed using YouTube's desktop website without being
logged-in.
The videos channel tab parameter has been updated to the one used by the
desktop website and when a channel extraction is fetched, this tab is returned
in the list of tabs as a cached one in the corresponding link handler.
Visitor data support per request has been added, as a valid visitor data is
required to fetch continuations with contents on the shorts tab. It is only
used in this case to enhance privacy.
A dedicated shorts UI elements (reelItemRenderers) extractor has been added,
YoutubeReelInfoItemExtractor. These elements do not provide the exact view
count, any uploader info (name, URL, avatar, verified status) and the upload
date.
All service's LinkHandlers are now using the singleton pattern and some code
has been also improved on the files changed.
Co-authored-by: ThetaDev <t.testboy@gmail.com>
Co-authored-by: Stypox <stypox@pm.me>
2023-07-14 23:46:48 +02:00
|
|
|
|
final JsonBuilder<JsonObject> builder = JsonObject.builder()
|
2021-04-02 21:34:47 +02:00
|
|
|
|
.object("context")
|
|
|
|
|
.object("client")
|
2021-04-30 19:06:56 +02:00
|
|
|
|
.value("hl", localization.getLocalizationCode())
|
|
|
|
|
.value("gl", contentCountry.getCountryCode())
|
2021-06-24 18:39:16 +02:00
|
|
|
|
.value("clientName", "WEB")
|
|
|
|
|
.value("clientVersion", getClientVersion())
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
.value("originalUrl", "https://www.youtube.com")
|
2023-10-07 15:04:36 +02:00
|
|
|
|
.value("platform", "DESKTOP")
|
|
|
|
|
.value("utcOffsetMinutes", 0);
|
[YouTube] Add support for channel tabs and tags and age-restricted channels
Support of tags and videos, shorts, live, playlists and channels tabs has been
added for non-age restricted channels.
Age-restricted channels are now also supported and always returned the videos,
shorts and live tabs, accessible using system playlists. These tabs are the
only ones which can be accessed using YouTube's desktop website without being
logged-in.
The videos channel tab parameter has been updated to the one used by the
desktop website and when a channel extraction is fetched, this tab is returned
in the list of tabs as a cached one in the corresponding link handler.
Visitor data support per request has been added, as a valid visitor data is
required to fetch continuations with contents on the shorts tab. It is only
used in this case to enhance privacy.
A dedicated shorts UI elements (reelItemRenderers) extractor has been added,
YoutubeReelInfoItemExtractor. These elements do not provide the exact view
count, any uploader info (name, URL, avatar, verified status) and the upload
date.
All service's LinkHandlers are now using the singleton pattern and some code
has been also improved on the files changed.
Co-authored-by: ThetaDev <t.testboy@gmail.com>
Co-authored-by: Stypox <stypox@pm.me>
2023-07-14 23:46:48 +02:00
|
|
|
|
|
|
|
|
|
if (visitorData != null) {
|
|
|
|
|
builder.value("visitorData", visitorData);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return builder.end()
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
.object("request")
|
|
|
|
|
.array("internalExperimentFlags")
|
|
|
|
|
.end()
|
|
|
|
|
.value("useSsl", true)
|
2021-06-24 18:39:16 +02:00
|
|
|
|
.end()
|
|
|
|
|
.object("user")
|
2023-10-07 15:00:40 +02:00
|
|
|
|
// TODO: provide a way to enable restricted mode with:
|
|
|
|
|
// .value("enableSafetyMode", boolean)
|
2021-06-24 18:39:16 +02:00
|
|
|
|
.value("lockedSafetyMode", false)
|
2021-04-02 21:34:47 +02:00
|
|
|
|
.end()
|
|
|
|
|
.end();
|
2021-05-29 14:43:26 +02:00
|
|
|
|
// @formatter:on
|
|
|
|
|
}
|
|
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
|
@Nonnull
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
|
public static JsonBuilder<JsonObject> prepareAndroidMobileJsonBuilder(
|
|
|
|
|
@Nonnull final Localization localization,
|
|
|
|
|
@Nonnull final ContentCountry contentCountry) {
|
2021-05-29 14:43:26 +02:00
|
|
|
|
// @formatter:off
|
|
|
|
|
return JsonObject.builder()
|
|
|
|
|
.object("context")
|
|
|
|
|
.object("client")
|
|
|
|
|
.value("clientName", "ANDROID")
|
2022-08-08 20:12:32 +02:00
|
|
|
|
.value("clientVersion", ANDROID_YOUTUBE_CLIENT_VERSION)
|
2022-01-15 17:25:00 +01:00
|
|
|
|
.value("platform", "MOBILE")
|
2022-08-08 22:06:10 +02:00
|
|
|
|
.value("osName", "Android")
|
2023-10-07 14:14:34 +02:00
|
|
|
|
.value("osVersion", "14")
|
2022-08-08 22:06:10 +02:00
|
|
|
|
/*
|
|
|
|
|
A valid Android SDK version is required to be sure to get a valid player
|
|
|
|
|
response
|
2023-10-07 14:14:34 +02:00
|
|
|
|
If this parameter is not provided, the player response is replaced by an
|
|
|
|
|
error saying the message "The following content is not available on this
|
|
|
|
|
app. Watch this content on the latest version on YouTube" (it was
|
|
|
|
|
previously a 5-minute video with this message)
|
2022-08-08 22:06:10 +02:00
|
|
|
|
See https://github.com/TeamNewPipe/NewPipe/issues/8713
|
|
|
|
|
The Android SDK version corresponding to the Android version used in
|
|
|
|
|
requests is sent
|
|
|
|
|
*/
|
2023-10-07 14:14:34 +02:00
|
|
|
|
.value("androidSdkVersion", 34)
|
2022-01-15 17:25:00 +01:00
|
|
|
|
.value("hl", localization.getLocalizationCode())
|
|
|
|
|
.value("gl", contentCountry.getCountryCode())
|
2023-10-07 15:04:36 +02:00
|
|
|
|
.value("utcOffsetMinutes", 0)
|
2022-01-15 17:25:00 +01:00
|
|
|
|
.end()
|
2023-10-07 15:00:40 +02:00
|
|
|
|
.object("request")
|
|
|
|
|
.array("internalExperimentFlags")
|
|
|
|
|
.end()
|
|
|
|
|
.value("useSsl", true)
|
|
|
|
|
.end()
|
2022-01-15 17:25:00 +01:00
|
|
|
|
.object("user")
|
2023-10-07 15:00:40 +02:00
|
|
|
|
// TODO: provide a way to enable restricted mode with:
|
|
|
|
|
// .value("enableSafetyMode", boolean)
|
2022-01-15 17:25:00 +01:00
|
|
|
|
.value("lockedSafetyMode", false)
|
|
|
|
|
.end()
|
|
|
|
|
.end();
|
|
|
|
|
// @formatter:on
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Nonnull
|
|
|
|
|
public static JsonBuilder<JsonObject> prepareIosMobileJsonBuilder(
|
|
|
|
|
@Nonnull final Localization localization,
|
|
|
|
|
@Nonnull final ContentCountry contentCountry) {
|
|
|
|
|
// @formatter:off
|
|
|
|
|
return JsonObject.builder()
|
|
|
|
|
.object("context")
|
|
|
|
|
.object("client")
|
|
|
|
|
.value("clientName", "IOS")
|
2022-08-08 20:12:32 +02:00
|
|
|
|
.value("clientVersion", IOS_YOUTUBE_CLIENT_VERSION)
|
2022-08-08 22:06:10 +02:00
|
|
|
|
.value("deviceMake", "Apple")
|
2022-01-15 17:25:00 +01:00
|
|
|
|
// Device model is required to get 60fps streams
|
2022-03-26 20:02:35 +01:00
|
|
|
|
.value("deviceModel", IOS_DEVICE_MODEL)
|
2022-01-15 17:25:00 +01:00
|
|
|
|
.value("platform", "MOBILE")
|
2022-08-08 22:06:10 +02:00
|
|
|
|
.value("osName", "iOS")
|
2024-04-20 11:43:54 +02:00
|
|
|
|
.value("osVersion", IOS_OS_VERSION)
|
2021-05-29 14:43:26 +02:00
|
|
|
|
.value("hl", localization.getLocalizationCode())
|
|
|
|
|
.value("gl", contentCountry.getCountryCode())
|
2023-10-07 15:04:36 +02:00
|
|
|
|
.value("utcOffsetMinutes", 0)
|
2021-05-29 14:43:26 +02:00
|
|
|
|
.end()
|
2023-10-07 15:00:40 +02:00
|
|
|
|
.object("request")
|
|
|
|
|
.array("internalExperimentFlags")
|
|
|
|
|
.end()
|
|
|
|
|
.value("useSsl", true)
|
|
|
|
|
.end()
|
2021-06-24 18:39:16 +02:00
|
|
|
|
.object("user")
|
2023-10-07 15:00:40 +02:00
|
|
|
|
// TODO: provide a way to enable restricted mode with:
|
|
|
|
|
// .value("enableSafetyMode", boolean)
|
2021-06-24 18:39:16 +02:00
|
|
|
|
.value("lockedSafetyMode", false)
|
|
|
|
|
.end()
|
2021-05-29 14:43:26 +02:00
|
|
|
|
.end();
|
2021-04-02 21:34:47 +02:00
|
|
|
|
// @formatter:on
|
|
|
|
|
}
|
|
|
|
|
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
|
@Nonnull
|
[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more
Use the TV embedded client technique to get streams of embeddable age-restricted videos.
This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, category, the uploadDate and the publishDate properties.
The outdated code for these contents has been removed.
Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.
Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.
2022-04-02 19:06:36 +02:00
|
|
|
|
public static JsonBuilder<JsonObject> prepareTvHtml5EmbedJsonBuilder(
|
2022-01-15 17:25:00 +01:00
|
|
|
|
@Nonnull final Localization localization,
|
|
|
|
|
@Nonnull final ContentCountry contentCountry,
|
[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more
Use the TV embedded client technique to get streams of embeddable age-restricted videos.
This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, category, the uploadDate and the publishDate properties.
The outdated code for these contents has been removed.
Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.
Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.
2022-04-02 19:06:36 +02:00
|
|
|
|
@Nonnull final String videoId) {
|
2022-03-06 20:10:11 +01:00
|
|
|
|
// @formatter:off
|
2022-01-15 17:25:00 +01:00
|
|
|
|
return JsonObject.builder()
|
|
|
|
|
.object("context")
|
|
|
|
|
.object("client")
|
[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more
Use the TV embedded client technique to get streams of embeddable age-restricted videos.
This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, category, the uploadDate and the publishDate properties.
The outdated code for these contents has been removed.
Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.
Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.
2022-04-02 19:06:36 +02:00
|
|
|
|
.value("clientName", "TVHTML5_SIMPLY_EMBEDDED_PLAYER")
|
|
|
|
|
.value("clientVersion", TVHTML5_SIMPLY_EMBED_CLIENT_VERSION)
|
2022-01-15 17:25:00 +01:00
|
|
|
|
.value("clientScreen", "EMBED")
|
[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more
Use the TV embedded client technique to get streams of embeddable age-restricted videos.
This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, category, the uploadDate and the publishDate properties.
The outdated code for these contents has been removed.
Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.
Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.
2022-04-02 19:06:36 +02:00
|
|
|
|
.value("platform", "TV")
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
|
.value("hl", localization.getLocalizationCode())
|
|
|
|
|
.value("gl", contentCountry.getCountryCode())
|
2023-10-07 15:04:36 +02:00
|
|
|
|
.value("utcOffsetMinutes", 0)
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
|
.end()
|
|
|
|
|
.object("thirdParty")
|
|
|
|
|
.value("embedUrl", "https://www.youtube.com/watch?v=" + videoId)
|
|
|
|
|
.end()
|
2023-10-07 15:00:40 +02:00
|
|
|
|
.object("request")
|
|
|
|
|
.array("internalExperimentFlags")
|
|
|
|
|
.end()
|
|
|
|
|
.value("useSsl", true)
|
|
|
|
|
.end()
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
|
.object("user")
|
2023-10-07 15:00:40 +02:00
|
|
|
|
// TODO: provide a way to enable restricted mode with:
|
|
|
|
|
// .value("enableSafetyMode", boolean)
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
|
.value("lockedSafetyMode", false)
|
|
|
|
|
.end()
|
[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more
Use the TV embedded client technique to get streams of embeddable age-restricted videos.
This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata such as the unlisted property, category, the uploadDate and the publishDate properties.
The outdated code for these contents has been removed.
Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.
Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.
2022-04-02 19:06:36 +02:00
|
|
|
|
.end();
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
|
// @formatter:on
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Nonnull
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video ID is now also sent as a query parameter, as well as a 12-character string in the t query parameter, in order to better spoof this client. Research still needs to be done on this parameter, which is unique to each request, and on how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
public static byte[] createDesktopPlayerBody(
|
|
|
|
|
@Nonnull final Localization localization,
|
|
|
|
|
@Nonnull final ContentCountry contentCountry,
|
|
|
|
|
@Nonnull final String videoId,
|
2023-09-16 22:22:09 +02:00
|
|
|
|
@Nonnull final Integer sts,
|
[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more
Use the TV embedded client technique to get streams of embeddable age-restricted videos.
This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata, such as the unlisted property, the category, the uploadDate and the publishDate properties, would be missing.
The outdated code for these contents has been removed.
Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.
Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.
2022-04-02 19:06:36 +02:00
|
|
|
|
final boolean isTvHtml5DesktopJsonBuilder,
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
@Nonnull final String contentPlaybackNonce) throws IOException, ExtractionException {
|
|
|
|
|
// @formatter:off
|
[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more
Use the TV embedded client technique to get streams of embeddable age-restricted videos.
This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata, such as the unlisted property, the category, the uploadDate and the publishDate properties, would be missing.
The outdated code for these contents has been removed.
Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.
Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.
2022-04-02 19:06:36 +02:00
|
|
|
|
return JsonWriter.string((isTvHtml5DesktopJsonBuilder
|
|
|
|
|
? prepareTvHtml5EmbedJsonBuilder(localization, contentCountry, videoId)
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
: prepareDesktopJsonBuilder(localization, contentCountry))
|
|
|
|
|
.object("playbackContext")
|
|
|
|
|
.object("contentPlaybackContext")
|
2022-08-08 22:06:10 +02:00
|
|
|
|
// Signature timestamp from the JavaScript base player is needed to get
|
|
|
|
|
// working obfuscated URLs
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
.value("signatureTimestamp", sts)
|
|
|
|
|
.value("referer", "https://www.youtube.com/watch?v=" + videoId)
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
|
.end()
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
.end()
|
|
|
|
|
.value(CPN, contentPlaybackNonce)
|
|
|
|
|
.value(VIDEO_ID, videoId)
|
[YouTube] Fix extraction of embeddable age-restricted videos, fix extraction of contents with warnings and more
Use the TV embedded client technique to get streams of embeddable age-restricted videos.
This client doesn't provide the playerMicroFormatRenderer object in the player response, but it is still returned on the WEB player response, even for unavailable (but non-private) contents, so we need now to store it, as we are replacing the player response from the WEB client by the TV embedded one.
Otherwise, some metadata, such as the unlisted property, the category, the uploadDate and the publishDate properties, would be missing.
The outdated code for these contents has been removed.
Add the racyCheckOk and contentCheckOk to player and next requests to the InnerTube API.
The first doesn't seem to make any difference when used anonymously, but the second one is needed to get streams of contents with a warning before they can be played.
Also apply some requested changes, fixes and improvements in YoutubeParsingHelper and YoutubeStreamExtractor.
2022-04-02 19:06:36 +02:00
|
|
|
|
.value(CONTENT_CHECK_OK, true)
|
|
|
|
|
.value(RACY_CHECK_OK, true)
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
.done())
|
|
|
|
|
.getBytes(StandardCharsets.UTF_8);
|
|
|
|
|
// @formatter:on
|
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version
Here are the requests which will now be made by the `onFetchPage` method of `YoutubeStreamExtractor`:
- the desktop API is fetched.
If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API.
- if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status
If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent.
Otherwise, the next endpoint will be fetched normally, if the content is available.
If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made.
We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used.
If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used.
Other code changes:
- `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder`
- `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder`
- two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder`
- `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper`
- a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor`
- `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>`
- the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page
- some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed
Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
|
|
|
|
}
|
|
|
|
|
|
2022-03-15 19:01:24 +01:00
|
|
|
|
/**
|
|
|
|
|
* Get the user-agent string used as the user-agent for InnerTube requests with the Android
|
|
|
|
|
* client.
|
|
|
|
|
*
|
2022-08-08 20:12:32 +02:00
|
|
|
|
* <p>
|
2022-03-15 19:01:24 +01:00
|
|
|
|
* If the {@link Localization} provided is {@code null}, fallbacks to
|
|
|
|
|
* {@link Localization#DEFAULT the default one}.
|
2022-08-08 20:12:32 +02:00
|
|
|
|
* </p>
|
2022-03-15 19:01:24 +01:00
|
|
|
|
*
|
|
|
|
|
* @param localization the {@link Localization} to set in the user-agent
|
|
|
|
|
* @return the Android user-agent used for InnerTube requests with the Android client,
|
|
|
|
|
* depending on the {@link Localization} provided
|
|
|
|
|
*/
|
|
|
|
|
@Nonnull
|
|
|
|
|
public static String getAndroidUserAgent(@Nullable final Localization localization) {
|
2023-10-07 14:14:34 +02:00
|
|
|
|
// Spoofing an Android 14 device with the hardcoded version of the Android app
|
2022-08-08 20:12:32 +02:00
|
|
|
|
return "com.google.android.youtube/" + ANDROID_YOUTUBE_CLIENT_VERSION
|
2023-10-07 14:14:34 +02:00
|
|
|
|
+ " (Linux; U; Android 14; "
|
2022-05-10 21:38:15 +02:00
|
|
|
|
+ (localization != null ? localization : Localization.DEFAULT).getCountryCode()
|
2022-03-15 19:01:24 +01:00
|
|
|
|
+ ") gzip";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Get the user-agent string used as the user-agent for InnerTube requests with the iOS
|
|
|
|
|
* client.
|
|
|
|
|
*
|
2022-08-08 20:12:32 +02:00
|
|
|
|
* <p>
|
2022-03-15 19:01:24 +01:00
|
|
|
|
* If the {@link Localization} provided is {@code null}, fallbacks to
|
|
|
|
|
* {@link Localization#DEFAULT the default one}.
|
2022-08-08 20:12:32 +02:00
|
|
|
|
* </p>
|
2022-03-15 19:01:24 +01:00
|
|
|
|
*
|
|
|
|
|
* @param localization the {@link Localization} to set in the user-agent
|
|
|
|
|
* @return the iOS user-agent used for InnerTube requests with the iOS client, depending on the
|
|
|
|
|
* {@link Localization} provided
|
|
|
|
|
*/
|
|
|
|
|
@Nonnull
|
|
|
|
|
public static String getIosUserAgent(@Nullable final Localization localization) {
|
2024-04-04 23:33:41 +02:00
|
|
|
|
// Spoofing an iPhone 15 running iOS 17.4.1 with the hardcoded version of the iOS app
|
2022-08-08 20:12:32 +02:00
|
|
|
|
return "com.google.ios.youtube/" + IOS_YOUTUBE_CLIENT_VERSION
|
2024-04-20 11:43:54 +02:00
|
|
|
|
+ "(" + IOS_DEVICE_MODEL + "; U; CPU iOS "
|
|
|
|
|
+ IOS_USER_AGENT_VERSION + " like Mac OS X; "
|
2022-05-10 21:38:15 +02:00
|
|
|
|
+ (localization != null ? localization : Localization.DEFAULT).getCountryCode()
|
2022-03-15 19:01:24 +01:00
|
|
|
|
+ ")";
|
|
|
|
|
}
|
|
|
|
|
|
2022-11-12 05:01:05 +01:00
|
|
|
|
/**
|
|
|
|
|
* Returns a {@link Map} containing the required YouTube Music headers.
|
|
|
|
|
*/
|
2022-06-18 16:07:32 +02:00
|
|
|
|
@Nonnull
|
|
|
|
|
public static Map<String, List<String>> getYoutubeMusicHeaders() {
|
2022-11-12 05:01:05 +01:00
|
|
|
|
final var headers = new HashMap<>(getOriginReferrerHeaders(YOUTUBE_MUSIC_URL));
|
2024-04-20 11:43:54 +02:00
|
|
|
|
headers.putAll(getClientHeaders(YOUTUBE_MUSIC_CLIENT_ID,
|
|
|
|
|
youtubeMusicClientVersion));
|
2022-06-18 16:07:32 +02:00
|
|
|
|
return headers;
|
|
|
|
|
}
|
|
|
|
|
|
2021-04-07 12:25:59 +02:00
|
|
|
|
/**
|
2023-01-02 18:11:03 +01:00
|
|
|
|
* Returns a {@link Map} containing the required YouTube headers, including the
|
|
|
|
|
* <code>CONSENT</code> cookie to prevent redirects to <code>consent.youtube.com</code>
|
2021-04-07 12:25:59 +02:00
|
|
|
|
*/
|
2022-11-12 05:01:05 +01:00
|
|
|
|
public static Map<String, List<String>> getYouTubeHeaders()
|
|
|
|
|
throws ExtractionException, IOException {
|
|
|
|
|
final var headers = getClientInfoHeaders();
|
|
|
|
|
headers.put("Cookie", List.of(generateConsentCookie()));
|
|
|
|
|
return headers;
|
2021-04-07 12:25:59 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
2022-11-12 05:01:05 +01:00
|
|
|
|
* Returns a {@link Map} containing the {@code X-YouTube-Client-Name},
|
|
|
|
|
* {@code X-YouTube-Client-Version}, {@code Origin}, and {@code Referer} headers.
|
2021-04-07 12:25:59 +02:00
|
|
|
|
*/
|
2022-11-12 05:01:05 +01:00
|
|
|
|
public static Map<String, List<String>> getClientInfoHeaders()
|
|
|
|
|
throws ExtractionException, IOException {
|
|
|
|
|
final var headers = new HashMap<>(getOriginReferrerHeaders("https://www.youtube.com"));
|
2024-04-20 11:43:54 +02:00
|
|
|
|
headers.putAll(getClientHeaders(WEB_CLIENT_ID, getClientVersion()));
|
2022-11-12 05:01:05 +01:00
|
|
|
|
return headers;
|
2021-04-07 12:25:59 +02:00
|
|
|
|
}
|
|
|
|
|
|
2022-07-27 03:26:02 +02:00
|
|
|
|
/**
|
2022-11-12 05:01:05 +01:00
|
|
|
|
* Returns an unmodifiable {@link Map} containing the {@code Origin} and {@code Referer}
|
|
|
|
|
* headers set to the given URL.
|
|
|
|
|
*
|
|
|
|
|
* @param url The URL to be set as the origin and referrer.
|
2022-07-27 03:26:02 +02:00
|
|
|
|
*/
|
2022-11-12 05:01:05 +01:00
|
|
|
|
private static Map<String, List<String>> getOriginReferrerHeaders(@Nonnull final String url) {
|
|
|
|
|
final var urlList = List.of(url);
|
|
|
|
|
return Map.of("Origin", urlList, "Referer", urlList);
|
2022-07-27 03:26:02 +02:00
|
|
|
|
}
|
|
|
|
|
|
2021-04-07 12:25:59 +02:00
|
|
|
|
/**
|
2022-11-12 05:01:05 +01:00
|
|
|
|
* Returns an unmodifiable {@link Map} containing the {@code X-YouTube-Client-Name} and
|
|
|
|
|
* {@code X-YouTube-Client-Version} headers.
|
|
|
|
|
*
|
|
|
|
|
* @param name The X-YouTube-Client-Name value.
|
|
|
|
|
* @param version X-YouTube-Client-Version value.
|
2021-04-07 12:25:59 +02:00
|
|
|
|
*/
|
2022-11-12 05:01:05 +01:00
|
|
|
|
private static Map<String, List<String>> getClientHeaders(@Nonnull final String name,
|
|
|
|
|
@Nonnull final String version) {
|
|
|
|
|
return Map.of("X-YouTube-Client-Name", List.of(name),
|
|
|
|
|
"X-YouTube-Client-Version", List.of(version));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Create a map with the required cookie header.
|
|
|
|
|
* @return A singleton map containing the header.
|
|
|
|
|
*/
|
|
|
|
|
public static Map<String, List<String>> getCookieHeader() {
|
|
|
|
|
return Map.of("Cookie", List.of(generateConsentCookie()));
|
2021-04-07 12:25:59 +02:00
|
|
|
|
}
|
|
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
|
@Nonnull
|
2021-04-08 16:36:55 +02:00
|
|
|
|
public static String generateConsentCookie() {
|
2023-10-07 19:36:02 +02:00
|
|
|
|
return "SOCS=" + (isConsentAccepted()
|
|
|
|
|
// CAISAiAD means that the user configured manually cookies YouTube, regardless of
|
|
|
|
|
// the consent values
|
|
|
|
|
// This value surprisingly allows to extract mixes and some YouTube Music playlists
|
|
|
|
|
// in the same way when a user allows all cookies
|
|
|
|
|
? "CAISAiAD"
|
|
|
|
|
// CAE= means that the user rejected all non-necessary cookies with the "Reject
|
|
|
|
|
// all" button on the consent page
|
|
|
|
|
: "CAE=");
|
2021-04-08 16:36:55 +02:00
|
|
|
|
}
|
|
|
|
|
|
2021-06-11 13:34:23 +02:00
|
|
|
|
public static String extractCookieValue(final String cookieName,
|
|
|
|
|
@Nonnull final Response response) {
|
2021-04-07 12:25:59 +02:00
|
|
|
|
final List<String> cookies = response.responseHeaders().get("set-cookie");
|
2022-03-01 23:02:56 +01:00
|
|
|
|
if (cookies == null) {
|
2022-08-15 05:49:40 +02:00
|
|
|
|
return "";
|
2022-03-01 23:02:56 +01:00
|
|
|
|
}
|
|
|
|
|
|
2022-08-15 05:49:40 +02:00
|
|
|
|
String result = "";
|
2021-04-07 12:25:59 +02:00
|
|
|
|
for (final String cookie : cookies) {
|
2022-03-01 23:02:56 +01:00
|
|
|
|
final int startIndex = cookie.indexOf(cookieName);
|
2021-04-07 12:25:59 +02:00
|
|
|
|
if (startIndex != -1) {
|
|
|
|
|
result = cookie.substring(startIndex + cookieName.length() + "=".length(),
|
|
|
|
|
cookie.indexOf(";", startIndex));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-01 01:52:25 +01:00
|
|
|
|
/**
|
|
|
|
|
* Shared alert detection function, multiple endpoints return the error similarly structured.
|
|
|
|
|
* <p>
|
|
|
|
|
* Will check if the object has an alert of the type "ERROR".
|
2020-04-01 16:01:21 +02:00
|
|
|
|
* </p>
|
2020-03-01 01:52:25 +01:00
|
|
|
|
*
|
|
|
|
|
* @param initialData the object which will be checked if an alert is present
|
|
|
|
|
* @throws ContentNotAvailableException if an alert is detected
|
|
|
|
|
*/
|
2021-06-11 13:34:23 +02:00
|
|
|
|
public static void defaultAlertsCheck(@Nonnull final JsonObject initialData)
|
|
|
|
|
throws ParsingException {
|
2020-03-01 01:52:25 +01:00
|
|
|
|
final JsonArray alerts = initialData.getArray("alerts");
|
2020-04-15 18:49:58 +02:00
|
|
|
|
if (!isNullOrEmpty(alerts)) {
|
2020-03-01 01:52:25 +01:00
|
|
|
|
final JsonObject alertRenderer = alerts.getObject(0).getObject("alertRenderer");
|
2020-05-03 10:28:45 +02:00
|
|
|
|
final String alertText = getTextFromObject(alertRenderer.getObject("text"));
|
2022-08-15 05:49:40 +02:00
|
|
|
|
final String alertType = alertRenderer.getString("type", "");
|
2020-03-01 01:52:25 +01:00
|
|
|
|
if (alertType.equalsIgnoreCase("ERROR")) {
|
2024-03-20 14:49:19 +01:00
|
|
|
|
if (alertText != null
|
|
|
|
|
&& (alertText.contains("This account has been terminated")
|
|
|
|
|
|| alertText.contains("This channel was removed"))) {
|
|
|
|
|
if (alertText.matches(".*violat(ed|ion|ing).*")
|
2021-03-23 00:15:21 +01:00
|
|
|
|
|| alertText.contains("infringement")) {
|
2021-06-11 13:34:23 +02:00
|
|
|
|
// Possible error messages:
|
2022-03-18 15:09:06 +01:00
|
|
|
|
// "This account has been terminated for a violation of YouTube's Terms of
|
|
|
|
|
// Service."
|
|
|
|
|
// "This account has been terminated due to multiple or severe violations of
|
|
|
|
|
// YouTube's policy prohibiting hate speech."
|
|
|
|
|
// "This account has been terminated due to multiple or severe violations of
|
|
|
|
|
// YouTube's policy prohibiting content designed to harass, bully or
|
|
|
|
|
// threaten."
|
|
|
|
|
// "This account has been terminated due to multiple or severe violations
|
|
|
|
|
// of YouTube's policy against spam, deceptive practices and misleading
|
|
|
|
|
// content or other Terms of Service violations."
|
|
|
|
|
// "This account has been terminated due to multiple or severe violations of
|
|
|
|
|
// YouTube's policy on nudity or sexual content."
|
|
|
|
|
// "This account has been terminated for violating YouTube's Community
|
|
|
|
|
// Guidelines."
|
|
|
|
|
// "This account has been terminated because we received multiple
|
|
|
|
|
// third-party claims of copyright infringement regarding material that
|
|
|
|
|
// the user posted."
|
|
|
|
|
// "This account has been terminated because it is linked to an account that
|
|
|
|
|
// received multiple third-party claims of copyright infringement."
|
2024-03-20 14:49:19 +01:00
|
|
|
|
// "This channel was removed because it violated our Community Guidelines."
|
2022-03-18 15:09:06 +01:00
|
|
|
|
throw new AccountTerminatedException(alertText,
|
|
|
|
|
AccountTerminatedException.Reason.VIOLATION);
|
2021-03-22 10:35:05 +01:00
|
|
|
|
} else {
|
|
|
|
|
throw new AccountTerminatedException(alertText);
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-03-01 01:52:25 +01:00
|
|
|
|
throw new ContentNotAvailableException("Got error: \"" + alertText + "\"");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-12-20 19:54:12 +01:00
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Sometimes, YouTube provides URLs which use Google's cache. They look like
|
|
|
|
|
* {@code https://webcache.googleusercontent.com/search?q=cache:CACHED_URL}
|
2021-02-07 22:12:22 +01:00
|
|
|
|
*
|
2020-12-20 19:54:12 +01:00
|
|
|
|
* @param url the URL which might refer to the Google's webcache
|
|
|
|
|
* @return the URL which is referring to the original site
|
|
|
|
|
*/
|
|
|
|
|
public static String extractCachedUrlIfNeeded(final String url) {
|
|
|
|
|
if (url == null) {
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
if (url.contains("webcache.googleusercontent.com")) {
|
|
|
|
|
return url.split("cache:")[1];
|
|
|
|
|
}
|
|
|
|
|
return url;
|
|
|
|
|
}
|
2021-01-22 01:44:58 +01:00
|
|
|
|
|
|
|
|
|
public static boolean isVerified(final JsonArray badges) {
|
|
|
|
|
if (Utils.isNullOrEmpty(badges)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-18 15:09:06 +01:00
|
|
|
|
for (final Object badge : badges) {
|
2021-01-22 01:44:58 +01:00
|
|
|
|
final String style = ((JsonObject) badge).getObject("metadataBadgeRenderer")
|
|
|
|
|
.getString("style");
|
|
|
|
|
if (style != null && (style.equals("BADGE_STYLE_TYPE_VERIFIED")
|
|
|
|
|
|| style.equals("BADGE_STYLE_TYPE_VERIFIED_ARTIST"))) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2021-03-05 13:33:25 +01:00
|
|
|
|
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
/**
|
|
|
|
|
* Generate a content playback nonce (also called {@code cpn}), sent by YouTube clients in
|
|
|
|
|
* playback requests (and also for some clients, in the player request body).
|
|
|
|
|
*
|
|
|
|
|
* @return a content playback nonce string
|
|
|
|
|
*/
|
|
|
|
|
@Nonnull
|
|
|
|
|
public static String generateContentPlaybackNonce() {
|
2022-02-07 21:23:38 +01:00
|
|
|
|
return RandomStringFromAlphabetGenerator.generate(
|
|
|
|
|
CONTENT_PLAYBACK_NONCE_ALPHABET, 16, numberGenerator);
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Try to generate a {@code t} parameter, sent by mobile clients as a query of the player
|
|
|
|
|
* request.
|
|
|
|
|
*
|
|
|
|
|
* <p>
|
|
|
|
|
* Some researches needs to be done to know how this parameter, unique at each request, is
|
|
|
|
|
* generated.
|
|
|
|
|
* </p>
|
|
|
|
|
*
|
|
|
|
|
* @return a 12 characters string to try to reproduce the {@code} parameter
|
|
|
|
|
*/
|
|
|
|
|
@Nonnull
|
|
|
|
|
public static String generateTParameter() {
|
2022-02-07 21:23:38 +01:00
|
|
|
|
return RandomStringFromAlphabetGenerator.generate(
|
|
|
|
|
CONTENT_PLAYBACK_NONCE_ALPHABET, 12, numberGenerator);
|
[YouTube] Add the cpn param to playback requests and try to spoof better the Android client
The cpn param, aka the content playback nonce param, is a parameter sent by YouTube web client in videoplayback requests, and for some of them, in the player request body. This PR adds it everywhere.
For the desktop/WEB client, some params were missing from the playbackContext object, which seemed (or not) to make YouTube throttle streams extracted from the WEB client. This PR adds them.
Fingerprinting on the WEB client basing on the client version used is not possible anymore, because the latest client version is extracted at the first time of a YouTube request on a session which require the extractor to fetch again the website (and this may come back the reCaptcha issues again unfortunately, but it seems there is no other way to get it).
For the Android client, the video id is now also sent as a query parameter, like a 12 characters string, in the t query parameter, in order to spoof better this client. Researches need to be done on this parameter, unique to each request, and how it is generated by clients.
This commit also fixes a small bug with the Android User-Agent string.
Some code improvements have been also made.
2021-12-22 17:55:41 +01:00
|
|
|
|
}
|
2022-03-06 20:10:11 +01:00
|
|
|
|
|
|
|
|
|
/**
|
2022-03-15 11:19:13 +01:00
|
|
|
|
* Check if the streaming URL is from the YouTube {@code WEB} client.
|
2022-03-06 20:10:11 +01:00
|
|
|
|
*
|
2022-03-15 11:19:13 +01:00
|
|
|
|
* @param url the streaming URL to be checked.
|
2022-03-06 20:10:11 +01:00
|
|
|
|
* @return true if it's a {@code WEB} streaming URL, false otherwise
|
|
|
|
|
*/
|
|
|
|
|
public static boolean isWebStreamingUrl(@Nonnull final String url) {
|
|
|
|
|
return Parser.isMatch(C_WEB_PATTERN, url);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Check if the streaming URL is a URL from the YouTube {@code TVHTML5_SIMPLY_EMBEDDED_PLAYER}
|
|
|
|
|
* client.
|
|
|
|
|
*
|
|
|
|
|
* @param url the streaming URL on which check if it's a {@code TVHTML5_SIMPLY_EMBEDDED_PLAYER}
|
|
|
|
|
* streaming URL.
|
|
|
|
|
* @return true if it's a {@code TVHTML5_SIMPLY_EMBEDDED_PLAYER} streaming URL, false otherwise
|
|
|
|
|
*/
|
|
|
|
|
public static boolean isTvHtml5SimplyEmbeddedPlayerStreamingUrl(@Nonnull final String url) {
|
|
|
|
|
return Parser.isMatch(C_TVHTML5_SIMPLY_EMBEDDED_PLAYER_PATTERN, url);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Check if the streaming URL is a URL from the YouTube {@code ANDROID} client.
|
|
|
|
|
*
|
2022-03-15 11:19:13 +01:00
|
|
|
|
* @param url the streaming URL to be checked.
|
2022-03-06 20:10:11 +01:00
|
|
|
|
* @return true if it's a {@code ANDROID} streaming URL, false otherwise
|
|
|
|
|
*/
|
|
|
|
|
public static boolean isAndroidStreamingUrl(@Nonnull final String url) {
|
|
|
|
|
return Parser.isMatch(C_ANDROID_PATTERN, url);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Check if the streaming URL is a URL from the YouTube {@code IOS} client.
|
|
|
|
|
*
|
|
|
|
|
* @param url the streaming URL on which check if it's a {@code IOS} streaming URL.
|
|
|
|
|
* @return true if it's a {@code IOS} streaming URL, false otherwise
|
|
|
|
|
*/
|
|
|
|
|
public static boolean isIosStreamingUrl(@Nonnull final String url) {
|
|
|
|
|
return Parser.isMatch(C_IOS_PATTERN, url);
|
|
|
|
|
}
|
2022-07-30 16:05:52 +02:00
|
|
|
|
|
2022-08-21 18:27:31 +02:00
|
|
|
|
/**
|
2023-10-07 19:36:02 +02:00
|
|
|
|
* Determines how the consent cookie that is required for YouTube, {@code SOCS}, will be
|
|
|
|
|
* generated.
|
|
|
|
|
*
|
|
|
|
|
* <ul>
|
|
|
|
|
* <li>{@code false} (the default value) will use {@code CAE=};</li>
|
|
|
|
|
* <li>{@code true} will use {@code CAISAiAD}.</li>
|
|
|
|
|
* </ul>
|
|
|
|
|
*
|
|
|
|
|
* <p>
|
|
|
|
|
* Setting this value to {@code true} is needed to extract mixes and some YouTube Music
|
|
|
|
|
* playlists in some countries such as the EU ones.
|
|
|
|
|
* </p>
|
2022-08-21 18:27:31 +02:00
|
|
|
|
*/
|
2022-07-30 16:05:52 +02:00
|
|
|
|
public static void setConsentAccepted(final boolean accepted) {
|
|
|
|
|
consentAccepted = accepted;
|
|
|
|
|
}
|
|
|
|
|
|
2022-08-21 18:27:31 +02:00
|
|
|
|
/**
|
2023-10-07 19:36:02 +02:00
|
|
|
|
* Get the value of the consent's acceptance.
|
|
|
|
|
*
|
|
|
|
|
* @see #setConsentAccepted(boolean)
|
|
|
|
|
* @return the consent's acceptance value
|
2022-08-21 18:27:31 +02:00
|
|
|
|
*/
|
2022-07-30 16:05:52 +02:00
|
|
|
|
public static boolean isConsentAccepted() {
|
|
|
|
|
return consentAccepted;
|
|
|
|
|
}
|
2023-03-28 00:02:20 +02:00
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Extract the audio track type from a YouTube stream URL.
|
|
|
|
|
* <p>
|
|
|
|
|
* The track type is parsed from the {@code xtags} URL parameter
|
|
|
|
|
* (Example: {@code acont=original:lang=en}).
|
|
|
|
|
* </p>
|
|
|
|
|
* @param streamUrl YouTube stream URL
|
|
|
|
|
* @return {@link AudioTrackType} or {@code null} if no track type was found
|
|
|
|
|
*/
|
|
|
|
|
@Nullable
|
|
|
|
|
public static AudioTrackType extractAudioTrackType(final String streamUrl) {
|
|
|
|
|
final String xtags;
|
|
|
|
|
try {
|
|
|
|
|
xtags = Utils.getQueryValue(new URL(streamUrl), "xtags");
|
|
|
|
|
} catch (final MalformedURLException e) {
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
if (xtags == null) {
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
String atype = null;
|
|
|
|
|
for (final String param : xtags.split(":")) {
|
|
|
|
|
final String[] kv = param.split("=", 2);
|
|
|
|
|
if (kv.length > 1 && kv[0].equals("acont")) {
|
|
|
|
|
atype = kv[1];
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (atype == null) {
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch (atype) {
|
|
|
|
|
case "original":
|
|
|
|
|
return AudioTrackType.ORIGINAL;
|
|
|
|
|
case "dubbed":
|
|
|
|
|
return AudioTrackType.DUBBED;
|
|
|
|
|
case "descriptive":
|
|
|
|
|
return AudioTrackType.DESCRIPTIVE;
|
|
|
|
|
default:
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-03-01 18:47:52 +01:00
|
|
|
|
}
|