NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java

263 lines
12 KiB
Java
Raw Normal View History

package org.schabi.newpipe.extractor.services.youtube.extractors;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.DISABLE_PRETTY_PRINT_PARAMETER;
2022-03-18 15:09:06 +01:00
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.YOUTUBEI_V1_URL;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory.ALL;
import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory.CHANNELS;
import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory.PLAYLISTS;
import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory.VIDEOS;
2022-03-18 15:09:06 +01:00
import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory.getSearchParameter;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonBuilder;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonWriter;
import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.MetaInfo;
2022-07-28 04:19:21 +02:00
import org.schabi.newpipe.extractor.MultiInfoItemsCollector;
2020-04-15 14:09:46 +02:00
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.search.SearchExtractor;
import org.schabi.newpipe.extractor.services.youtube.YoutubeMetaInfoHelper;
import org.schabi.newpipe.extractor.utils.JsonUtils;
2021-02-07 22:12:22 +01:00
import java.io.IOException;
2022-07-28 04:19:21 +02:00
import java.nio.charset.StandardCharsets;
2021-02-07 22:12:22 +01:00
import java.util.List;
import java.util.Objects;
2022-03-18 15:09:06 +01:00
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
2020-02-27 17:39:23 +01:00
2018-07-01 16:21:40 +02:00
/*
 * Created by Christian Schabesberger on 22.07.2018
 *
 * Copyright (C) 2018 Christian Schabesberger <chris.schabesberger@mailbox.org>
 * YoutubeSearchExtractor.java is part of NewPipe Extractor.
 *
 * NewPipe Extractor is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * NewPipe Extractor is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with NewPipe Extractor. If not, see <http://www.gnu.org/licenses/>.
 */
/**
 * {@link SearchExtractor} for YouTube search results.
 *
 * <p>The first results page is requested in {@code onFetchPage} with a POST to the
 * {@code search} endpoint and kept in {@code initialData}; the getters of this class read
 * the search suggestion, the meta info and the result items from that JSON. Further pages
 * are requested with the continuation token stored as the id of a {@link Page}.</p>
 */
public class YoutubeSearchExtractor extends SearchExtractor {
// First content filter of the search query handler, or null when no filter was given.
@Nullable
private final String searchType;
// Whether "videoRenderer" items are committed to the collector.
private final boolean extractVideoResults;
// Whether "channelRenderer" items are committed to the collector.
private final boolean extractChannelResults;
// Whether "playlistRenderer" items are committed to the collector.
private final boolean extractPlaylistResults;
// JSON response of the initial search request; assigned in onFetchPage.
private JsonObject initialData;
/**
 * Creates the extractor and derives from the first content filter which kinds of results
 * will be collected.
 *
 * @param service     forwarded to the {@link SearchExtractor} constructor
 * @param linkHandler search query handler; its first content filter, if any, becomes the
 *                    search type
 */
public YoutubeSearchExtractor(final StreamingService service,
                              final SearchQueryHandler linkHandler) {
    super(service, linkHandler);

    final List<String> filters = linkHandler.getContentFilters();
    if (isNullOrEmpty(filters)) {
        searchType = null;
    } else {
        searchType = filters.get(0);
    }

    // YouTube sometimes returns videos inside channel search results, so remember which
    // result kinds were requested. Without a search type, or with the ALL filter, every
    // kind is extracted.
    final boolean extractEverything = searchType == null || ALL.equals(searchType);
    extractVideoResults = extractEverything || VIDEOS.equals(searchType);
    extractChannelResults = extractEverything || CHANNELS.equals(searchType);
    extractPlaylistResults = extractEverything || PLAYLISTS.equals(searchType);
}
@Override
public void onFetchPage(@Nonnull final Downloader downloader) throws IOException,
ExtractionException {
final String query = super.getSearchString();
final Localization localization = getExtractorLocalization();
final String params = getSearchParameter(searchType);
2020-02-26 09:01:57 +01:00
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`: - the desktop API is fetched. If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API. - if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent. Otherwise, the next endpoint will be fetched normally, if the content is available. If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made. We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used. If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used. 
Other code changes: - `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder` - `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder` - two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder` - `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper` - a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor` - `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>` - the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page - some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
final JsonBuilder<JsonObject> jsonBody = prepareDesktopJsonBuilder(localization,
2021-06-24 18:39:16 +02:00
getExtractorContentCountry())
.value("query", query);
if (!isNullOrEmpty(params)) {
2021-06-24 18:39:16 +02:00
jsonBody.value("params", params);
}
2022-07-28 04:19:21 +02:00
final byte[] body = JsonWriter.string(jsonBody.done()).getBytes(StandardCharsets.UTF_8);
2021-06-24 18:39:16 +02:00
initialData = getJsonPostResponse("search", body, localization);
}
@Nonnull
2018-10-25 15:46:47 +02:00
@Override
public String getUrl() throws ParsingException {
return super.getUrl() + "&gl=" + getExtractorContentCountry().getCountryCode();
2018-10-25 15:46:47 +02:00
}
/**
 * Reads the search suggestion from the first item of the first section of the results.
 *
 * <p>When that item holds a non-empty {@code didYouMeanRenderer}, the query of its
 * corrected-query endpoint is returned. Otherwise the text of the
 * {@code showingResultsForRenderer}'s {@code correctedQuery} is returned, or an empty
 * string when that text is {@code null}.</p>
 *
 * @return the suggested or corrected query, never {@code null}
 * @throws ParsingException if the query cannot be read from the {@code didYouMeanRenderer}
 */
@Nonnull
@Override
public String getSearchSuggestion() throws ParsingException {
    final JsonObject firstItem = initialData.getObject("contents")
            .getObject("twoColumnSearchResultsRenderer")
            .getObject("primaryContents")
            .getObject("sectionListRenderer")
            .getArray("contents")
            .getObject(0)
            .getObject("itemSectionRenderer")
            .getArray("contents")
            .getObject(0);

    final JsonObject didYouMean = firstItem.getObject("didYouMeanRenderer");
    if (didYouMean.isEmpty()) {
        final String correctedQuery = getTextFromObject(
                firstItem.getObject("showingResultsForRenderer")
                        .getObject("correctedQuery"));
        return Objects.requireNonNullElse(correctedQuery, "");
    }

    return JsonUtils.getString(didYouMean,
            "correctedQueryEndpoint.searchEndpoint.query");
}
/**
 * Tells whether the first item of the first result section contains a non-empty
 * {@code showingResultsForRenderer}.
 *
 * @return {@code true} if such a renderer is present, {@code false} otherwise
 */
@Override
public boolean isCorrectedSearch() {
    final JsonArray sections = initialData.getObject("contents")
            .getObject("twoColumnSearchResultsRenderer")
            .getObject("primaryContents")
            .getObject("sectionListRenderer")
            .getArray("contents");
    final JsonArray firstSectionItems = sections.getObject(0)
            .getObject("itemSectionRenderer")
            .getArray("contents");
    final boolean noCorrectionShown = firstSectionItems.getObject(0)
            .getObject("showingResultsForRenderer")
            .isEmpty();
    return !noCorrectionShown;
}
/**
 * Passes the section list contents of the initial response to
 * {@link YoutubeMetaInfoHelper#getMetaInfo} and returns its result.
 *
 * @return the meta info produced by the helper
 * @throws ParsingException declared by the overridden method
 */
@Nonnull
@Override
public List<MetaInfo> getMetaInfo() throws ParsingException {
    final JsonArray sectionContents = initialData.getObject("contents")
            .getObject("twoColumnSearchResultsRenderer")
            .getObject("primaryContents")
            .getObject("sectionListRenderer")
            .getArray("contents");
    return YoutubeMetaInfoHelper.getMetaInfo(sectionContents);
}
@Nonnull
@Override
2020-07-26 12:00:56 +02:00
public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException {
final MultiInfoItemsCollector collector = new MultiInfoItemsCollector(getServiceId());
final JsonArray sections = initialData.getObject("contents")
.getObject("twoColumnSearchResultsRenderer")
.getObject("primaryContents")
.getObject("sectionListRenderer")
.getArray("contents");
2020-03-17 11:33:39 +01:00
2020-04-15 14:09:46 +02:00
Page nextPage = null;
for (final Object section : sections) {
final JsonObject sectionJsonObject = (JsonObject) section;
if (sectionJsonObject.has("itemSectionRenderer")) {
final JsonObject itemSectionRenderer =
sectionJsonObject.getObject("itemSectionRenderer");
collectStreamsFrom(collector, itemSectionRenderer.getArray("contents"));
} else if (sectionJsonObject.has("continuationItemRenderer")) {
nextPage = getNextPageFrom(
sectionJsonObject.getObject("continuationItemRenderer"));
}
}
2020-04-15 14:09:46 +02:00
return new InfoItemsPage<>(collector, nextPage);
}
@Override
public InfoItemsPage<InfoItem> getPage(final Page page) throws IOException,
ExtractionException {
if (page == null || isNullOrEmpty(page.getUrl())) {
throw new IllegalArgumentException("Page doesn't contain an URL");
}
final Localization localization = getExtractorLocalization();
final MultiInfoItemsCollector collector = new MultiInfoItemsCollector(getServiceId());
2020-02-26 09:01:57 +01:00
// @formatter:off
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`: - the desktop API is fetched. If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API. - if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent. Otherwise, the next endpoint will be fetched normally, if the content is available. If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made. We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used. If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used. 
Other code changes: - `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder` - `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder` - two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder` - `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper` - a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor` - `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>` - the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page - some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
final byte[] json = JsonWriter.string(prepareDesktopJsonBuilder(localization,
getExtractorContentCountry())
.value("continuation", page.getId())
.done())
2022-07-28 04:19:21 +02:00
.getBytes(StandardCharsets.UTF_8);
// @formatter:on
final JsonObject ajaxJson = getJsonPostResponse("search", json, localization);
2020-03-17 11:33:39 +01:00
final JsonArray continuationItems = ajaxJson.getArray("onResponseReceivedCommands")
.getObject(0)
.getObject("appendContinuationItemsAction")
.getArray("continuationItems");
2020-03-17 11:33:39 +01:00
final JsonArray contents = continuationItems.getObject(0)
.getObject("itemSectionRenderer")
.getArray("contents");
collectStreamsFrom(collector, contents);
return new InfoItemsPage<>(collector, getNextPageFrom(continuationItems.getObject(1)
.getObject("continuationItemRenderer")));
}
private void collectStreamsFrom(final MultiInfoItemsCollector collector,
@Nonnull final JsonArray contents)
throws NothingFoundException {
final TimeAgoParser timeAgoParser = getTimeAgoParser();
2021-05-29 14:43:50 +02:00
for (final Object content : contents) {
final JsonObject item = (JsonObject) content;
if (item.has("backgroundPromoRenderer")) {
throw new NothingFoundException(
getTextFromObject(item.getObject("backgroundPromoRenderer")
.getObject("bodyText")));
} else if (extractVideoResults && item.has("videoRenderer")) {
collector.commit(new YoutubeStreamInfoItemExtractor(
item.getObject("videoRenderer"), timeAgoParser));
} else if (extractChannelResults && item.has("channelRenderer")) {
collector.commit(new YoutubeChannelInfoItemExtractor(
item.getObject("channelRenderer")));
} else if (extractPlaylistResults && item.has("playlistRenderer")) {
collector.commit(new YoutubePlaylistInfoItemExtractor(
item.getObject("playlistRenderer")));
}
}
}
/**
 * Builds the next {@link Page} from a {@code continuationItemRenderer}.
 *
 * <p>The page URL is the {@code search} endpoint and the page id is the continuation
 * token read from {@code continuationEndpoint.continuationCommand.token}.</p>
 *
 * @param continuationItemRenderer the renderer holding the continuation endpoint; may be
 *                                 {@code null} or empty
 * @return the next page, or {@code null} when the renderer is {@code null} or empty or
 *         carries no continuation token
 */
@Nullable
private Page getNextPageFrom(final JsonObject continuationItemRenderer) {
    if (isNullOrEmpty(continuationItemRenderer)) {
        return null;
    }

    final String token = continuationItemRenderer.getObject("continuationEndpoint")
            .getObject("continuationCommand")
            .getString("token");
    if (isNullOrEmpty(token)) {
        // Without a token the page could not be requested: getPage only checks the URL
        // and would send a null continuation value.
        return null;
    }

    final String url = YOUTUBEI_V1_URL + "search?" + DISABLE_PRETTY_PRINT_PARAMETER;
    return new Page(url, token);
}
}