NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java

248 lines
11 KiB
Java
Raw Normal View History

package org.schabi.newpipe.extractor.services.youtube.extractors;
2022-03-18 15:09:06 +01:00
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.YOUTUBEI_V1_URL;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getKey;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getValidJsonResponseBody;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory.getSearchParameter;
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonBuilder;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import com.grack.nanojson.JsonWriter;
import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.MetaInfo;
2020-04-15 14:09:46 +02:00
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.MultiInfoItemsCollector;
import org.schabi.newpipe.extractor.search.SearchExtractor;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.utils.JsonUtils;
2021-02-07 22:12:22 +01:00
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
2022-03-18 15:09:06 +01:00
import javax.annotation.Nonnull;
2020-02-27 17:39:23 +01:00
2018-07-01 16:21:40 +02:00
/*
* Created by Christian Schabesberger on 22.07.2018
*
* Copyright (C) Christian Schabesberger 2018 <chris.schabesberger@mailbox.org>
* YoutubeSearchExtractor.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
public class YoutubeSearchExtractor extends SearchExtractor {
private JsonObject initialData;
public YoutubeSearchExtractor(final StreamingService service,
final SearchQueryHandler linkHandler) {
super(service, linkHandler);
}
@Override
public void onFetchPage(@Nonnull final Downloader downloader) throws IOException,
ExtractionException {
final String query = super.getSearchString();
final Localization localization = getExtractorLocalization();
2020-02-26 09:01:57 +01:00
// Get the search parameter of the request
final List<String> contentFilters = super.getLinkHandler().getContentFilters();
final String params;
if (!isNullOrEmpty(contentFilters)) {
final String searchType = contentFilters.get(0);
params = getSearchParameter(searchType);
} else {
params = "";
}
2020-02-26 09:01:57 +01:00
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`: - the desktop API is fetched. If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API. - if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent. Otherwise, the next endpoint will be fetched normally, if the content is available. If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made. We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used. If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used. Other code changes: - `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder` - `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder` - two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder` - `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper` - a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor` - `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>` - the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page - some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
final JsonBuilder<JsonObject> jsonBody = prepareDesktopJsonBuilder(localization,
2021-06-24 18:39:16 +02:00
getExtractorContentCountry())
.value("query", query);
if (!isNullOrEmpty(params)) {
2021-06-24 18:39:16 +02:00
jsonBody.value("params", params);
}
2021-06-24 18:39:16 +02:00
final byte[] body = JsonWriter.string(jsonBody.done()).getBytes(UTF_8);
initialData = getJsonPostResponse("search", body, localization);
}
@Nonnull
2018-10-25 15:46:47 +02:00
@Override
public String getUrl() throws ParsingException {
return super.getUrl() + "&gl=" + getExtractorContentCountry().getCountryCode();
2018-10-25 15:46:47 +02:00
}
@Nonnull
@Override
public String getSearchSuggestion() throws ParsingException {
final JsonObject itemSectionRenderer = initialData.getObject("contents")
.getObject("twoColumnSearchResultsRenderer").getObject("primaryContents")
.getObject("sectionListRenderer").getArray("contents").getObject(0)
.getObject("itemSectionRenderer");
final JsonObject didYouMeanRenderer = itemSectionRenderer.getArray("contents").getObject(0)
.getObject("didYouMeanRenderer");
final JsonObject showingResultsForRenderer = itemSectionRenderer.getArray("contents")
.getObject(0)
.getObject("showingResultsForRenderer");
if (!didYouMeanRenderer.isEmpty()) {
return JsonUtils.getString(didYouMeanRenderer,
"correctedQueryEndpoint.searchEndpoint.query");
} else if (showingResultsForRenderer != null) {
return getTextFromObject(showingResultsForRenderer.getObject("correctedQuery"));
} else {
return "";
}
}
@Override
public boolean isCorrectedSearch() {
final JsonObject showingResultsForRenderer = initialData.getObject("contents")
.getObject("twoColumnSearchResultsRenderer").getObject("primaryContents")
.getObject("sectionListRenderer").getArray("contents").getObject(0)
.getObject("itemSectionRenderer").getArray("contents").getObject(0)
.getObject("showingResultsForRenderer");
return !showingResultsForRenderer.isEmpty();
}
@Nonnull
@Override
public List<MetaInfo> getMetaInfo() throws ParsingException {
return YoutubeParsingHelper.getMetaInfo(
initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
.getObject("primaryContents").getObject("sectionListRenderer")
.getArray("contents"));
}
@Nonnull
@Override
2020-07-26 12:00:56 +02:00
public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException {
final MultiInfoItemsCollector collector = new MultiInfoItemsCollector(getServiceId());
final JsonArray sections = initialData.getObject("contents")
.getObject("twoColumnSearchResultsRenderer").getObject("primaryContents")
.getObject("sectionListRenderer").getArray("contents");
2020-03-17 11:33:39 +01:00
2020-04-15 14:09:46 +02:00
Page nextPage = null;
for (final Object section : sections) {
if (((JsonObject) section).has("itemSectionRenderer")) {
final JsonObject itemSectionRenderer = ((JsonObject) section)
.getObject("itemSectionRenderer");
collectStreamsFrom(collector, itemSectionRenderer.getArray("contents"));
} else if (((JsonObject) section).has("continuationItemRenderer")) {
nextPage = getNextPageFrom(((JsonObject) section)
.getObject("continuationItemRenderer"));
}
}
2020-04-15 14:09:46 +02:00
return new InfoItemsPage<>(collector, nextPage);
}
@Override
public InfoItemsPage<InfoItem> getPage(final Page page) throws IOException,
ExtractionException {
if (page == null || isNullOrEmpty(page.getUrl())) {
throw new IllegalArgumentException("Page doesn't contain an URL");
}
final Localization localization = getExtractorLocalization();
final MultiInfoItemsCollector collector = new MultiInfoItemsCollector(getServiceId());
2020-02-26 09:01:57 +01:00
// @formatter:off
Rebase + some code improvements + fix extraction of age-restricted videos + update clients version Here is now the requests which will be made by the `onFetchPage` method of `YoutubeStreamExtractor`: - the desktop API is fetched. If there is no streaming data, the desktop player API with the embed client screen will be fetched (and also the player code), then the Android mobile API. - if there is no streaming data, a `ContentNotAvailableException` will be thrown by using the message provided in playability status If the video is age restricted, a request to the next endpoint of the desktop player with the embed client screen will be sent. Otherwise, the next endpoint will be fetched normally, if the content is available. If the video is not age-restricted, a request to the player endpoint of the Android mobile API will be made. We can get more streams by using the Android mobile API but some streams may be not available on this API, so the streaming data of the Android mobile API will be first used to get itags and then the streaming data of the desktop internal API will be used. If the parsing of the Android mobile API went wrong, only the streams of the desktop API will be used. Other code changes: - `prepareJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareDesktopJsonBuilder` - `prepareMobileJsonBuilder` in `YoutubeParsingHelper` was renamed to `prepareAndroidMobileJsonBuilder` - two new methods in `YoutubeParsingHelper` were added: `prepareDesktopEmbedVideoJsonBuilder` and `prepareAndroidMobileEmbedVideoJsonBuilder` - `createPlayerBodyWithSts` is now public and was moved to `YoutubeParsingHelper` - a new method in `YoutubeJavaScriptExtractor` was added: `resetJavaScriptCode`, which was needed for the method `resetDebofuscationCode` of `YoutubeStreamExtractor` - `areHardcodedClientVersionAndKeyValid` in `YoutubeParsingHelper` returns now a `boolean` instead of an `Optional<Boolean>` - the `fetchVideoInfoPage` method of `YoutubeStreamExtractor` was removed because YouTube returns now 404 for every client with the `get_video_info` page - some unused objects and some warnings in `YoutubeStreamExtractor` were removed and fixed Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
2021-07-28 23:55:09 +02:00
final byte[] json = JsonWriter.string(prepareDesktopJsonBuilder(localization,
getExtractorContentCountry())
.value("continuation", page.getId())
.done())
.getBytes(UTF_8);
// @formatter:on
final String responseBody = getValidJsonResponseBody(getDownloader().post(
page.getUrl(), new HashMap<>(), json));
final JsonObject ajaxJson;
try {
ajaxJson = JsonParser.object().from(responseBody);
2022-03-18 15:09:06 +01:00
} catch (final JsonParserException e) {
throw new ParsingException("Could not parse JSON", e);
}
2020-03-17 11:33:39 +01:00
final JsonArray continuationItems = ajaxJson.getArray("onResponseReceivedCommands")
.getObject(0).getObject("appendContinuationItemsAction")
.getArray("continuationItems");
2020-03-17 11:33:39 +01:00
final JsonArray contents = continuationItems.getObject(0)
.getObject("itemSectionRenderer").getArray("contents");
collectStreamsFrom(collector, contents);
return new InfoItemsPage<>(collector, getNextPageFrom(continuationItems.getObject(1)
.getObject("continuationItemRenderer")));
}
private void collectStreamsFrom(final MultiInfoItemsCollector collector,
final JsonArray contents) throws NothingFoundException,
ParsingException {
final TimeAgoParser timeAgoParser = getTimeAgoParser();
2021-05-29 14:43:50 +02:00
for (final Object content : contents) {
final JsonObject item = (JsonObject) content;
if (item.has("backgroundPromoRenderer")) {
throw new NothingFoundException(getTextFromObject(
item.getObject("backgroundPromoRenderer").getObject("bodyText")));
} else if (item.has("videoRenderer")) {
collector.commit(new YoutubeStreamInfoItemExtractor(item
.getObject("videoRenderer"), timeAgoParser));
} else if (item.has("channelRenderer")) {
collector.commit(new YoutubeChannelInfoItemExtractor(item
.getObject("channelRenderer")));
} else if (item.has("playlistRenderer")) {
collector.commit(new YoutubePlaylistInfoItemExtractor(item
.getObject("playlistRenderer")));
}
}
}
private Page getNextPageFrom(final JsonObject continuationItemRenderer) throws IOException,
ExtractionException {
if (isNullOrEmpty(continuationItemRenderer)) {
return null;
}
final String token = continuationItemRenderer.getObject("continuationEndpoint")
.getObject("continuationCommand").getString("token");
final String url = YOUTUBEI_V1_URL + "search?key=" + getKey();
return new Page(url, token);
}
}