2018-05-26 19:15:45 +02:00
|
|
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
|
|
|
|
2020-02-22 20:19:41 +01:00
|
|
|
import com.grack.nanojson.JsonArray;
|
|
|
|
import com.grack.nanojson.JsonObject;
|
|
|
|
import com.grack.nanojson.JsonParser;
|
|
|
|
import com.grack.nanojson.JsonParserException;
|
|
|
|
|
2018-05-26 19:15:45 +02:00
|
|
|
import org.jsoup.nodes.Document;
|
|
|
|
import org.schabi.newpipe.extractor.InfoItem;
|
|
|
|
import org.schabi.newpipe.extractor.StreamingService;
|
2019-04-28 22:03:16 +02:00
|
|
|
import org.schabi.newpipe.extractor.downloader.Downloader;
|
|
|
|
import org.schabi.newpipe.extractor.downloader.Response;
|
2018-05-26 19:15:45 +02:00
|
|
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
2019-10-02 07:02:01 +02:00
|
|
|
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
|
2019-04-28 22:03:16 +02:00
|
|
|
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
2018-05-26 19:15:45 +02:00
|
|
|
import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector;
|
|
|
|
import org.schabi.newpipe.extractor.search.SearchExtractor;
|
2019-10-29 06:00:29 +01:00
|
|
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
2018-05-26 19:15:45 +02:00
|
|
|
|
|
|
|
import java.io.IOException;
|
2020-02-24 18:24:36 +01:00
|
|
|
import java.util.Collections;
|
|
|
|
import java.util.HashMap;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.Map;
|
2018-05-26 19:15:45 +02:00
|
|
|
|
2020-02-22 20:19:41 +01:00
|
|
|
import javax.annotation.Nonnull;
|
|
|
|
|
2018-07-01 16:21:40 +02:00
|
|
|
/*
|
|
|
|
* Created by Christian Schabesberger on 22.07.2018
|
|
|
|
*
|
|
|
|
* Copyright (C) Christian Schabesberger 2018 <chris.schabesberger@mailbox.org>
|
|
|
|
* YoutubeSearchExtractor.java is part of NewPipe.
|
|
|
|
*
|
|
|
|
* NewPipe is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* NewPipe is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
2018-05-26 19:15:45 +02:00
|
|
|
public class YoutubeSearchExtractor extends SearchExtractor {
|
|
|
|
|
|
|
|
/** Parsed search page; assigned in onFetchPage from YoutubeParsingHelper.parseAndCheckPage(). */
private Document doc;
|
2020-02-22 23:51:02 +01:00
|
|
|
/** Result of YoutubeParsingHelper.getInitialData() on the search page body; assigned in onFetchPage. */
private JsonObject initialData;
|
2018-05-26 19:15:45 +02:00
|
|
|
|
2019-04-28 22:03:16 +02:00
|
|
|
/**
 * Creates a search extractor. Both arguments are handed unchanged to the
 * {@link SearchExtractor} constructor; no other state is initialised here.
 *
 * @param service     the streaming service passed on to the superclass
 * @param linkHandler the search query handler passed on to the superclass
 */
public YoutubeSearchExtractor(final StreamingService service,
                              final SearchQueryHandler linkHandler) {
    super(service, linkHandler);
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
|
2018-10-25 15:46:47 +02:00
|
|
|
final String url = getUrl();
|
2019-04-28 22:03:16 +02:00
|
|
|
final Response response = downloader.get(url, getExtractorLocalization());
|
2019-10-29 06:00:29 +01:00
|
|
|
doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
|
2020-02-22 23:51:02 +01:00
|
|
|
initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
|
2018-05-26 19:15:45 +02:00
|
|
|
}
|
|
|
|
|
2019-04-28 22:03:16 +02:00
|
|
|
@Nonnull
|
2018-10-25 15:46:47 +02:00
|
|
|
@Override
|
|
|
|
public String getUrl() throws ParsingException {
|
2019-04-28 22:03:16 +02:00
|
|
|
return super.getUrl() + "&gl=" + getExtractorContentCountry().getCountryCode();
|
2018-10-25 15:46:47 +02:00
|
|
|
}
|
|
|
|
|
2018-05-26 19:15:45 +02:00
|
|
|
@Override
|
2018-10-06 12:22:37 +02:00
|
|
|
public String getSearchSuggestion() {
|
2020-02-24 18:24:36 +01:00
|
|
|
JsonObject showingResultsForRenderer = initialData.getObject("contents")
|
|
|
|
.getObject("twoColumnSearchResultsRenderer").getObject("primaryContents")
|
|
|
|
.getObject("sectionListRenderer").getArray("contents").getObject(0)
|
|
|
|
.getObject("itemSectionRenderer").getArray("contents").getObject(0)
|
|
|
|
.getObject("showingResultsForRenderer");
|
|
|
|
if (showingResultsForRenderer == null) {
|
2018-05-26 19:15:45 +02:00
|
|
|
return "";
|
2020-02-24 18:24:36 +01:00
|
|
|
} else {
|
|
|
|
return showingResultsForRenderer.getObject("correctedQuery").getArray("runs")
|
|
|
|
.getObject(0).getString("text");
|
2018-05-26 19:15:45 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Nonnull
|
|
|
|
@Override
|
2018-10-06 12:22:37 +02:00
|
|
|
public InfoItemsPage<InfoItem> getInitialPage() throws ExtractionException {
|
2020-02-24 18:24:36 +01:00
|
|
|
InfoItemsSearchCollector collector = getInfoItemSearchCollector();
|
|
|
|
JsonArray videos = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
|
|
|
|
.getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
|
|
|
|
.getObject(0).getObject("itemSectionRenderer").getArray("contents");
|
|
|
|
|
|
|
|
collectStreamsFrom(collector, videos);
|
|
|
|
return new InfoItemsPage<>(collector, getNextPageUrl());
|
2018-05-26 19:15:45 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2018-05-27 19:57:52 +02:00
|
|
|
public String getNextPageUrl() throws ExtractionException {
|
2020-02-24 18:24:36 +01:00
|
|
|
return getNextPageUrlFrom(initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
|
|
|
|
.getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
|
|
|
|
.getObject(0).getObject("itemSectionRenderer").getArray("continuations"));
|
2018-05-26 19:15:45 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public InfoItemsPage<InfoItem> getPage(String pageUrl) throws IOException, ExtractionException {
|
2020-02-24 18:24:36 +01:00
|
|
|
if (pageUrl == null || pageUrl.isEmpty()) {
|
|
|
|
throw new ExtractionException(new IllegalArgumentException("Page url is empty or null"));
|
|
|
|
}
|
2018-05-26 19:15:45 +02:00
|
|
|
|
2020-02-24 18:24:36 +01:00
|
|
|
InfoItemsSearchCollector collector = getInfoItemSearchCollector();
|
|
|
|
JsonArray ajaxJson;
|
2020-02-24 20:02:45 +01:00
|
|
|
|
|
|
|
Map<String, List<String>> headers = new HashMap<>();
|
|
|
|
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
|
|
|
|
|
2020-02-24 18:24:36 +01:00
|
|
|
try {
|
2020-02-24 20:02:45 +01:00
|
|
|
// Use the hardcoded client version first to get JSON with a structure we know
|
|
|
|
headers.put("X-YouTube-Client-Version",
|
|
|
|
Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION));
|
2020-02-24 18:24:36 +01:00
|
|
|
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
|
2020-02-24 20:02:45 +01:00
|
|
|
if (response.length() < 50) { // ensure to have a valid response
|
|
|
|
throw new ParsingException("Could not parse json data for next streams");
|
|
|
|
}
|
2020-02-24 18:24:36 +01:00
|
|
|
ajaxJson = JsonParser.array().from(response);
|
2020-02-24 20:02:45 +01:00
|
|
|
} catch (Exception e) {
|
|
|
|
try {
|
|
|
|
headers.put("X-YouTube-Client-Version",
|
|
|
|
Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString())));
|
|
|
|
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
|
|
|
|
if (response.length() < 50) { // ensure to have a valid response
|
|
|
|
throw new ParsingException("Could not parse json data for next streams");
|
|
|
|
}
|
|
|
|
ajaxJson = JsonParser.array().from(response);
|
|
|
|
} catch (JsonParserException ignored) {
|
|
|
|
throw new ParsingException("Could not parse json data for next streams", e);
|
|
|
|
}
|
2020-02-24 18:24:36 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
JsonObject itemSectionRenderer = ajaxJson.getObject(1).getObject("response")
|
|
|
|
.getObject("continuationContents").getObject("itemSectionContinuation");
|
2018-05-26 19:15:45 +02:00
|
|
|
|
2020-02-24 18:24:36 +01:00
|
|
|
collectStreamsFrom(collector, itemSectionRenderer.getArray("contents"));
|
2018-05-26 19:15:45 +02:00
|
|
|
|
2020-02-24 18:24:36 +01:00
|
|
|
return new InfoItemsPage<>(collector, getNextPageUrlFrom(itemSectionRenderer.getArray("continuations")));
|
2018-05-26 19:15:45 +02:00
|
|
|
}
|
|
|
|
|
2020-02-25 09:07:22 +01:00
|
|
|
private void collectStreamsFrom(InfoItemsSearchCollector collector, JsonArray videos) throws NothingFoundException {
|
2019-11-21 23:03:14 +01:00
|
|
|
collector.reset();
|
2018-05-26 19:15:45 +02:00
|
|
|
|
2019-04-28 22:03:16 +02:00
|
|
|
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
2018-05-26 19:15:45 +02:00
|
|
|
|
2020-02-24 18:24:36 +01:00
|
|
|
for (Object item : videos) {
|
2020-02-22 20:19:41 +01:00
|
|
|
if (((JsonObject) item).getObject("backgroundPromoRenderer") != null) {
|
|
|
|
throw new NothingFoundException(((JsonObject) item).getObject("backgroundPromoRenderer")
|
|
|
|
.getObject("bodyText").getArray("runs").getObject(0).getString("text"));
|
|
|
|
} else if (((JsonObject) item).getObject("videoRenderer") != null) {
|
|
|
|
collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) item).getObject("videoRenderer"), timeAgoParser));
|
|
|
|
} else if (((JsonObject) item).getObject("channelRenderer") != null) {
|
2020-02-23 18:27:28 +01:00
|
|
|
collector.commit(new YoutubeChannelInfoItemExtractor(((JsonObject) item).getObject("channelRenderer")));
|
2020-02-22 20:19:41 +01:00
|
|
|
} else if (((JsonObject) item).getObject("playlistRenderer") != null) {
|
2020-02-23 19:45:45 +01:00
|
|
|
collector.commit(new YoutubePlaylistInfoItemExtractor(((JsonObject) item).getObject("playlistRenderer")));
|
2018-05-26 19:15:45 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-02-22 20:19:41 +01:00
|
|
|
|
2020-02-24 18:24:36 +01:00
|
|
|
private String getNextPageUrlFrom(JsonArray continuations) throws ParsingException {
|
|
|
|
if (continuations == null) {
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
|
|
|
|
JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData");
|
|
|
|
String continuation = nextContinuationData.getString("continuation");
|
|
|
|
String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");
|
|
|
|
return getUrl() + "&pbj=1&ctoken=" + continuation + "&continuation=" + continuation
|
|
|
|
+ "&itct=" + clickTrackingParams;
|
|
|
|
}
|
2018-05-26 19:15:45 +02:00
|
|
|
}
|