NewPipeExtractor/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubePlaylistExtractor.java

288 lines
11 KiB
Java

package org.schabi.newpipe.extractor.services.youtube;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull;
import java.io.IOException;
@SuppressWarnings("WeakerAccess")
public class YoutubePlaylistExtractor extends PlaylistExtractor {
private Document doc;
public YoutubePlaylistExtractor(StreamingService service, String url) {
super(service, url);
}
@Override
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
String pageContent = downloader.download(getCleanUrl());
doc = Jsoup.parse(pageContent, getCleanUrl());
}
@Override
public String getNextPageUrl() throws ExtractionException {
return getNextPageUrlFrom(doc);
}
@Nonnull
@Override
public String getId() throws ParsingException {
try {
return getUrlIdHandler().getId(getCleanUrl());
} catch (Exception e) {
throw new ParsingException("Could not get playlist id");
}
}
@Nonnull
@Override
public String getName() throws ParsingException {
try {
return doc.select("div[id=pl-header] h1[class=pl-header-title]").first().text();
} catch (Exception e) {
throw new ParsingException("Could not get playlist name");
}
}
@Override
public String getThumbnailUrl() throws ParsingException {
try {
return doc.select("div[id=pl-header] div[class=pl-header-thumb] img").first().attr("abs:src");
} catch (Exception e) {
throw new ParsingException("Could not get playlist thumbnail");
}
}
@Override
public String getBannerUrl() {
return ""; // Banner can't be handled by frontend right now.
// Whoever is willing to implement this should also implement this in the fornt end
}
@Override
public String getUploaderUrl() throws ParsingException {
try {
return doc.select("ul[class=\"pl-header-details\"] li").first().select("a").first().attr("abs:href");
} catch (Exception e) {
throw new ParsingException("Could not get playlist uploader name");
}
}
@Override
public String getUploaderName() throws ParsingException {
try {
return doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text();
} catch (Exception e) {
throw new ParsingException("Could not get playlist uploader name");
}
}
@Override
public String getUploaderAvatarUrl() throws ParsingException {
try {
return doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("abs:src");
} catch (Exception e) {
throw new ParsingException("Could not get playlist uploader avatar");
}
}
@Override
public long getStreamCount() throws ParsingException {
String input;
try {
input = doc.select("ul[class=\"pl-header-details\"] li").get(1).text();
} catch (IndexOutOfBoundsException e) {
throw new ParsingException("Could not get video count from playlist", e);
}
try {
return Long.parseLong(Utils.removeNonDigitCharacters(input));
} catch (NumberFormatException e) {
// When there's no videos in a playlist, there's no number in the "innerHtml",
// all characters that is not a number is removed, so we try to parse a empty string
if (!input.isEmpty()) {
return 0;
} else {
throw new ParsingException("Could not handle input: " + input, e);
}
}
}
@Nonnull
@Override
public InfoItemsPage<StreamInfoItem> getInitialPage() throws IOException, ExtractionException {
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
Element tbody = doc.select("tbody[id=\"pl-load-more-destination\"]").first();
collectStreamsFrom(collector, tbody);
return new InfoItemsPage<>(collector, getNextPageUrl());
}
@Override
public InfoItemsPage<StreamInfoItem> getPage(final String pageUrl) throws IOException, ExtractionException {
if (pageUrl == null || pageUrl.isEmpty()) {
throw new ExtractionException(new IllegalArgumentException("Page url is empty or null"));
}
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
JsonObject pageJson;
try {
pageJson = JsonParser.object().from(NewPipe.getDownloader().download(pageUrl));
} catch (JsonParserException pe) {
throw new ParsingException("Could not parse ajax json", pe);
}
final Document pageHtml = Jsoup.parse("<table><tbody id=\"pl-load-more-destination\">"
+ pageJson.getString("content_html")
+ "</tbody></table>", pageUrl);
collectStreamsFrom(collector, pageHtml.select("tbody[id=\"pl-load-more-destination\"]").first());
return new InfoItemsPage<>(collector, getNextPageUrlFromAjax(pageJson, pageUrl));
}
private String getNextPageUrlFromAjax(final JsonObject pageJson, final String pageUrl)
throws ParsingException{
String nextPageHtml = pageJson.getString("load_more_widget_html");
if (!nextPageHtml.isEmpty()) {
return getNextPageUrlFrom(Jsoup.parse(nextPageHtml, pageUrl));
} else {
return "";
}
}
private String getNextPageUrlFrom(Document d) throws ParsingException {
try {
Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
if (button != null) {
return button.attr("abs:data-uix-load-more-href");
} else {
// Sometimes playlists are simply so small, they don't have a more streams/videos
return "";
}
} catch (Exception e) {
throw new ParsingException("could not get next streams' url", e);
}
}
private void collectStreamsFrom(StreamInfoItemsCollector collector, Element element) throws ParsingException {
collector.reset();
final UrlIdHandler streamUrlIdHandler = getService().getStreamUrlIdHandler();
for (final Element li : element.children()) {
if(isDeletedItem(li)) {
continue;
}
collector.commit(new YoutubeStreamInfoItemExtractor(li) {
public Element uploaderLink;
@Override
public boolean isAd() throws ParsingException {
return false;
}
@Override
public String getUrl() throws ParsingException {
try {
return streamUrlIdHandler.getUrl(li.attr("data-video-id"));
} catch (Exception e) {
throw new ParsingException("Could not get web page url for the video", e);
}
}
@Override
public String getName() throws ParsingException {
try {
return li.attr("data-title");
} catch (Exception e) {
throw new ParsingException("Could not get title", e);
}
}
@Override
public long getDuration() throws ParsingException {
try {
if (getStreamType() == StreamType.LIVE_STREAM) return -1;
Element first = li.select("div[class=\"timestamp\"] span").first();
if (first == null) {
// Video unavailable (private, deleted, etc.), this is a thing that happens specifically with playlists,
// because in other cases, those videos don't even show up
return -1;
}
return YoutubeParsingHelper.parseDurationString(first.text());
} catch (Exception e) {
throw new ParsingException("Could not get duration" + getUrl(), e);
}
}
private Element getUploaderLink() {
// should always be present since we filter deleted items
if(uploaderLink == null) {
uploaderLink = li.select("div[class=pl-video-owner] a").first();
}
return uploaderLink;
}
@Override
public String getUploaderName() throws ParsingException {
return getUploaderLink().text();
}
@Override
public String getUploaderUrl() throws ParsingException {
return getUploaderLink().attr("abs:href");
}
@Override
public String getUploadDate() throws ParsingException {
return "";
}
@Override
public long getViewCount() throws ParsingException {
return -1;
}
@Override
public String getThumbnailUrl() throws ParsingException {
try {
return "https://i.ytimg.com/vi/" + streamUrlIdHandler.getId(getUrl()) + "/hqdefault.jpg";
} catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e);
}
}
});
}
}
/**
* Check if the playlist item is deleted
* @param li the list item
* @return true if the item is deleted
*/
private boolean isDeletedItem(Element li) {
return li.select("div[class=pl-video-owner] a").isEmpty();
}
}