288 lines
11 KiB
Java
288 lines
11 KiB
Java
package org.schabi.newpipe.extractor.services.youtube;
|
|
|
|
import com.grack.nanojson.JsonObject;
|
|
import com.grack.nanojson.JsonParser;
|
|
import com.grack.nanojson.JsonParserException;
|
|
import org.jsoup.Jsoup;
|
|
import org.jsoup.nodes.Document;
|
|
import org.jsoup.nodes.Element;
|
|
import org.schabi.newpipe.extractor.Downloader;
|
|
import org.schabi.newpipe.extractor.NewPipe;
|
|
import org.schabi.newpipe.extractor.StreamingService;
|
|
import org.schabi.newpipe.extractor.UrlIdHandler;
|
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
|
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
|
|
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
|
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
|
import org.schabi.newpipe.extractor.stream.StreamType;
|
|
import org.schabi.newpipe.extractor.utils.Utils;
|
|
|
|
import javax.annotation.Nonnull;
|
|
import java.io.IOException;
|
|
|
|
@SuppressWarnings("WeakerAccess")
|
|
public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
|
|
|
private Document doc;
|
|
|
|
public YoutubePlaylistExtractor(StreamingService service, String url) {
|
|
super(service, url);
|
|
}
|
|
|
|
@Override
|
|
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
|
|
String pageContent = downloader.download(getCleanUrl());
|
|
doc = Jsoup.parse(pageContent, getCleanUrl());
|
|
}
|
|
|
|
@Override
|
|
public String getNextPageUrl() throws ExtractionException {
|
|
return getNextPageUrlFrom(doc);
|
|
}
|
|
|
|
@Nonnull
|
|
@Override
|
|
public String getId() throws ParsingException {
|
|
try {
|
|
return getUrlIdHandler().getId(getCleanUrl());
|
|
} catch (Exception e) {
|
|
throw new ParsingException("Could not get playlist id");
|
|
}
|
|
}
|
|
|
|
@Nonnull
|
|
@Override
|
|
public String getName() throws ParsingException {
|
|
try {
|
|
return doc.select("div[id=pl-header] h1[class=pl-header-title]").first().text();
|
|
} catch (Exception e) {
|
|
throw new ParsingException("Could not get playlist name");
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public String getThumbnailUrl() throws ParsingException {
|
|
try {
|
|
return doc.select("div[id=pl-header] div[class=pl-header-thumb] img").first().attr("abs:src");
|
|
} catch (Exception e) {
|
|
throw new ParsingException("Could not get playlist thumbnail");
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public String getBannerUrl() {
|
|
return ""; // Banner can't be handled by frontend right now.
|
|
// Whoever is willing to implement this should also implement this in the fornt end
|
|
}
|
|
|
|
@Override
|
|
public String getUploaderUrl() throws ParsingException {
|
|
try {
|
|
return doc.select("ul[class=\"pl-header-details\"] li").first().select("a").first().attr("abs:href");
|
|
} catch (Exception e) {
|
|
throw new ParsingException("Could not get playlist uploader name");
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public String getUploaderName() throws ParsingException {
|
|
try {
|
|
return doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text();
|
|
} catch (Exception e) {
|
|
throw new ParsingException("Could not get playlist uploader name");
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public String getUploaderAvatarUrl() throws ParsingException {
|
|
try {
|
|
return doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("abs:src");
|
|
} catch (Exception e) {
|
|
throw new ParsingException("Could not get playlist uploader avatar");
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public long getStreamCount() throws ParsingException {
|
|
String input;
|
|
|
|
try {
|
|
input = doc.select("ul[class=\"pl-header-details\"] li").get(1).text();
|
|
} catch (IndexOutOfBoundsException e) {
|
|
throw new ParsingException("Could not get video count from playlist", e);
|
|
}
|
|
|
|
try {
|
|
return Long.parseLong(Utils.removeNonDigitCharacters(input));
|
|
} catch (NumberFormatException e) {
|
|
// When there's no videos in a playlist, there's no number in the "innerHtml",
|
|
// all characters that is not a number is removed, so we try to parse a empty string
|
|
if (!input.isEmpty()) {
|
|
return 0;
|
|
} else {
|
|
throw new ParsingException("Could not handle input: " + input, e);
|
|
}
|
|
}
|
|
}
|
|
|
|
@Nonnull
|
|
@Override
|
|
public InfoItemsPage<StreamInfoItem> getInitialPage() throws IOException, ExtractionException {
|
|
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
|
Element tbody = doc.select("tbody[id=\"pl-load-more-destination\"]").first();
|
|
collectStreamsFrom(collector, tbody);
|
|
return new InfoItemsPage<>(collector, getNextPageUrl());
|
|
}
|
|
|
|
@Override
|
|
public InfoItemsPage<StreamInfoItem> getPage(final String pageUrl) throws IOException, ExtractionException {
|
|
if (pageUrl == null || pageUrl.isEmpty()) {
|
|
throw new ExtractionException(new IllegalArgumentException("Page url is empty or null"));
|
|
}
|
|
|
|
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
|
JsonObject pageJson;
|
|
try {
|
|
pageJson = JsonParser.object().from(NewPipe.getDownloader().download(pageUrl));
|
|
} catch (JsonParserException pe) {
|
|
throw new ParsingException("Could not parse ajax json", pe);
|
|
}
|
|
|
|
final Document pageHtml = Jsoup.parse("<table><tbody id=\"pl-load-more-destination\">"
|
|
+ pageJson.getString("content_html")
|
|
+ "</tbody></table>", pageUrl);
|
|
|
|
collectStreamsFrom(collector, pageHtml.select("tbody[id=\"pl-load-more-destination\"]").first());
|
|
|
|
return new InfoItemsPage<>(collector, getNextPageUrlFromAjax(pageJson, pageUrl));
|
|
}
|
|
|
|
private String getNextPageUrlFromAjax(final JsonObject pageJson, final String pageUrl)
|
|
throws ParsingException{
|
|
String nextPageHtml = pageJson.getString("load_more_widget_html");
|
|
if (!nextPageHtml.isEmpty()) {
|
|
return getNextPageUrlFrom(Jsoup.parse(nextPageHtml, pageUrl));
|
|
} else {
|
|
return "";
|
|
}
|
|
}
|
|
|
|
private String getNextPageUrlFrom(Document d) throws ParsingException {
|
|
try {
|
|
Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
|
|
if (button != null) {
|
|
return button.attr("abs:data-uix-load-more-href");
|
|
} else {
|
|
// Sometimes playlists are simply so small, they don't have a more streams/videos
|
|
return "";
|
|
}
|
|
} catch (Exception e) {
|
|
throw new ParsingException("could not get next streams' url", e);
|
|
}
|
|
}
|
|
|
|
private void collectStreamsFrom(StreamInfoItemsCollector collector, Element element) throws ParsingException {
|
|
collector.reset();
|
|
|
|
final UrlIdHandler streamUrlIdHandler = getService().getStreamUrlIdHandler();
|
|
for (final Element li : element.children()) {
|
|
if(isDeletedItem(li)) {
|
|
continue;
|
|
}
|
|
|
|
collector.commit(new YoutubeStreamInfoItemExtractor(li) {
|
|
public Element uploaderLink;
|
|
|
|
@Override
|
|
public boolean isAd() throws ParsingException {
|
|
return false;
|
|
}
|
|
|
|
@Override
|
|
public String getUrl() throws ParsingException {
|
|
try {
|
|
return streamUrlIdHandler.getUrl(li.attr("data-video-id"));
|
|
} catch (Exception e) {
|
|
throw new ParsingException("Could not get web page url for the video", e);
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public String getName() throws ParsingException {
|
|
try {
|
|
return li.attr("data-title");
|
|
} catch (Exception e) {
|
|
throw new ParsingException("Could not get title", e);
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public long getDuration() throws ParsingException {
|
|
try {
|
|
if (getStreamType() == StreamType.LIVE_STREAM) return -1;
|
|
|
|
Element first = li.select("div[class=\"timestamp\"] span").first();
|
|
if (first == null) {
|
|
// Video unavailable (private, deleted, etc.), this is a thing that happens specifically with playlists,
|
|
// because in other cases, those videos don't even show up
|
|
return -1;
|
|
}
|
|
|
|
return YoutubeParsingHelper.parseDurationString(first.text());
|
|
} catch (Exception e) {
|
|
throw new ParsingException("Could not get duration" + getUrl(), e);
|
|
}
|
|
}
|
|
|
|
|
|
private Element getUploaderLink() {
|
|
// should always be present since we filter deleted items
|
|
if(uploaderLink == null) {
|
|
uploaderLink = li.select("div[class=pl-video-owner] a").first();
|
|
}
|
|
return uploaderLink;
|
|
}
|
|
|
|
@Override
|
|
public String getUploaderName() throws ParsingException {
|
|
return getUploaderLink().text();
|
|
}
|
|
|
|
@Override
|
|
public String getUploaderUrl() throws ParsingException {
|
|
return getUploaderLink().attr("abs:href");
|
|
}
|
|
|
|
@Override
|
|
public String getUploadDate() throws ParsingException {
|
|
return "";
|
|
}
|
|
|
|
@Override
|
|
public long getViewCount() throws ParsingException {
|
|
return -1;
|
|
}
|
|
|
|
@Override
|
|
public String getThumbnailUrl() throws ParsingException {
|
|
try {
|
|
return "https://i.ytimg.com/vi/" + streamUrlIdHandler.getId(getUrl()) + "/hqdefault.jpg";
|
|
} catch (Exception e) {
|
|
throw new ParsingException("Could not get thumbnail url", e);
|
|
}
|
|
}
|
|
});
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Check if the playlist item is deleted
|
|
* @param li the list item
|
|
* @return true if the item is deleted
|
|
*/
|
|
private boolean isDeletedItem(Element li) {
|
|
return li.select("div[class=pl-video-owner] a").isEmpty();
|
|
}
|
|
}
|