mirror of
https://github.com/TeamNewPipe/NewPipeExtractor
synced 2024-11-15 22:46:19 +01:00
added CommentsInfo
This commit is contained in:
parent
4ca23ab5c3
commit
ee239985ae
@ -1,12 +1,16 @@
|
||||
package org.schabi.newpipe.extractor.comments;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage;
|
||||
import org.schabi.newpipe.extractor.ListInfo;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.StreamingService;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||
import org.schabi.newpipe.extractor.utils.ExtractorHelper;
|
||||
|
||||
public class CommentsInfo extends ListInfo<CommentsInfoItem>{
|
||||
|
||||
@ -19,9 +23,88 @@ public class CommentsInfo extends ListInfo<CommentsInfoItem>{
|
||||
return getInfo(NewPipe.getServiceByUrl(url), url);
|
||||
}
|
||||
|
||||
private static CommentsInfo getInfo(StreamingService serviceByUrl, String url) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
private static CommentsInfo getInfo(StreamingService serviceByUrl, String url) throws ExtractionException, IOException {
|
||||
return getInfo(serviceByUrl.getCommentsExtractor(url));
|
||||
}
|
||||
|
||||
private static CommentsInfo getInfo(CommentsExtractor commentsExtractor) throws IOException, ExtractionException {
|
||||
//for services which do not have a comments extractor
|
||||
if(null == commentsExtractor) {
|
||||
return null;
|
||||
}
|
||||
|
||||
commentsExtractor.fetchPage();
|
||||
String name = commentsExtractor.getName();
|
||||
int serviceId = commentsExtractor.getServiceId();
|
||||
ListLinkHandler listUrlIdHandler = commentsExtractor.getUIHandler();
|
||||
CommentsInfo commentsInfo = new CommentsInfo(serviceId, listUrlIdHandler, name);
|
||||
commentsInfo.setCommentsExtractor(commentsExtractor);
|
||||
InfoItemsPage<CommentsInfoItem> initialCommentsPage = ExtractorHelper.getItemsPageOrLogError(commentsInfo,
|
||||
commentsExtractor);
|
||||
commentsInfo.setComments(new ArrayList<>());
|
||||
commentsInfo.getComments().addAll(initialCommentsPage.getItems());
|
||||
commentsInfo.setHasMoreComments(initialCommentsPage.hasNextPage());
|
||||
commentsInfo.setNextCommentsPageUrl(initialCommentsPage.getNextPageUrl());
|
||||
return commentsInfo;
|
||||
}
|
||||
|
||||
public static void loadMoreComments(CommentsInfo commentsInfo) {
|
||||
if (commentsInfo.hasMoreComments()) {
|
||||
if(null == commentsInfo.getCommentsExtractor()) {
|
||||
try {
|
||||
commentsInfo.setCommentsExtractor(NewPipe.getService(commentsInfo.getServiceId()).getCommentsExtractor(commentsInfo.getUrl()));
|
||||
} catch (ExtractionException e) {
|
||||
commentsInfo.addError(e);
|
||||
return;
|
||||
}
|
||||
}
|
||||
try {
|
||||
InfoItemsPage<CommentsInfoItem> commentsPage = commentsInfo.getCommentsExtractor()
|
||||
.getPage(commentsInfo.getNextCommentsPageUrl());
|
||||
commentsInfo.getComments().addAll(commentsPage.getItems());
|
||||
commentsInfo.setHasMoreComments(commentsPage.hasNextPage());
|
||||
commentsInfo.setNextCommentsPageUrl(commentsPage.getNextPageUrl());
|
||||
} catch (IOException | ExtractionException e) {
|
||||
commentsInfo.addError(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private transient CommentsExtractor commentsExtractor;
|
||||
private List<CommentsInfoItem> comments;
|
||||
private boolean hasMoreComments;
|
||||
private String nextCommentsPageUrl;
|
||||
|
||||
public List<CommentsInfoItem> getComments() {
|
||||
return comments;
|
||||
}
|
||||
|
||||
public void setComments(List<CommentsInfoItem> comments) {
|
||||
this.comments = comments;
|
||||
}
|
||||
|
||||
public boolean hasMoreComments() {
|
||||
return hasMoreComments;
|
||||
}
|
||||
|
||||
public void setHasMoreComments(boolean hasMoreComments) {
|
||||
this.hasMoreComments = hasMoreComments;
|
||||
}
|
||||
|
||||
public CommentsExtractor getCommentsExtractor() {
|
||||
return commentsExtractor;
|
||||
}
|
||||
|
||||
public void setCommentsExtractor(CommentsExtractor commentsExtractor) {
|
||||
this.commentsExtractor = commentsExtractor;
|
||||
}
|
||||
|
||||
public String getNextCommentsPageUrl() {
|
||||
return nextCommentsPageUrl;
|
||||
}
|
||||
|
||||
public void setNextCommentsPageUrl(String nextCommentsPageUrl) {
|
||||
this.nextCommentsPageUrl = nextCommentsPageUrl;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -31,13 +31,13 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||
|
||||
private List<String> cookies;
|
||||
private String sessionToken;
|
||||
private String commentsToken;
|
||||
private String title;
|
||||
private InfoItemsPage<CommentsInfoItem> initPage;
|
||||
|
||||
private ObjectMapper mapper = new ObjectMapper();
|
||||
|
||||
public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) {
|
||||
super(service, uiHandler);
|
||||
// TODO Auto-generated constructor stub
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -45,12 +45,16 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||
// initial page does not load any comments but is required to get session token
|
||||
// and cookies
|
||||
super.fetchPage();
|
||||
return getPage(getNextPageUrl());
|
||||
return initPage;
|
||||
}
|
||||
|
||||
// Isn't this method redundant? You can just call getNextPageUrl() on getInitialPage().
|
||||
@Override
|
||||
public String getNextPageUrl() throws IOException, ExtractionException {
|
||||
return getNextPageUrl(commentsToken);
|
||||
// initial page does not load any comments but is required to get session token
|
||||
// and cookies
|
||||
super.fetchPage();
|
||||
return initPage.getNextPageUrl();
|
||||
}
|
||||
|
||||
private String getNextPageUrl(JsonNode ajaxJson) throws IOException, ExtractionException {
|
||||
@ -91,6 +95,9 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||
}
|
||||
|
||||
private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonNode ajaxJson, String pageUrl) {
|
||||
|
||||
fetchTitle(ajaxJson);
|
||||
|
||||
List<JsonNode> comments = ajaxJson.findValues("commentRenderer");
|
||||
comments.stream().forEach(c -> {
|
||||
CommentsInfoItemExtractor extractor = new CommentsInfoItemExtractor() {
|
||||
@ -192,19 +199,29 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||
|
||||
}
|
||||
|
||||
private void fetchTitle(JsonNode ajaxJson) {
|
||||
if(null == title) {
|
||||
try {
|
||||
title = ajaxJson.findValue("commentTargetTitle").get("simpleText").asText();
|
||||
} catch (Exception e) {
|
||||
title = "Youtube Comments";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFetchPage(Downloader downloader) throws IOException, ExtractionException {
|
||||
DownloadResponse response = downloader.get(getUrl());
|
||||
String responseBody = response.getResponseBody();
|
||||
cookies = response.getResponseHeaders().get("Set-Cookie");
|
||||
sessionToken = findValue(responseBody, "XSRF_TOKEN");
|
||||
commentsToken = findValue(responseBody, "COMMENTS_TOKEN");
|
||||
String commentsToken = findValue(responseBody, "COMMENTS_TOKEN");
|
||||
initPage = getPage(getNextPageUrl(commentsToken));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
return title;
|
||||
}
|
||||
|
||||
private String makeAjaxRequest(String siteUrl) throws IOException, ReCaptchaException {
|
||||
|
@ -1,20 +1,11 @@
|
||||
package org.schabi.newpipe.extractor.services.youtube.linkHandler;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URLDecoder;
|
||||
import java.util.List;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.schabi.newpipe.extractor.Downloader;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.utils.Parser;
|
||||
|
||||
@ -27,6 +18,11 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
|
||||
return instance;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUrl(String id) {
|
||||
return "https://www.youtube.com/watch?v=" + id;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getId(String url) throws ParsingException, IllegalArgumentException {
|
||||
if (url.isEmpty()) {
|
||||
@ -44,8 +40,6 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
|
||||
} catch (UnsupportedEncodingException uee) {
|
||||
throw new ParsingException("Could not parse attribution_link", uee);
|
||||
}
|
||||
} else if (lowercaseUrl.contains("youtube.com/shared?ci=")) {
|
||||
return getRealIdFromSharedLink(url);
|
||||
} else if (url.contains("vnd.youtube")) {
|
||||
id = Parser.matchGroup1(ID_PATTERN, url);
|
||||
} else if (url.contains("embed")) {
|
||||
@ -86,56 +80,6 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the real url from a shared uri.
|
||||
* <p>
|
||||
* Shared URI's look like this:
|
||||
* <pre>
|
||||
* * https://www.youtube.com/shared?ci=PJICrTByb3E
|
||||
* * vnd.youtube://www.youtube.com/shared?ci=PJICrTByb3E&feature=twitter-deep-link
|
||||
* </pre>
|
||||
*
|
||||
* @param url The shared url
|
||||
* @return the id of the stream
|
||||
* @throws ParsingException
|
||||
*/
|
||||
private String getRealIdFromSharedLink(String url) throws ParsingException {
|
||||
URI uri;
|
||||
try {
|
||||
uri = new URI(url);
|
||||
} catch (URISyntaxException e) {
|
||||
throw new ParsingException("Invalid shared link", e);
|
||||
}
|
||||
String sharedId = getSharedId(uri);
|
||||
Downloader downloader = NewPipe.getDownloader();
|
||||
String content;
|
||||
try {
|
||||
content = downloader.download("https://www.youtube.com/shared?ci=" + sharedId);
|
||||
} catch (IOException | ReCaptchaException e) {
|
||||
throw new ParsingException("Unable to resolve shared link", e);
|
||||
}
|
||||
final Document document = Jsoup.parse(content);
|
||||
|
||||
final Element element = document.select("link[rel=\"canonical\"]").first();
|
||||
final String urlWithRealId = (element != null)
|
||||
? element.attr("abs:href")
|
||||
: document.select("meta[property=\"og:url\"]").first()
|
||||
.attr("abs:content");
|
||||
|
||||
String realId = Parser.matchGroup1(ID_PATTERN, urlWithRealId);
|
||||
if (sharedId.equals(realId)) {
|
||||
throw new ParsingException("Got same id for as shared info_id: " + sharedId);
|
||||
}
|
||||
return realId;
|
||||
}
|
||||
|
||||
private String getSharedId(URI uri) throws ParsingException {
|
||||
if (!"/shared".equals(uri.getPath())) {
|
||||
throw new ParsingException("Not a shared link: " + uri.toString() + " (path != " + uri.getPath() + ")");
|
||||
}
|
||||
return Parser.matchGroup1("ci=" + ID_PATTERN, uri.getQuery());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean onAcceptUrl(final String url) throws FoundAdException {
|
||||
final String lowercaseUrl = url.toLowerCase();
|
||||
@ -156,8 +100,8 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUrl(String id, List<String> contentFilter, String sortFilter) throws ParsingException {
|
||||
return "https://www.youtube.com/watch?v=" + id;
|
||||
}
|
||||
@Override
|
||||
public String getUrl(String id, List<String> contentFilter, String sortFilter) throws ParsingException {
|
||||
return "https://www.youtube.com/watch?v=" + id;
|
||||
}
|
||||
}
|
||||
|
@ -11,6 +11,7 @@ import org.junit.Test;
|
||||
import org.schabi.newpipe.Downloader;
|
||||
import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.comments.CommentsInfo;
|
||||
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor;
|
||||
@ -55,6 +56,21 @@ public class YoutubeCommentsExtractorTest {
|
||||
|
||||
assertTrue(result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException {
|
||||
boolean result = false;
|
||||
CommentsInfo commentsInfo = CommentsInfo.getInfo("https://www.youtube.com/watch?v=rrgFN3AxGfs");
|
||||
assertTrue("what the fuck am i doing with my life.wmv".equals(commentsInfo.getName()));
|
||||
result = findInComments(commentsInfo.getComments(), "i should really be in the top comment.lol");
|
||||
|
||||
while (commentsInfo.hasMoreComments() && !result) {
|
||||
CommentsInfo.loadMoreComments(commentsInfo);
|
||||
result = findInComments(commentsInfo.getComments(), "i should really be in the top comment.lol");
|
||||
}
|
||||
|
||||
assertTrue(result);
|
||||
}
|
||||
|
||||
private boolean findInComments(InfoItemsPage<CommentsInfoItem> comments, String comment) {
|
||||
return findInComments(comments.getItems(), comment);
|
||||
|
Loading…
Reference in New Issue
Block a user