Merge pull request #652 from litetex/fixYTCommentsAndAddDisabledComments

Fix yt comments and add disabled comments functionality
This commit is contained in:
Tobi 2021-07-12 16:31:50 +02:00 committed by GitHub
commit b45bb411e8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 162 additions and 48 deletions

View File

@ -9,9 +9,16 @@ import javax.annotation.Nonnull;
public abstract class CommentsExtractor extends ListExtractor<CommentsInfoItem> { public abstract class CommentsExtractor extends ListExtractor<CommentsInfoItem> {
public CommentsExtractor(StreamingService service, ListLinkHandler uiHandler) { public CommentsExtractor(final StreamingService service, final ListLinkHandler uiHandler) {
super(service, uiHandler); super(service, uiHandler);
// TODO Auto-generated constructor stub }
/**
* @apiNote Warning: This method is experimental and may get removed in a future release.
* @return <code>true</code> if the comments are disabled otherwise <code>false</code> (default)
*/
public boolean isCommentsDisabled() {
return false;
} }
@Nonnull @Nonnull

View File

@ -13,45 +13,56 @@ import java.io.IOException;
public class CommentsInfo extends ListInfo<CommentsInfoItem> { public class CommentsInfo extends ListInfo<CommentsInfoItem> {
private CommentsInfo(int serviceId, ListLinkHandler listUrlIdHandler, String name) { private CommentsInfo(
final int serviceId,
final ListLinkHandler listUrlIdHandler,
final String name) {
super(serviceId, listUrlIdHandler, name); super(serviceId, listUrlIdHandler, name);
} }
public static CommentsInfo getInfo(String url) throws IOException, ExtractionException { public static CommentsInfo getInfo(final String url) throws IOException, ExtractionException {
return getInfo(NewPipe.getServiceByUrl(url), url); return getInfo(NewPipe.getServiceByUrl(url), url);
} }
public static CommentsInfo getInfo(StreamingService serviceByUrl, String url) throws ExtractionException, IOException { public static CommentsInfo getInfo(final StreamingService serviceByUrl, final String url)
throws ExtractionException, IOException {
return getInfo(serviceByUrl.getCommentsExtractor(url)); return getInfo(serviceByUrl.getCommentsExtractor(url));
} }
public static CommentsInfo getInfo(CommentsExtractor commentsExtractor) throws IOException, ExtractionException { public static CommentsInfo getInfo(final CommentsExtractor commentsExtractor)
throws IOException, ExtractionException {
// for services which do not have a comments extractor // for services which do not have a comments extractor
if (null == commentsExtractor) { if (commentsExtractor == null) {
return null; return null;
} }
commentsExtractor.fetchPage(); commentsExtractor.fetchPage();
String name = commentsExtractor.getName();
int serviceId = commentsExtractor.getServiceId(); final String name = commentsExtractor.getName();
ListLinkHandler listUrlIdHandler = commentsExtractor.getLinkHandler(); final int serviceId = commentsExtractor.getServiceId();
CommentsInfo commentsInfo = new CommentsInfo(serviceId, listUrlIdHandler, name); final ListLinkHandler listUrlIdHandler = commentsExtractor.getLinkHandler();
final CommentsInfo commentsInfo = new CommentsInfo(serviceId, listUrlIdHandler, name);
commentsInfo.setCommentsExtractor(commentsExtractor); commentsInfo.setCommentsExtractor(commentsExtractor);
InfoItemsPage<CommentsInfoItem> initialCommentsPage = ExtractorHelper.getItemsPageOrLogError(commentsInfo, final InfoItemsPage<CommentsInfoItem> initialCommentsPage =
commentsExtractor); ExtractorHelper.getItemsPageOrLogError(commentsInfo, commentsExtractor);
commentsInfo.setCommentsDisabled(commentsExtractor.isCommentsDisabled());
commentsInfo.setRelatedItems(initialCommentsPage.getItems()); commentsInfo.setRelatedItems(initialCommentsPage.getItems());
commentsInfo.setNextPage(initialCommentsPage.getNextPage()); commentsInfo.setNextPage(initialCommentsPage.getNextPage());
return commentsInfo; return commentsInfo;
} }
public static InfoItemsPage<CommentsInfoItem> getMoreItems(CommentsInfo commentsInfo, Page page) public static InfoItemsPage<CommentsInfoItem> getMoreItems(
throws ExtractionException, IOException { final CommentsInfo commentsInfo,
final Page page) throws ExtractionException, IOException {
return getMoreItems(NewPipe.getService(commentsInfo.getServiceId()), commentsInfo, page); return getMoreItems(NewPipe.getService(commentsInfo.getServiceId()), commentsInfo, page);
} }
public static InfoItemsPage<CommentsInfoItem> getMoreItems(StreamingService service, CommentsInfo commentsInfo, public static InfoItemsPage<CommentsInfoItem> getMoreItems(
Page page) throws IOException, ExtractionException { final StreamingService service,
final CommentsInfo commentsInfo,
final Page page) throws IOException, ExtractionException {
if (null == commentsInfo.getCommentsExtractor()) { if (null == commentsInfo.getCommentsExtractor()) {
commentsInfo.setCommentsExtractor(service.getCommentsExtractor(commentsInfo.getUrl())); commentsInfo.setCommentsExtractor(service.getCommentsExtractor(commentsInfo.getUrl()));
commentsInfo.getCommentsExtractor().fetchPage(); commentsInfo.getCommentsExtractor().fetchPage();
@ -60,13 +71,30 @@ public class CommentsInfo extends ListInfo<CommentsInfoItem> {
} }
private transient CommentsExtractor commentsExtractor; private transient CommentsExtractor commentsExtractor;
private boolean commentsDisabled = false;
public CommentsExtractor getCommentsExtractor() { public CommentsExtractor getCommentsExtractor() {
return commentsExtractor; return commentsExtractor;
} }
public void setCommentsExtractor(CommentsExtractor commentsExtractor) { public void setCommentsExtractor(final CommentsExtractor commentsExtractor) {
this.commentsExtractor = commentsExtractor; this.commentsExtractor = commentsExtractor;
} }
/**
* Tells whether the comments were reported as disabled.
* <p>
* Returns the value stored in the {@code commentsDisabled} field, which starts out as
* {@code false} and is changed through {@link #setCommentsDisabled(boolean)}.
*
* @apiNote Warning: This method is experimental and may get removed in a future release.
* @return <code>true</code> if the comments are disabled, otherwise <code>false</code> (default)
* @see CommentsExtractor#isCommentsDisabled()
*/
public boolean isCommentsDisabled() {
return commentsDisabled;
}
/**
* Stores whether the comments are disabled; the value is later returned by
* {@link #isCommentsDisabled()}.
*
* @apiNote Warning: This method is experimental and may get removed in a future release.
* @param commentsDisabled <code>true</code> if the comments are disabled, otherwise <code>false</code>
*/
public void setCommentsDisabled(final boolean commentsDisabled) {
this.commentsDisabled = commentsDisabled;
}
} }

View File

@ -23,9 +23,11 @@ import javax.annotation.Nonnull;
import java.io.IOException; import java.io.IOException;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Optional;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import static java.util.Collections.singletonList; import static java.util.Collections.singletonList;
@ -41,53 +43,108 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
private String ytClientName; private String ytClientName;
private String responseBody; private String responseBody;
public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) { /**
* Cache and holder of the "comments disabled" state.
* <br/>
* Initial value = empty -> it is not yet known whether the comments are disabled<br/>
* The value is set as soon as some method calls
* {@link YoutubeCommentsExtractor#findInitialCommentsToken()}<br/>
* If that method, or another one that depends on the disabled state,
* is called again afterwards, it can reuse the stored value and avoid unnecessary calls
*/
private Optional<Boolean> optCommentsDisabled = Optional.empty();
public YoutubeCommentsExtractor(
final StreamingService service,
final ListLinkHandler uiHandler) {
super(service, uiHandler); super(service, uiHandler);
} }
@Override @Override
public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException { public InfoItemsPage<CommentsInfoItem> getInitialPage()
String commentsTokenInside = findValue(responseBody, "sectionListRenderer", "}"); throws IOException, ExtractionException {
if (!commentsTokenInside.contains("continuation\":\"")) {
commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}"); // Check if findInitialCommentsToken was already called and optCommentsDisabled initialized
if (optCommentsDisabled.orElse(false)) {
return getInfoItemsPageForDisabledComments();
} }
final String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\"");
// Get the token
final String commentsToken = findInitialCommentsToken();
// Check if the comments have been disabled
if (optCommentsDisabled.get()) {
return getInfoItemsPageForDisabledComments();
}
return getPage(getNextPage(commentsToken)); return getPage(getNextPage(commentsToken));
} }
private Page getNextPage(JsonObject ajaxJson) throws ParsingException { /**
* Finds the initial comments token and initializes commentsDisabled.
* @return the continuation token or null if none was found
*/
private String findInitialCommentsToken() {
    final String tokenPrefix = "continuation\":\"";

    // Look in the section list renderer first; fall back to the comment section
    // renderer when the first lookup yields nothing usable.
    String rendererSnippet = findValue(responseBody, "sectionListRenderer", "}");
    boolean hasToken = rendererSnippet != null && rendererSnippet.contains(tokenPrefix);
    if (!hasToken) {
        rendererSnippet = findValue(responseBody, "commentSectionRenderer", "}");
        hasToken = rendererSnippet != null && rendererSnippet.contains(tokenPrefix);
    }

    // No continuation marker in either renderer -> comments are treated as disabled
    // and there is no token. Otherwise extract the token (zero or more comments exist).
    final String token = hasToken ? findValue(rendererSnippet, tokenPrefix, "\"") : null;

    // Remember the outcome so later callers do not have to search again.
    optCommentsDisabled = Optional.of(!hasToken);
    return token;
}
/**
* Builds the page that is returned when the comments are disabled.
* <p>
* The page is constructed from an empty list, {@code null} and a second empty list
* (presumably items, next page and errors; confirm against the InfoItemsPage constructor).
*
* @return a new {@link InfoItemsPage} built from empty lists
*/
private InfoItemsPage<CommentsInfoItem> getInfoItemsPageForDisabledComments() {
return new InfoItemsPage<>(Collections.emptyList(), null, Collections.emptyList());
}
private Page getNextPage(final JsonObject ajaxJson) throws ParsingException {
final JsonArray arr; final JsonArray arr;
try { try {
arr = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations"); arr = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations");
} catch (Exception e) { } catch (final Exception e) {
return null; return null;
} }
if (arr.isEmpty()) { if (arr.isEmpty()) {
return null; return null;
} }
String continuation; final String continuation;
try { try {
continuation = JsonUtils.getString(arr.getObject(0), "nextContinuationData.continuation"); continuation = JsonUtils.getString(arr.getObject(0), "nextContinuationData.continuation");
} catch (Exception e) { } catch (final Exception e) {
return null; return null;
} }
return getNextPage(continuation); return getNextPage(continuation);
} }
private Page getNextPage(String continuation) throws ParsingException { private Page getNextPage(final String continuation) throws ParsingException {
Map<String, String> params = new HashMap<>(); final Map<String, String> params = new HashMap<>();
params.put("action_get_comments", "1"); params.put("action_get_comments", "1");
params.put("pbj", "1"); params.put("pbj", "1");
params.put("ctoken", continuation); params.put("ctoken", continuation);
try { try {
return new Page("https://m.youtube.com/watch_comment?" + getDataString(params)); return new Page("https://m.youtube.com/watch_comment?" + getDataString(params));
} catch (UnsupportedEncodingException e) { } catch (final UnsupportedEncodingException e) {
throw new ParsingException("Could not get next page url", e); throw new ParsingException("Could not get next page url", e);
} }
} }
@Override @Override
public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws IOException, ExtractionException { public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws IOException, ExtractionException {
if (optCommentsDisabled.orElse(false)) {
return getInfoItemsPageForDisabledComments();
}
if (page == null || isNullOrEmpty(page.getUrl())) { if (page == null || isNullOrEmpty(page.getUrl())) {
throw new IllegalArgumentException("Page doesn't contain an URL"); throw new IllegalArgumentException("Page doesn't contain an URL");
} }
@ -96,7 +153,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
final JsonObject ajaxJson; final JsonObject ajaxJson;
try { try {
ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1); ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1);
} catch (Exception e) { } catch (final Exception e) {
throw new ParsingException("Could not parse json data for comments", e); throw new ParsingException("Could not parse json data for comments", e);
} }
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId()); final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
@ -104,31 +161,32 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
return new InfoItemsPage<>(collector, getNextPage(ajaxJson)); return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
} }
private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonObject ajaxJson) throws ParsingException { private void collectCommentsFrom(final CommentsInfoItemsCollector collector, final JsonObject ajaxJson) throws ParsingException {
JsonArray contents; final JsonArray contents;
try { try {
contents = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.items"); contents = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.items");
} catch (Exception e) { } catch (final Exception e) {
//no comments //no comments
return; return;
} }
List<Object> comments; final List<Object> comments;
try { try {
comments = JsonUtils.getValues(contents, "commentThreadRenderer.comment.commentRenderer"); comments = JsonUtils.getValues(contents, "commentThreadRenderer.comment.commentRenderer");
} catch (Exception e) { } catch (final Exception e) {
throw new ParsingException("unable to get parse youtube comments", e); throw new ParsingException("unable to get parse youtube comments", e);
} }
for (Object c : comments) { for (final Object c : comments) {
if (c instanceof JsonObject) { if (c instanceof JsonObject) {
CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor((JsonObject) c, getUrl(), getTimeAgoParser()); final CommentsInfoItemExtractor extractor =
new YoutubeCommentsInfoItemExtractor((JsonObject) c, getUrl(), getTimeAgoParser());
collector.commit(extractor); collector.commit(extractor);
} }
} }
} }
@Override @Override
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException { public void onFetchPage(@Nonnull final Downloader downloader) throws IOException, ExtractionException {
final Map<String, List<String>> requestHeaders = new HashMap<>(); final Map<String, List<String>> requestHeaders = new HashMap<>();
requestHeaders.put("User-Agent", singletonList(USER_AGENT)); requestHeaders.put("User-Agent", singletonList(USER_AGENT));
final Response response = downloader.get(getUrl(), requestHeaders, getExtractorLocalization()); final Response response = downloader.get(getUrl(), requestHeaders, getExtractorLocalization());
@ -138,8 +196,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
} }
private String makeAjaxRequest(String siteUrl) throws IOException, ReCaptchaException { private String makeAjaxRequest(final String siteUrl) throws IOException, ReCaptchaException {
Map<String, List<String>> requestHeaders = new HashMap<>(); final Map<String, List<String>> requestHeaders = new HashMap<>();
requestHeaders.put("Accept", singletonList("*/*")); requestHeaders.put("Accept", singletonList("*/*"));
requestHeaders.put("User-Agent", singletonList(USER_AGENT)); requestHeaders.put("User-Agent", singletonList(USER_AGENT));
requestHeaders.put("X-YouTube-Client-Version", singletonList(ytClientVersion)); requestHeaders.put("X-YouTube-Client-Version", singletonList(ytClientVersion));
@ -147,14 +205,15 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
return getDownloader().get(siteUrl, requestHeaders, getExtractorLocalization()).responseBody(); return getDownloader().get(siteUrl, requestHeaders, getExtractorLocalization()).responseBody();
} }
private String getDataString(Map<String, String> params) throws UnsupportedEncodingException { private String getDataString(final Map<String, String> params) throws UnsupportedEncodingException {
StringBuilder result = new StringBuilder(); final StringBuilder result = new StringBuilder();
boolean first = true; boolean first = true;
for (Map.Entry<String, String> entry : params.entrySet()) { for (final Map.Entry<String, String> entry : params.entrySet()) {
if (first) if (first) {
first = false; first = false;
else } else {
result.append("&"); result.append("&");
}
result.append(URLEncoder.encode(entry.getKey(), UTF_8)); result.append(URLEncoder.encode(entry.getKey(), UTF_8));
result.append("="); result.append("=");
result.append(URLEncoder.encode(entry.getValue(), UTF_8)); result.append(URLEncoder.encode(entry.getValue(), UTF_8));
@ -163,8 +222,28 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
} }
private String findValue(final String doc, final String start, final String end) { private String findValue(final String doc, final String start, final String end) {
final int beginIndex = doc.indexOf(start) + start.length(); int beginIndex = doc.indexOf(start);
// Start string was not found
if (beginIndex == -1) {
return null;
}
beginIndex = beginIndex + start.length();
final int endIndex = doc.indexOf(end, beginIndex); final int endIndex = doc.indexOf(end, beginIndex);
// End string was not found
if (endIndex == -1) {
return null;
}
return doc.substring(beginIndex, endIndex); return doc.substring(beginIndex, endIndex);
} }
@Override
public boolean isCommentsDisabled() {
    // Fast path: the state was already determined by an earlier token search.
    if (optCommentsDisabled.isPresent()) {
        return optCommentsDisabled.get();
    }

    // Unknown so far: searching for the initial token also fills in the state.
    findInitialCommentsToken();
    return optCommentsDisabled.get();
}
} }