Merge pull request #652 from litetex/fixYTCommentsAndAddDisabledComments

Fix yt comments and add disabled comments functionality
This commit is contained in:
Tobi 2021-07-12 16:31:50 +02:00 committed by GitHub
commit b45bb411e8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 162 additions and 48 deletions

View File

@ -9,9 +9,16 @@ import javax.annotation.Nonnull;
public abstract class CommentsExtractor extends ListExtractor<CommentsInfoItem> {
public CommentsExtractor(StreamingService service, ListLinkHandler uiHandler) {
public CommentsExtractor(final StreamingService service, final ListLinkHandler uiHandler) {
super(service, uiHandler);
// TODO Auto-generated constructor stub
}
/**
 * Reports whether comments are disabled for the content handled by this extractor.
 * <p>
 * This base implementation always returns <code>false</code>; subclasses that are able to
 * detect disabled comments may override it.
 *
 * @apiNote Warning: This method is experimental and may get removed in a future release.
 * @return <code>true</code> if the comments are disabled otherwise <code>false</code> (default)
 */
public boolean isCommentsDisabled() {
return false;
}
@Nonnull

View File

@ -13,45 +13,56 @@ import java.io.IOException;
public class CommentsInfo extends ListInfo<CommentsInfoItem> {
private CommentsInfo(int serviceId, ListLinkHandler listUrlIdHandler, String name) {
private CommentsInfo(
final int serviceId,
final ListLinkHandler listUrlIdHandler,
final String name) {
super(serviceId, listUrlIdHandler, name);
}
public static CommentsInfo getInfo(String url) throws IOException, ExtractionException {
public static CommentsInfo getInfo(final String url) throws IOException, ExtractionException {
return getInfo(NewPipe.getServiceByUrl(url), url);
}
public static CommentsInfo getInfo(StreamingService serviceByUrl, String url) throws ExtractionException, IOException {
public static CommentsInfo getInfo(final StreamingService serviceByUrl, final String url)
throws ExtractionException, IOException {
return getInfo(serviceByUrl.getCommentsExtractor(url));
}
public static CommentsInfo getInfo(CommentsExtractor commentsExtractor) throws IOException, ExtractionException {
public static CommentsInfo getInfo(final CommentsExtractor commentsExtractor)
throws IOException, ExtractionException {
// for services which do not have a comments extractor
if (null == commentsExtractor) {
if (commentsExtractor == null) {
return null;
}
commentsExtractor.fetchPage();
String name = commentsExtractor.getName();
int serviceId = commentsExtractor.getServiceId();
ListLinkHandler listUrlIdHandler = commentsExtractor.getLinkHandler();
CommentsInfo commentsInfo = new CommentsInfo(serviceId, listUrlIdHandler, name);
final String name = commentsExtractor.getName();
final int serviceId = commentsExtractor.getServiceId();
final ListLinkHandler listUrlIdHandler = commentsExtractor.getLinkHandler();
final CommentsInfo commentsInfo = new CommentsInfo(serviceId, listUrlIdHandler, name);
commentsInfo.setCommentsExtractor(commentsExtractor);
InfoItemsPage<CommentsInfoItem> initialCommentsPage = ExtractorHelper.getItemsPageOrLogError(commentsInfo,
commentsExtractor);
final InfoItemsPage<CommentsInfoItem> initialCommentsPage =
ExtractorHelper.getItemsPageOrLogError(commentsInfo, commentsExtractor);
commentsInfo.setCommentsDisabled(commentsExtractor.isCommentsDisabled());
commentsInfo.setRelatedItems(initialCommentsPage.getItems());
commentsInfo.setNextPage(initialCommentsPage.getNextPage());
return commentsInfo;
}
public static InfoItemsPage<CommentsInfoItem> getMoreItems(CommentsInfo commentsInfo, Page page)
throws ExtractionException, IOException {
public static InfoItemsPage<CommentsInfoItem> getMoreItems(
final CommentsInfo commentsInfo,
final Page page) throws ExtractionException, IOException {
return getMoreItems(NewPipe.getService(commentsInfo.getServiceId()), commentsInfo, page);
}
public static InfoItemsPage<CommentsInfoItem> getMoreItems(StreamingService service, CommentsInfo commentsInfo,
Page page) throws IOException, ExtractionException {
public static InfoItemsPage<CommentsInfoItem> getMoreItems(
final StreamingService service,
final CommentsInfo commentsInfo,
final Page page) throws IOException, ExtractionException {
if (null == commentsInfo.getCommentsExtractor()) {
commentsInfo.setCommentsExtractor(service.getCommentsExtractor(commentsInfo.getUrl()));
commentsInfo.getCommentsExtractor().fetchPage();
@ -60,13 +71,30 @@ public class CommentsInfo extends ListInfo<CommentsInfoItem> {
}
private transient CommentsExtractor commentsExtractor;
private boolean commentsDisabled = false;
/**
 * @return the extractor currently attached to this info object; may be <code>null</code>
 * (the field is transient and has no initial value), in which case
 * {@link #getMoreItems(StreamingService, CommentsInfo, Page)} creates and attaches a new one
 */
public CommentsExtractor getCommentsExtractor() {
return commentsExtractor;
}
public void setCommentsExtractor(CommentsExtractor commentsExtractor) {
public void setCommentsExtractor(final CommentsExtractor commentsExtractor) {
this.commentsExtractor = commentsExtractor;
}
/**
 * Returns the "comments disabled" flag stored in this info object.
 * <p>
 * {@link #getInfo(CommentsExtractor)} fills it from
 * {@link CommentsExtractor#isCommentsDisabled()}; until it is set, the value is
 * <code>false</code>.
 *
 * @apiNote Warning: This method is experimental and may get removed in a future release.
 * @return <code>true</code> if the comments are disabled otherwise <code>false</code> (default)
 * @see CommentsExtractor#isCommentsDisabled()
 */
public boolean isCommentsDisabled() {
return commentsDisabled;
}
/**
 * Stores the "comments disabled" flag that {@link #isCommentsDisabled()} returns.
 *
 * @apiNote Warning: This method is experimental and may get removed in a future release.
 * @param commentsDisabled <code>true</code> if the comments are disabled otherwise <code>false</code>
 */
public void setCommentsDisabled(final boolean commentsDisabled) {
this.commentsDisabled = commentsDisabled;
}
}

View File

@ -23,9 +23,11 @@ import javax.annotation.Nonnull;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Pattern;
import static java.util.Collections.singletonList;
@ -41,53 +43,108 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
private String ytClientName;
private String responseBody;
public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) {
/**
 * Cache for, and holder of, the "comments disabled" state.
 * <ul>
 * <li>Empty (initial value): it is not yet known whether comments are disabled.</li>
 * <li>Present: {@link YoutubeCommentsExtractor#findInitialCommentsToken()} has run and
 * stored the result.</li>
 * </ul>
 * Methods that depend on this state check the cached value first, so a repeated call can
 * avoid unnecessary work.
 */
private Optional<Boolean> optCommentsDisabled = Optional.empty();
public YoutubeCommentsExtractor(
final StreamingService service,
final ListLinkHandler uiHandler) {
super(service, uiHandler);
}
@Override
public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException {
String commentsTokenInside = findValue(responseBody, "sectionListRenderer", "}");
if (!commentsTokenInside.contains("continuation\":\"")) {
commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
public InfoItemsPage<CommentsInfoItem> getInitialPage()
throws IOException, ExtractionException {
// Check if findInitialCommentsToken was already called and optCommentsDisabled initialized
if (optCommentsDisabled.orElse(false)) {
return getInfoItemsPageForDisabledComments();
}
final String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\"");
// Get the token
final String commentsToken = findInitialCommentsToken();
// Check if the comments have been disabled
if (optCommentsDisabled.get()) {
return getInfoItemsPageForDisabledComments();
}
return getPage(getNextPage(commentsToken));
}
private Page getNextPage(JsonObject ajaxJson) throws ParsingException {
/**
 * Finds the initial comments continuation token in the fetched response body and
 * initializes {@link #optCommentsDisabled} accordingly.
 * <p>
 * The token is searched for inside the first <code>sectionListRenderer</code> section and,
 * if it is not there, inside the first <code>commentSectionRenderer</code> section.
 * Comments are recorded as disabled exactly when no token could be extracted. This also
 * covers the case where the start marker is present but the closing quote is missing, so
 * that a <code>null</code> token is never reported together with "comments enabled" and
 * then passed on to the page URL construction.
 *
 * @return the continuation token or <code>null</code> if none was found
 */
private String findInitialCommentsToken() {
    final String continuationStartPattern = "continuation\":\"";

    String commentsTokenInside = findValue(responseBody, "sectionListRenderer", "}");
    if (commentsTokenInside == null || !commentsTokenInside.contains(continuationStartPattern)) {
        // Fall back to the alternative renderer section
        commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
    }

    final boolean hasStartPattern = commentsTokenInside != null
            && commentsTokenInside.contains(continuationStartPattern);

    // May still be null when the closing quote of the token is missing
    final String commentsToken = hasStartPattern
            ? findValue(commentsTokenInside, continuationStartPattern, "\"")
            : null;

    // No usable continuation token -> comments are disabled;
    // if a continuation token is found there are >= 0 comments
    optCommentsDisabled = Optional.of(commentsToken == null);
    return commentsToken;
}
/**
 * Builds the page that is returned when comments are disabled: an {@link InfoItemsPage}
 * created from an empty list, <code>null</code> and another empty list.
 * NOTE(review): the arguments are presumably (items, next page, errors), i.e. no comments,
 * no further page and no errors; confirm against the InfoItemsPage constructor.
 *
 * @return a new, empty comments page
 */
private InfoItemsPage<CommentsInfoItem> getInfoItemsPageForDisabledComments() {
return new InfoItemsPage<>(Collections.emptyList(), null, Collections.emptyList());
}
private Page getNextPage(final JsonObject ajaxJson) throws ParsingException {
final JsonArray arr;
try {
arr = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations");
} catch (Exception e) {
} catch (final Exception e) {
return null;
}
if (arr.isEmpty()) {
return null;
}
String continuation;
final String continuation;
try {
continuation = JsonUtils.getString(arr.getObject(0), "nextContinuationData.continuation");
} catch (Exception e) {
} catch (final Exception e) {
return null;
}
return getNextPage(continuation);
}
private Page getNextPage(String continuation) throws ParsingException {
Map<String, String> params = new HashMap<>();
private Page getNextPage(final String continuation) throws ParsingException {
final Map<String, String> params = new HashMap<>();
params.put("action_get_comments", "1");
params.put("pbj", "1");
params.put("ctoken", continuation);
try {
return new Page("https://m.youtube.com/watch_comment?" + getDataString(params));
} catch (UnsupportedEncodingException e) {
} catch (final UnsupportedEncodingException e) {
throw new ParsingException("Could not get next page url", e);
}
}
@Override
public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws IOException, ExtractionException {
if (optCommentsDisabled.orElse(false)) {
return getInfoItemsPageForDisabledComments();
}
if (page == null || isNullOrEmpty(page.getUrl())) {
throw new IllegalArgumentException("Page doesn't contain an URL");
}
@ -96,7 +153,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
final JsonObject ajaxJson;
try {
ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1);
} catch (Exception e) {
} catch (final Exception e) {
throw new ParsingException("Could not parse json data for comments", e);
}
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
@ -104,31 +161,32 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
}
private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonObject ajaxJson) throws ParsingException {
JsonArray contents;
private void collectCommentsFrom(final CommentsInfoItemsCollector collector, final JsonObject ajaxJson) throws ParsingException {
final JsonArray contents;
try {
contents = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.items");
} catch (Exception e) {
} catch (final Exception e) {
//no comments
return;
}
List<Object> comments;
final List<Object> comments;
try {
comments = JsonUtils.getValues(contents, "commentThreadRenderer.comment.commentRenderer");
} catch (Exception e) {
} catch (final Exception e) {
throw new ParsingException("unable to get parse youtube comments", e);
}
for (Object c : comments) {
for (final Object c : comments) {
if (c instanceof JsonObject) {
CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor((JsonObject) c, getUrl(), getTimeAgoParser());
final CommentsInfoItemExtractor extractor =
new YoutubeCommentsInfoItemExtractor((JsonObject) c, getUrl(), getTimeAgoParser());
collector.commit(extractor);
}
}
}
@Override
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
public void onFetchPage(@Nonnull final Downloader downloader) throws IOException, ExtractionException {
final Map<String, List<String>> requestHeaders = new HashMap<>();
requestHeaders.put("User-Agent", singletonList(USER_AGENT));
final Response response = downloader.get(getUrl(), requestHeaders, getExtractorLocalization());
@ -138,8 +196,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
}
private String makeAjaxRequest(String siteUrl) throws IOException, ReCaptchaException {
Map<String, List<String>> requestHeaders = new HashMap<>();
private String makeAjaxRequest(final String siteUrl) throws IOException, ReCaptchaException {
final Map<String, List<String>> requestHeaders = new HashMap<>();
requestHeaders.put("Accept", singletonList("*/*"));
requestHeaders.put("User-Agent", singletonList(USER_AGENT));
requestHeaders.put("X-YouTube-Client-Version", singletonList(ytClientVersion));
@ -147,14 +205,15 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
return getDownloader().get(siteUrl, requestHeaders, getExtractorLocalization()).responseBody();
}
private String getDataString(Map<String, String> params) throws UnsupportedEncodingException {
StringBuilder result = new StringBuilder();
private String getDataString(final Map<String, String> params) throws UnsupportedEncodingException {
final StringBuilder result = new StringBuilder();
boolean first = true;
for (Map.Entry<String, String> entry : params.entrySet()) {
if (first)
for (final Map.Entry<String, String> entry : params.entrySet()) {
if (first) {
first = false;
else
} else {
result.append("&");
}
result.append(URLEncoder.encode(entry.getKey(), UTF_8));
result.append("=");
result.append(URLEncoder.encode(entry.getValue(), UTF_8));
@ -163,8 +222,28 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
}
private String findValue(final String doc, final String start, final String end) {
final int beginIndex = doc.indexOf(start) + start.length();
int beginIndex = doc.indexOf(start);
// Start string was not found
if (beginIndex == -1) {
return null;
}
beginIndex = beginIndex + start.length();
final int endIndex = doc.indexOf(end, beginIndex);
// End string was not found
if (endIndex == -1) {
return null;
}
return doc.substring(beginIndex, endIndex);
}
/**
 * Reports whether comments are disabled, using the cached state when it is available and
 * otherwise determining it by searching for the initial comments token.
 *
 * @return <code>true</code> if the comments are disabled otherwise <code>false</code>
 */
@Override
public boolean isCommentsDisabled() {
    // Fast path: the state was already determined by an earlier call
    if (optCommentsDisabled.isPresent()) {
        return optCommentsDisabled.get();
    }

    // Searching for the token stores the state as a side effect
    findInitialCommentsToken();
    return optCommentsDisabled.get();
}
}