Fixed fetching of the YouTube client name

This commit is contained in:
Ritvik Saraf 2018-12-21 09:35:00 +05:30
parent 4c49a347f6
commit 9ad102df3f
2 changed files with 20 additions and 8 deletions

View File

@ -7,6 +7,8 @@ import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nonnull;
@ -25,6 +27,7 @@ import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Localization;
import org.schabi.newpipe.extractor.utils.Parser;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
@ -34,6 +37,7 @@ import com.grack.nanojson.JsonParser;
public class YoutubeCommentsExtractor extends CommentsExtractor {
private static final String USER_AGENT = "Mozilla/5.0 (Android 8.1.0; Mobile; rv:62.0) Gecko/62.0 Firefox/62.0";
private static final Pattern YT_CLIENT_NAME_PATTERN = Pattern.compile("INNERTUBE_CONTEXT_CLIENT_NAME\\\":(.*?)[,}]");
private String ytClientVersion;
private String ytClientName;
@ -150,7 +154,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
DownloadResponse response = downloader.get(getUrl(), request);
String responseBody = response.getResponseBody();
ytClientVersion = findValue(responseBody, "INNERTUBE_CONTEXT_CLIENT_VERSION\":\"", "\"");
ytClientName = findValue(responseBody, "INNERTUBE_CONTEXT_CLIENT_NAME\":", ",");
ytClientName = Parser.matchGroup1(YT_CLIENT_NAME_PATTERN, responseBody);
String commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\"");
initPage = getPage(getNextPageUrl(commentsToken));

View File

@ -1,10 +1,5 @@
package org.schabi.newpipe.extractor.utils;
import org.nibor.autolink.LinkExtractor;
import org.nibor.autolink.LinkSpan;
import org.nibor.autolink.LinkType;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
@ -14,6 +9,11 @@ import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.nibor.autolink.LinkExtractor;
import org.nibor.autolink.LinkSpan;
import org.nibor.autolink.LinkType;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
/*
* Created by Christian Schabesberger on 02.02.16.
*
@ -51,18 +51,26 @@ public class Parser {
/**
 * Convenience overload: extracts capture group 1 from the first match of a
 * regular expression given as a string.
 *
 * @param pattern regular expression source, forwarded unchanged to
 *                {@code matchGroup(String, String, int)}
 * @param input   text to search
 * @return the result of {@code matchGroup(pattern, input, 1)}
 * @throws RegexException propagated from {@code matchGroup}
 */
public static String matchGroup1(String pattern, String input) throws RegexException {
    final int firstGroup = 1;
    return matchGroup(pattern, input, firstGroup);
}
/**
 * Convenience overload: extracts capture group 1 from the first match of an
 * already compiled pattern. Lets callers reuse a cached {@link Pattern}
 * instead of passing the regex as a string.
 *
 * @param pattern compiled regular expression, forwarded unchanged to
 *                {@code matchGroup(Pattern, String, int)}
 * @param input   text to search
 * @return the result of {@code matchGroup(pattern, input, 1)}
 * @throws RegexException propagated from {@code matchGroup}
 */
public static String matchGroup1(Pattern pattern, String input) throws RegexException {
    final int groupIndex = 1;
    final String captured = matchGroup(pattern, input, groupIndex);
    return captured;
}
/**
 * Compiles {@code pattern} and delegates to the {@link Pattern}-based overload.
 *
 * <p>The expression is compiled on every call; callers that match the same
 * expression repeatedly can compile it once and use the {@code Pattern}
 * overload directly.
 *
 * @param pattern regular expression source; {@link Pattern#compile(String)}
 *                throws an unchecked {@code PatternSyntaxException} if it is invalid
 * @param input   text to search
 * @param group   index of the capture group to return
 * @return the result of {@code matchGroup(Pattern, String, int)}
 * @throws RegexException propagated from the {@code Pattern}-based overload
 */
public static String matchGroup(String pattern, String input, int group) throws RegexException {
    return matchGroup(Pattern.compile(pattern), input, group);
}
/**
 * Finds the first occurrence of {@code pat} in {@code input} and returns the
 * requested capture group of that match.
 *
 * <p>Uses {@link Matcher#find()}, so the pattern may match anywhere in the
 * input rather than having to cover it entirely.
 *
 * @param pat   compiled regular expression to search for
 * @param input text to search
 * @param group index of the capture group to return; per {@link Matcher#group(int)}
 *              an index the pattern does not define raises an unchecked
 *              {@code IndexOutOfBoundsException}
 * @return the text captured by {@code group}; per {@link Matcher#group(int)} this
 *         is {@code null} when the match succeeded but that group did not participate
 * @throws RegexException if {@code pat} does not occur in {@code input}
 */
public static String matchGroup(Pattern pat, String input, int group) throws RegexException {
    Matcher mat = pat.matcher(input);
    if (mat.find()) {
        return mat.group(group);
    }

    // No match. Inputs longer than 1024 characters are left out of the message
    // so the exception text stays a manageable size.
    if (input.length() > 1024) {
        throw new RegexException("failed to find pattern \"" + pat.pattern());
    }
    throw new RegexException("failed to find pattern \"" + pat.pattern() + " inside of " + input + "\"");
}