Merge branch 'dev' of github.com:TeamNewPipe/NewPipeExtractor into channel-tabs

This commit is contained in:
ThetaDev 2022-11-29 17:50:32 +01:00
commit c156c404cb
19 changed files with 1074 additions and 54 deletions

View File

@ -30,7 +30,7 @@ allprojects {
nanojsonVersion = "1d9e1aea9049fc9f85e68b43ba39fe7be1c1f751"
spotbugsVersion = "4.7.3"
junitVersion = "5.9.1"
checkstyleVersion = "9.3" // do not use latest version (10.0) as it requires compile JDK 11
checkstyleVersion = "10.4"
}
}

View File

@ -96,7 +96,15 @@
<!-- Checks for imports -->
<!-- See https://checkstyle.org/config_import.html -->
<module name="AvoidStarImport"/>
<module name="IllegalImport"/> <!-- defaults to sun.* packages -->
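<module name="IllegalImport"> <!-- illegalPkgs keeps its default (sun.* packages); illegalClasses additionally rejects the JetBrains, AndroidX and RxJava nullability annotations listed below -->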
<property name="illegalClasses" value="
org.jetbrains.annotations.Nullable,
org.jetbrains.annotations.NotNull,
androidx.annotation.Nullable,
androidx.annotation.NonNull,
io.reactivex.rxjava3.annotations.NonNull,
io.reactivex.rxjava3.annotations.Nullable" />
</module>
<module name="RedundantImport"/>
<module name="UnusedImports"/>

View File

@ -28,7 +28,6 @@ dependencies {
implementation "com.github.TeamNewPipe:nanojson:$nanojsonVersion"
implementation 'org.jsoup:jsoup:1.15.3'
implementation "com.github.spotbugs:spotbugs-annotations:$spotbugsVersion"
implementation 'org.nibor.autolink:autolink:0.10.0'
// do not upgrade to 1.7.14, since in 1.7.14 Rhino uses the `SourceVersion` class, which is not
// available on Android (even when using desugaring), and `NoClassDefFoundError` is thrown

View File

@ -3,13 +3,14 @@ package org.schabi.newpipe.extractor.comments;
import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.stream.Description;
import javax.annotation.Nullable;
public class CommentsInfoItem extends InfoItem {
private String commentId;
private String commentText;
private Description commentText;
private String uploaderName;
private String uploaderAvatarUrl;
private String uploaderUrl;
@ -43,11 +44,11 @@ public class CommentsInfoItem extends InfoItem {
this.commentId = commentId;
}
public String getCommentText() {
public Description getCommentText() {
return commentText;
}
public void setCommentText(final String commentText) {
public void setCommentText(final Description commentText) {
this.commentText = commentText;
}

View File

@ -5,6 +5,7 @@ import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.stream.StreamExtractor;
import javax.annotation.Nullable;
@ -41,8 +42,8 @@ public interface CommentsInfoItemExtractor extends InfoItemExtractor {
/**
* The text of the comment
*/
default String getCommentText() throws ParsingException {
return "";
default Description getCommentText() throws ParsingException {
return Description.EMPTY_DESCRIPTION;
}
/**

View File

@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.bandcamp.extractors;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream.Description;
import java.util.Objects;
@ -18,7 +19,7 @@ public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtrac
@Override
public String getName() throws ParsingException {
return getCommentText();
return getCommentText().getContent();
}
@Override
@ -32,12 +33,14 @@ public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtrac
}
@Override
public String getCommentText() throws ParsingException {
return writing.getElementsByClass("text").stream()
public Description getCommentText() throws ParsingException {
final var text = writing.getElementsByClass("text").stream()
.filter(Objects::nonNull)
.map(Element::ownText)
.findFirst()
.orElseThrow(() -> new ParsingException("Could not get comment text"));
return new Description(text, Description.PLAIN_TEXT);
}
@Override

View File

@ -9,6 +9,7 @@ import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import java.util.Objects;
@ -59,13 +60,15 @@ public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtrac
}
@Override
public String getCommentText() throws ParsingException {
public Description getCommentText() throws ParsingException {
final String htmlText = JsonUtils.getString(item, "text");
try {
final Document doc = Jsoup.parse(htmlText);
return doc.body().text();
final var text = doc.body().text();
return new Description(text, Description.PLAIN_TEXT);
} catch (final Exception e) {
return htmlText.replaceAll("(?s)<[^>]*>(\\s*<[^>]*>)*", "");
final var text = htmlText.replaceAll("(?s)<[^>]*>(\\s*<[^>]*>)*", "");
return new Description(text, Description.PLAIN_TEXT);
}
}

View File

@ -5,6 +5,7 @@ import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
import org.schabi.newpipe.extractor.stream.Description;
import javax.annotation.Nullable;
import java.util.Objects;
@ -24,8 +25,8 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
}
@Override
public String getCommentText() {
return json.getString("body");
public Description getCommentText() {
return new Description(json.getString("body"), Description.PLAIN_TEXT);
}
@Override

View File

@ -944,18 +944,50 @@ public final class YoutubeParsingHelper {
}
final StringBuilder textBuilder = new StringBuilder();
for (final Object textPart : textObject.getArray("runs")) {
final String text = ((JsonObject) textPart).getString("text");
if (html && ((JsonObject) textPart).has("navigationEndpoint")) {
final String url = getUrlFromNavigationEndpoint(((JsonObject) textPart)
.getObject("navigationEndpoint"));
if (!isNullOrEmpty(url)) {
textBuilder.append("<a href=\"").append(url).append("\">").append(text)
.append("</a>");
continue;
for (final Object o : textObject.getArray("runs")) {
final JsonObject run = (JsonObject) o;
String text = run.getString("text");
if (html) {
if (run.has("navigationEndpoint")) {
final String url = getUrlFromNavigationEndpoint(run
.getObject("navigationEndpoint"));
if (!isNullOrEmpty(url)) {
text = "<a href=\"" + url + "\">" + text + "</a>";
}
}
final boolean bold = run.has("bold")
&& run.getBoolean("bold");
final boolean italic = run.has("italics")
&& run.getBoolean("italics");
final boolean strikethrough = run.has("strikethrough")
&& run.getBoolean("strikethrough");
if (bold) {
textBuilder.append("<b>");
}
if (italic) {
textBuilder.append("<i>");
}
if (strikethrough) {
textBuilder.append("<s>");
}
textBuilder.append(text);
if (strikethrough) {
textBuilder.append("</s>");
}
if (italic) {
textBuilder.append("</i>");
}
if (bold) {
textBuilder.append("</b>");
}
} else {
textBuilder.append(text);
}
textBuilder.append(text);
}
String text = textBuilder.toString();
@ -995,7 +1027,7 @@ public final class YoutubeParsingHelper {
final StringBuilder textBuilder = new StringBuilder();
int textStart = 0;
for (final Object commandRun: commandRuns) {
for (final Object commandRun : commandRuns) {
if (!(commandRun instanceof JsonObject)) {
continue;
}

View File

@ -209,14 +209,14 @@ public class YoutubeService extends StreamingService {
// https://www.youtube.com/picker_ajax?action_country_json=1
private static final List<ContentCountry> SUPPORTED_COUNTRIES = ContentCountry.listFrom(
"DZ", "AR", "AU", "AT", "AZ", "BH", "BD", "BY", "BE", "BO", "BA", "BR", "BG", "CA",
"CL", "CO", "CR", "HR", "CY", "CZ", "DK", "DO", "EC", "EG", "SV", "EE", "FI", "FR",
"GE", "DE", "GH", "GR", "GT", "HN", "HK", "HU", "IS", "IN", "ID", "IQ", "IE", "IL",
"IT", "JM", "JP", "JO", "KZ", "KE", "KW", "LV", "LB", "LY", "LI", "LT", "LU", "MY",
"MT", "MX", "ME", "MA", "NP", "NL", "NZ", "NI", "NG", "MK", "NO", "OM", "PK", "PA",
"PG", "PY", "PE", "PH", "PL", "PT", "PR", "QA", "RO", "RU", "SA", "SN", "RS", "SG",
"SK", "SI", "ZA", "KR", "ES", "LK", "SE", "CH", "TW", "TZ", "TH", "TN", "TR", "UG",
"UA", "AE", "GB", "US", "UY", "VE", "VN", "YE", "ZW"
"DZ", "AR", "AU", "AT", "AZ", "BH", "BD", "BY", "BE", "BO", "BA", "BR", "BG", "KH",
"CA", "CL", "CO", "CR", "HR", "CY", "CZ", "DK", "DO", "EC", "EG", "SV", "EE", "FI",
"FR", "GE", "DE", "GH", "GR", "GT", "HN", "HK", "HU", "IS", "IN", "ID", "IQ", "IE",
"IL", "IT", "JM", "JP", "JO", "KZ", "KE", "KW", "LA", "LV", "LB", "LY", "LI", "LT",
"LU", "MY", "MT", "MX", "ME", "MA", "NP", "NL", "NZ", "NI", "NG", "MK", "NO", "OM",
"PK", "PA", "PG", "PY", "PE", "PH", "PL", "PT", "PR", "QA", "RO", "RU", "SA", "SN",
"RS", "SG", "SK", "SI", "ZA", "KR", "ES", "LK", "SE", "CH", "TW", "TZ", "TH", "TN",
"TR", "UG", "UA", "AE", "GB", "US", "UY", "VE", "VN", "YE", "ZW"
);
@Override

View File

@ -1,21 +1,21 @@
package org.schabi.newpipe.extractor.services.youtube.extractors;
import static org.schabi.newpipe.extractor.comments.CommentsInfoItem.UNKNOWN_REPLY_COUNT;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nullable;
import static org.schabi.newpipe.extractor.comments.CommentsInfoItem.UNKNOWN_REPLY_COUNT;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
private final JsonObject json;
@ -176,18 +176,20 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
}
@Override
public String getCommentText() throws ParsingException {
public Description getCommentText() throws ParsingException {
try {
final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText");
if (contentText.isEmpty()) {
// completely empty comments as described in
// https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
return "";
return Description.EMPTY_DESCRIPTION;
}
final String commentText = getTextFromObject(contentText);
final String commentText = getTextFromObject(contentText, true);
// YouTube adds U+FEFF in some comments.
// eg. https://www.youtube.com/watch?v=Nj4F63E59io<feff>
return Utils.removeUTF8BOM(commentText);
final String commentTextBomRemoved = Utils.removeUTF8BOM(commentText);
return new Description(commentTextBomRemoved, Description.HTML);
} catch (final Exception e) {
throw new ParsingException("Could not get comment text", e);
}

View File

@ -943,7 +943,8 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
}
if (status.equalsIgnoreCase("unplayable") && reason != null) {
if ((status.equalsIgnoreCase("unplayable") || status.equalsIgnoreCase("error"))
&& reason != null) {
if (reason.contains("Music Premium")) {
throw new YoutubeMusicPremiumContentException();
}
@ -963,6 +964,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
if (detailedErrorMessage != null && detailedErrorMessage.contains("country")) {
throw new GeographicRestrictionException(
"This video is not available in client's country.");
} else if (detailedErrorMessage != null) {
throw new ContentNotAvailableException(detailedErrorMessage);
} else {
throw new ContentNotAvailableException(reason);
}
}
}

View File

@ -42,7 +42,7 @@ public class BandcampCommentsExtractorTest {
for (CommentsInfoItem c : comments.getItems()) {
assertFalse(Utils.isBlank(c.getUploaderName()));
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
assertFalse(Utils.isBlank(c.getName()));
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
assertFalse(Utils.isBlank(c.getUrl()));

View File

@ -75,7 +75,7 @@ public class PeertubeCommentsExtractorTest {
assertFalse(Utils.isBlank(c.getUploaderName()));
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
assertFalse(Utils.isBlank(c.getCommentId()));
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
assertFalse(Utils.isBlank(c.getName()));
assertFalse(Utils.isBlank(c.getTextualUploadDate()));
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
@ -91,7 +91,7 @@ public class PeertubeCommentsExtractorTest {
private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
for (CommentsInfoItem c : comments) {
if (c.getCommentText().contains(comment)) {
if (c.getCommentText().getContent().contains(comment)) {
return true;
}
}

View File

@ -5,6 +5,7 @@ import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.schabi.newpipe.extractor.ExtractorAsserts.assertContains;
import static org.schabi.newpipe.extractor.ExtractorAsserts.assertGreater;
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
import static org.schabi.newpipe.extractor.comments.CommentsInfoItem.UNKNOWN_REPLY_COUNT;
@ -95,7 +96,7 @@ public class YoutubeCommentsExtractorTest {
assertFalse(Utils.isBlank(c.getUploaderName()));
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
assertFalse(Utils.isBlank(c.getCommentId()));
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
assertFalse(Utils.isBlank(c.getName()));
assertFalse(Utils.isBlank(c.getTextualUploadDate()));
assertNotNull(c.getUploadDate());
@ -111,7 +112,7 @@ public class YoutubeCommentsExtractorTest {
private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
for (CommentsInfoItem c : comments) {
if (c.getCommentText().contains(comment)) {
if (c.getCommentText().getContent().contains(comment)) {
return true;
}
}
@ -152,9 +153,9 @@ public class YoutubeCommentsExtractorTest {
assertFalse(Utils.isBlank(c.getUrl()));
assertTrue(c.getLikeCount() >= 0);
if (c.getCommentId().equals("Ugga_h1-EXdHB3gCoAEC")) { // comment without text
assertTrue(Utils.isBlank(c.getCommentText()));
assertTrue(Utils.isBlank(c.getCommentText().getContent()));
} else {
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
}
}
}
@ -193,7 +194,7 @@ public class YoutubeCommentsExtractorTest {
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
assertFalse(Utils.isBlank(c.getUrl()));
assertTrue(c.getLikeCount() >= 0);
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
if (c.isHeartedByUploader()) {
heartedByUploader = true;
}
@ -233,7 +234,7 @@ public class YoutubeCommentsExtractorTest {
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
assertFalse(Utils.isBlank(c.getUrl()));
assertTrue(c.getLikeCount() >= 0);
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
}
assertTrue(comments.getItems().get(0).isPinned(), "First comment isn't pinned");
@ -328,7 +329,7 @@ public class YoutubeCommentsExtractorTest {
InfoItemsPage<CommentsInfoItem> replies = extractor.getPage(firstComment.getReplies());
assertEquals("First", replies.getItems().get(0).getCommentText(),
assertEquals("First", replies.getItems().get(0).getCommentText().getContent(),
"First reply comment did not match");
}
@ -344,4 +345,32 @@ public class YoutubeCommentsExtractorTest {
assertGreater(300, firstComment.getReplyCount());
}
}
/**
 * Verifies that text formatting in YouTube comments is exposed as HTML markup
 * in the extracted comment text.
 *
 * <p>The test runs against the recorded responses stored under the
 * {@code formatting} mock directory and expects the first comment of the
 * video to contain strikethrough ({@code <s>}) and bold ({@code <b>}) tags.
 */
public static class FormattingTest {
    private static final String url = "https://www.youtube.com/watch?v=zYpyS2HaZHM";
    private static YoutubeCommentsExtractor extractor;

    /**
     * Resets the YouTube extractor state, installs the downloader for the
     * {@code formatting} resources and fetches the comments page once for the class.
     *
     * @throws Exception if initialisation or fetching the page fails
     */
    @BeforeAll
    public static void setUp() throws Exception {
        YoutubeTestsUtils.ensureStateless();
        NewPipe.init(DownloaderFactory.getDownloader(RESOURCE_PATH + "formatting"));
        extractor = (YoutubeCommentsExtractor) YouTube
                .getCommentsExtractor(url);
        extractor.fetchPage();
    }

    /**
     * Checks that the first comment's text contains the expected HTML formatting tags.
     *
     * @throws IOException         if the comments page cannot be loaded
     * @throws ExtractionException if the comments cannot be extracted
     */
    @Test
    public void testGetCommentsFormatting() throws IOException, ExtractionException {
        final InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
        DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());

        final CommentsInfoItem firstComment = comments.getItems().get(0);
        // getCommentText() returns a Description, so the assertion has to be made
        // on its string content, as the other tests in this class already do.
        final String firstCommentContent = firstComment.getCommentText().getContent();
        assertContains("<s>", firstCommentContent);
        assertContains("<b>", firstCommentContent);
    }
}
}

View File

@ -0,0 +1,82 @@
{
"request": {
"httpMethod": "GET",
"url": "https://www.youtube.com/sw.js",
"headers": {
"Origin": [
"https://www.youtube.com"
],
"Referer": [
"https://www.youtube.com"
],
"Accept-Language": [
"en-GB, en;q\u003d0.9"
]
},
"localization": {
"languageCode": "en",
"countryCode": "GB"
}
},
"response": {
"responseCode": 200,
"responseMessage": "",
"responseHeaders": {
"access-control-allow-credentials": [
"true"
],
"access-control-allow-origin": [
"https://www.youtube.com"
],
"alt-svc": [
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000,h3-Q050\u003d\":443\"; ma\u003d2592000,h3-Q046\u003d\":443\"; ma\u003d2592000,h3-Q043\u003d\":443\"; ma\u003d2592000,quic\u003d\":443\"; ma\u003d2592000; v\u003d\"46,43\""
],
"cache-control": [
"private, max-age\u003d0"
],
"content-type": [
"text/javascript; charset\u003dutf-8"
],
"cross-origin-opener-policy-report-only": [
"same-origin; report-to\u003d\"youtube_main\""
],
"date": [
"Mon, 28 Nov 2022 20:27:36 GMT"
],
"expires": [
"Mon, 28 Nov 2022 20:27:36 GMT"
],
"p3p": [
"CP\u003d\"This is not a P3P policy! See http://support.google.com/accounts/answer/151657?hl\u003den-GB for more info.\""
],
"permissions-policy": [
"ch-ua-arch\u003d*, ch-ua-bitness\u003d*, ch-ua-full-version\u003d*, ch-ua-full-version-list\u003d*, ch-ua-model\u003d*, ch-ua-wow64\u003d*, ch-ua-platform\u003d*, ch-ua-platform-version\u003d*"
],
"report-to": [
"{\"group\":\"youtube_main\",\"max_age\":2592000,\"endpoints\":[{\"url\":\"https://csp.withgoogle.com/csp/report-to/youtube_main\"}]}"
],
"server": [
"ESF"
],
"set-cookie": [
"YSC\u003ddaTQ98V-voQ; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dTue, 03-Mar-2020 20:27:36 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"CONSENT\u003dPENDING+452; expires\u003dWed, 27-Nov-2024 20:27:36 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
],
"strict-transport-security": [
"max-age\u003d31536000"
],
"x-content-type-options": [
"nosniff"
],
"x-frame-options": [
"SAMEORIGIN"
],
"x-xss-protection": [
"0"
]
},
"responseBody": "\n self.addEventListener(\u0027install\u0027, event \u003d\u003e {\n event.waitUntil(self.skipWaiting());\n });\n self.addEventListener(\u0027activate\u0027, event \u003d\u003e {\n event.waitUntil(\n self.clients.claim().then(() \u003d\u003e self.registration.unregister()));\n });\n ",
"latestUrl": "https://www.youtube.com/sw.js"
}
}