Rework link handlers to correctly accept external websites

This commit is contained in:
Fynn Godau 2020-12-05 15:08:26 +01:00
parent be562b8436
commit 04dd3d4d32
7 changed files with 63 additions and 22 deletions

View File

@ -123,6 +123,28 @@ public class BandcampExtractorHelper {
return "https://f4.bcbits.com/img/" + (album ? 'a' : "") + id + "_10.jpg"; return "https://f4.bcbits.com/img/" + (album ? 'a' : "") + id + "_10.jpg";
} }
/**
 * Checks whether the given URL is served by Bandcamp.
 *
 * <p>URLs whose host is a subdomain of <code>bandcamp.com</code> are accepted without any
 * network access. Every other URL is downloaded and accepted only if the page contains an
 * element with <code>name="generator"</code> whose <code>content</code> is
 * <code>Bandcamp</code>.</p>
 *
 * @param url the URL to check
 * @return <code>true</code> if the given url looks like it comes from a bandcamp custom domain
 * or if it comes from bandcamp.com itself
 * @throws ParsingException if the page could not be downloaded to inspect it; the underlying
 *                          exception is attached as the cause
 */
public static boolean isSupportedDomain(final String url) throws ParsingException {
    // Accept all bandcamp.com URLs. The host part must not contain '/', '?' or '#':
    // otherwise something like "https://example.com/x.bandcamp.com" would be treated
    // as a bandcamp.com URL even though its host is a different website.
    // Locale.ROOT keeps the lower-casing independent of the device language.
    if (url.toLowerCase(Locale.ROOT).matches("https?://[^/?#]+\\.bandcamp\\.com(/.*)?")) {
        return true;
    }

    try {
        // Accept all other URLs if they contain a <meta> tag that says they are generated by bandcamp
        return Jsoup.parse(NewPipe.getDownloader().get(url).responseBody())
                .getElementsByAttributeValue("name", "generator")
                .attr("content").equals("Bandcamp");
    } catch (IOException | ReCaptchaException e) {
        // Keep the original exception as cause so callers can see what actually went wrong
        throw new ParsingException("Could not determine whether URL is custom domain "
                + "(not available? network error?)", e);
    }
}
static DateWrapper parseDate(final String textDate) throws ParsingException { static DateWrapper parseDate(final String textDate) throws ParsingException {
try { try {
final Date date = new SimpleDateFormat("dd MMM yyyy HH:mm:ss zzz", Locale.ENGLISH).parse(textDate); final Date date = new SimpleDateFormat("dd MMM yyyy HH:mm:ss zzz", Locale.ENGLISH).parse(textDate);

View File

@ -24,7 +24,7 @@ public class BandcampChannelLinkHandlerFactory extends ListLinkHandlerFactory {
try { try {
final String response = NewPipe.getDownloader().get(url).responseBody(); final String response = NewPipe.getDownloader().get(url).responseBody();
// This variable contains band data! // Use band data embedded in website to extract ID
final JsonObject bandData = BandcampExtractorHelper.getJsonData(response, "data-band"); final JsonObject bandData = BandcampExtractorHelper.getJsonData(response, "data-band");
return String.valueOf(bandData.getLong("id")); return String.valueOf(bandData.getLong("id"));
@ -51,17 +51,15 @@ public class BandcampChannelLinkHandlerFactory extends ListLinkHandlerFactory {
} }
/** /**
* Matches <code>* .bandcamp.com</code> as well as custom domains * Accepts only pages that do not lead to an album or track. Supports external pages.
* where the profile is at <code>* . * /releases</code>
*/ */
@Override @Override
public boolean onAcceptUrl(final String url) { public boolean onAcceptUrl(final String url) throws ParsingException {
// Is a subdomain of bandcamp.com? // Exclude URLs that lead to a track or album
boolean isBandcampComArtistPage = url.matches("https?://.+\\.bandcamp\\.com/?"); if (url.matches(".*/(album|track)/.*")) return false;
boolean isCustomDomainReleases = url.matches("https?://.+\\..+/releases/?(?!.)"); // Test whether domain is supported
return BandcampExtractorHelper.isSupportedDomain(url);
return isBandcampComArtistPage || isCustomDomainReleases;
} }
} }

View File

@ -4,6 +4,7 @@ package org.schabi.newpipe.extractor.services.bandcamp.linkHandler;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper;
import java.util.List; import java.util.List;
@ -22,8 +23,16 @@ public class BandcampPlaylistLinkHandlerFactory extends ListLinkHandlerFactory {
return url; return url;
} }
/**
* Accepts all bandcamp URLs that contain /album/ behind their domain name.
*/
@Override @Override
public boolean onAcceptUrl(final String url) { public boolean onAcceptUrl(final String url) throws ParsingException {
return url.toLowerCase().matches("https?://.+\\..+/album/.+");
// Exclude URLs which do not lead to an album
if (!url.toLowerCase().matches("https?://.+\\..+/album/.+")) return false;
// Test whether domain is supported
return BandcampExtractorHelper.isSupportedDomain(url);
} }
} }

View File

@ -4,6 +4,7 @@ package org.schabi.newpipe.extractor.services.bandcamp.linkHandler;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper;
/** /**
* <p>Tracks don't have standalone ids, they are always in combination with the band id. * <p>Tracks don't have standalone ids, they are always in combination with the band id.
@ -40,16 +41,19 @@ public class BandcampStreamLinkHandlerFactory extends LinkHandlerFactory {
} }
/** /**
* Sometimes, the root page of an artist is also an album or track * Accepts URLs that point to a bandcamp radio show or that are a bandcamp
* page. In that case, it is assumed that one actually wants to open * domain and point to a track.
* the profile and not the track it has set as the default one.
* <p>Urls are expected to be in this format to account for
* custom domains:</p>
* <code>https:// * . * /track/ *</code>
*/ */
@Override @Override
public boolean onAcceptUrl(final String url) { public boolean onAcceptUrl(final String url) throws ParsingException {
return url.toLowerCase().matches("https?://.+\\..+/track/.+")
|| url.toLowerCase().matches("https?://bandcamp\\.com/\\?show=\\d+"); // Accept Bandcamp radio
if (url.toLowerCase().matches("https?://bandcamp\\.com/\\?show=\\d+")) return true;
// Don't accept URLs that don't point to a track
if (!url.toLowerCase().matches("https?://.+\\..+/track/.+")) return false;
// Test whether domain is supported
return BandcampExtractorHelper.isSupportedDomain(url);
} }
} }

View File

@ -26,13 +26,19 @@ public class BandcampChannelLinkHandlerFactoryTest {
@Test @Test
public void testAcceptUrl() throws ParsingException { public void testAcceptUrl() throws ParsingException {
assertTrue(linkHandler.acceptUrl("http://interovgm.com/releases/")); // Bandcamp URLs
assertTrue(linkHandler.acceptUrl("https://interovgm.com/releases"));
assertTrue(linkHandler.acceptUrl("http://zachbenson.bandcamp.com")); assertTrue(linkHandler.acceptUrl("http://zachbenson.bandcamp.com"));
assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/")); assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/"));
assertTrue(linkHandler.acceptUrl("https://billwurtz.bandcamp.com/releases"));
assertFalse(linkHandler.acceptUrl("https://bandcamp.com")); assertFalse(linkHandler.acceptUrl("https://bandcamp.com"));
assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen")); assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen"));
// External URLs
assertTrue(linkHandler.acceptUrl("http://interovgm.com/releases/"));
assertTrue(linkHandler.acceptUrl("https://interovgm.com/releases"));
assertFalse(linkHandler.acceptUrl("https://example.com/releases"));
} }
@Test @Test

View File

@ -35,6 +35,7 @@ public class BandcampPlaylistLinkHandlerFactoryTest {
assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/")); assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/"));
assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen")); assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen"));
assertFalse(linkHandler.acceptUrl("https://interovgm.com/track/title")); assertFalse(linkHandler.acceptUrl("https://interovgm.com/track/title"));
assertFalse(linkHandler.acceptUrl("https://example.com/album/samplealbum"));
assertTrue(linkHandler.acceptUrl("https://powertothequeerkids.bandcamp.com/album/power-to-the-queer-kids")); assertTrue(linkHandler.acceptUrl("https://powertothequeerkids.bandcamp.com/album/power-to-the-queer-kids"));
assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/album/prom")); assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/album/prom"));

View File

@ -43,6 +43,7 @@ public class BandcampStreamLinkHandlerFactoryTest {
assertFalse(linkHandler.acceptUrl("https://bandcamp.com")); assertFalse(linkHandler.acceptUrl("https://bandcamp.com"));
assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/")); assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/"));
assertFalse(linkHandler.acceptUrl("https://powertothequeerkids.bandcamp.com/album/power-to-the-queer-kids")); assertFalse(linkHandler.acceptUrl("https://powertothequeerkids.bandcamp.com/album/power-to-the-queer-kids"));
assertFalse(linkHandler.acceptUrl("https://example.com/track/sampletrack"));
assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen")); assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen"));
assertTrue(linkHandler.acceptUrl("http://ZachBenson.Bandcamp.COM/Track/U-I-Tonite/")); assertTrue(linkHandler.acceptUrl("http://ZachBenson.Bandcamp.COM/Track/U-I-Tonite/"));