// Created by Fynn Godau 2019, licensed GNU GPL version 3 or later package org.schabi.newpipe.extractor.services.bandcamp.extractors; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; import com.grack.nanojson.JsonWriter; import org.jsoup.Jsoup; import org.jsoup.nodes.Element; import org.schabi.newpipe.extractor.Image; import org.schabi.newpipe.extractor.Image.ResolutionLevel; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.utils.ImageSuffix; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.time.DateTimeException; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.util.Collections; import java.util.List; import java.util.Locale; import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; import static org.schabi.newpipe.extractor.Image.HEIGHT_UNKNOWN; import static org.schabi.newpipe.extractor.Image.WIDTH_UNKNOWN; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps; public final class BandcampExtractorHelper { /** * List of image IDs which preserve aspect ratio with their theoretical dimension known. * *
* Bandcamp images are not always squares, so only images which preserve aspect ratio are used. *
* ** One of the direct consequences of this specificity is that only one dimension of images is * known at a time, depending on the image ID. *
* ** Note also that dimensions are only theoretical because if the image size is less than the * dimensions of the image ID, it will not be upscaled but kept at its original size. *
* ** IDs come from the * GitHub Gist "Bandcamp File Format Parameters" by f2k1de *
*/ private static final List* The image ID {@code 10} was chosen because it provides images wide up to 1200px (when * the original image width is more than or equal this resolution). *
* ** Other integer values are possible as well (e.g. 0 is a very large resolution, possibly the * original); see {@link #IMAGE_URL_SUFFIXES_AND_RESOLUTIONS} for more details about image * resolution IDs. *
* * @param id the image ID * @param isAlbum whether the image is the cover of an album or a track * @return a URL of the image with this ID with a width up to 1200px */ @Nonnull public static String getImageUrl(final long id, final boolean isAlbum) { return IMAGES_DOMAIN_AND_PATH + (isAlbum ? 'a' : "") + id + "_10.jpg"; } /** * @returntrue
if the given URL looks like it comes from a bandcamp custom domain
* or if it comes from bandcamp.com
itself
*/
public static boolean isSupportedDomain(final String url) throws ParsingException {
    // Fast path: every subdomain of bandcamp.com is accepted without any network request.
    if (url.toLowerCase().matches("https?://.+\\.bandcamp\\.com(/.*)?")) {
        return true;
    }

    try {
        // Any other URL is fetched and inspected: the page is considered supported only if it
        // contains the nested footer elements below and the footer logo's hidden text reads
        // "Bandcamp".
        // Document is a subclass of Element, so the walk can start from the parsed page itself.
        Element current = Jsoup.parse(NewPipe.getDownloader().get(url).responseBody());

        // Each ID is looked up inside the element found for the previous one, exactly like the
        // chained getElementById calls this replaces. getElementById returns null when there is
        // no match, which simply means the page is not a Bandcamp page.
        final String[] nestedFooterIds
                = {"pgFt", "pgFt-inner", "footer-logo-wrapper", "footer-logo"};
        for (final String elementId : nestedFooterIds) {
            current = current.getElementById(elementId);
            if (current == null) {
                return false;
            }
        }

        return current.getElementsByClass("hiddenAccess").text().equals("Bandcamp");
    } catch (final IOException | ReCaptchaException e) {
        // Keep the original failure as the cause so callers can tell a network error from a
        // captcha challenge.
        throw new ParsingException("Could not determine whether URL is custom domain "
                + "(not available? network error?)", e);
    }
}
/**
 * Check whether a URL designates a Bandcamp radio show.
 *
 * <p>
 * The comparison is done on the lower-cased URL, so the scheme, host and query parameter name
 * are matched case-insensitively. The whole URL must match: anything before the scheme or
 * after the numeric show ID makes the check fail.
 * </p>
 *
 * @param url the URL to check
 * @return {@code true} if the URL has the form {@code http(s)://bandcamp.com/?show=SHOW_ID},
 *         where {@code SHOW_ID} is made of one or more digits
 */
public static boolean isRadioUrl(final String url) {
    final String lowerCasedUrl = url.toLowerCase();
    return lowerCasedUrl.matches("https?://bandcamp\\.com/\\?show=\\d+");
}
/**
 * Formatter for dates written as {@code dd MMM yyyy HH:mm:ss zzz} with English month names.
 *
 * <p>
 * {@link DateTimeFormatter} instances are immutable and thread-safe, so a single shared
 * instance is used instead of building a new one on every {@link #parseDate(String)} call.
 * </p>
 */
private static final DateTimeFormatter TEXT_DATE_FORMATTER =
        DateTimeFormatter.ofPattern("dd MMM yyyy HH:mm:ss zzz", Locale.ENGLISH);

/**
 * Parse a textual date in the {@code dd MMM yyyy HH:mm:ss zzz} format (English locale).
 *
 * @param textDate the date text to parse
 * @return a {@link DateWrapper} built from the parsed date converted to an offset date-time
 * @throws ParsingException if the text does not match the expected format; the underlying
 *                          {@link DateTimeException} is kept as the cause
 */
public static DateWrapper parseDate(final String textDate) throws ParsingException {
    try {
        final ZonedDateTime zonedDateTime = ZonedDateTime.parse(textDate, TEXT_DATE_FORMATTER);
        // NOTE(review): the second argument presumably flags the date as not approximated,
        // since the text carries a full date and time; confirm against DateWrapper.
        return new DateWrapper(zonedDateTime.toOffsetDateTime(), false);
    } catch (final DateTimeException e) {
        throw new ParsingException("Could not parse date '" + textDate + "'", e);
    }
}
/**
* Get a list of images from a search result {@link Element}.
*
* * This method will call {@link #getImagesFromImageUrl(String)} using the first non null and * non empty image URL found from the {@code src} attribute of {@code img} HTML elements, or an * empty string if no valid image URL was found. *
* * @param searchResult a search result {@link Element} * @return an unmodifiable list of {@link Image}s, which is never null but can be empty, in the * case where no valid image URL was found */ @Nonnull public static List* This method will remove the image ID and its extension from the end of the URL and then call * {@link #getImagesFromImageBaseUrl(String)}. *
* * @param imageUrl the full URL of an image provided by Bandcamp, such as in its HTML code * @return an unmodifiable list of {@link Image}s, which is never null but can be empty, in the * case where the image URL has been not extracted (and so is null or empty) */ @Nonnull public static List* This method will call {@link #getImagesFromImageBaseUrl(String)}. *
* * @param id the id of an image provided by Bandcamp * @param isAlbum whether the image is the cover of an album * @return an unmodifiable list of {@link Image}s, which is never null but can be empty, in the * case where the image ID has been not extracted (and so equal to 0) */ @Nonnull public static List* Base image URLs are images containing the image path, a {@code a} letter if it comes from an * album, its ID and an underscore. *
* ** Images resolutions returned are the ones of {@link #IMAGE_URL_SUFFIXES_AND_RESOLUTIONS}. *
* * @param baseUrl the base URL of the image * @return an unmodifiable and non-empty list of {@link Image}s */ @Nonnull private static List