Merge pull request #618 from TeamNewPipe/soundcloud_id

Fix Soundcloud extraction
This commit is contained in:
Tobi 2021-04-26 18:03:26 +02:00 committed by GitHub
commit 7cf8edd5ac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 36 additions and 16 deletions

View File

@ -19,6 +19,7 @@ import org.schabi.newpipe.extractor.services.soundcloud.extractors.SoundcloudCha
import org.schabi.newpipe.extractor.services.soundcloud.extractors.SoundcloudStreamExtractor;
import org.schabi.newpipe.extractor.services.soundcloud.extractors.SoundcloudStreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Parser.RegexException;
import org.schabi.newpipe.extractor.utils.Utils;
@ -40,7 +41,7 @@ import static org.schabi.newpipe.extractor.ServiceList.SoundCloud;
import static org.schabi.newpipe.extractor.utils.Utils.*;
public class SoundcloudParsingHelper {
private static final String HARDCODED_CLIENT_ID = "Kl9G8jQT22DxqatQk09IjWRujGlut5Vd"; // Updated on 04/03/21
private static final String HARDCODED_CLIENT_ID = "NcIaRZItQCNQp3Vq9Plvzf7tvjmVJnF6"; // Updated on 26/04/21
private static String clientId;
private SoundcloudParsingHelper() {
@ -140,27 +141,37 @@ public class SoundcloudParsingHelper {
}
/**
* Fetch the embed player with the url and return the id (like the id from the json api).
* Fetch the widget API with the url and return the id (like the id from the json api).
*
* @return the resolved id
*/
public static String resolveIdWithEmbedPlayer(String urlString) throws IOException, ReCaptchaException, ParsingException {
public static String resolveIdWithWidgetApi(String urlString) throws IOException, ReCaptchaException, ParsingException {
// Remove the tailing slash from URLs due to issues with the SoundCloud API
if (urlString.charAt(urlString.length() - 1) == '/') urlString = urlString.substring(0, urlString.length() - 1);
// Make URL lower case and remove www. if it exists.
// Without doing this, the widget API does not recognize the URL.
urlString = Utils.removeWWWFromUrl(urlString.toLowerCase());
URL url;
final URL url;
try {
url = Utils.stringToURL(urlString);
} catch (MalformedURLException e) {
throw new IllegalArgumentException("The given URL is not valid");
}
String response = NewPipe.getDownloader().get("https://w.soundcloud.com/player/?url="
+ URLEncoder.encode(url.toString(), UTF_8), SoundCloud.getLocalization()).responseBody();
// handle playlists / sets different and get playlist id via uir field in JSON
if (url.getPath().contains("/sets/") && !url.getPath().endsWith("/sets"))
return Parser.matchGroup1("\"uri\":\\s*\"https:\\/\\/api\\.soundcloud\\.com\\/playlists\\/((\\d)*?)\"", response);
return Parser.matchGroup1(",\"id\":(([^}\\n])*?),", response);
try {
final String widgetUrl = "https://api-widget.soundcloud.com/resolve?url="
+ URLEncoder.encode(url.toString(), UTF_8)
+ "&format=json&client_id=" + SoundcloudParsingHelper.clientId();
final String response = NewPipe.getDownloader().get(widgetUrl,
SoundCloud.getLocalization()).responseBody();
final JsonObject o = JsonParser.object().from(response);
return String.valueOf(JsonUtils.getValue(o, "id"));
} catch (JsonParserException e) {
throw new ParsingException("Could not parse JSON response", e);
} catch (ExtractionException e) {
throw new ParsingException("Could not resolve id with embedded player. ClientId not extracted", e);
}
}
/**

View File

@ -23,7 +23,7 @@ public class SoundcloudChannelLinkHandlerFactory extends ListLinkHandlerFactory
Utils.checkUrl(URL_PATTERN, url);
try {
return SoundcloudParsingHelper.resolveIdWithEmbedPlayer(url);
return SoundcloudParsingHelper.resolveIdWithWidgetApi(url);
} catch (Exception e) {
throw new ParsingException(e.getMessage(), e);
}

View File

@ -22,7 +22,7 @@ public class SoundcloudPlaylistLinkHandlerFactory extends ListLinkHandlerFactory
Utils.checkUrl(URL_PATTERN, url);
try {
return SoundcloudParsingHelper.resolveIdWithEmbedPlayer(url);
return SoundcloudParsingHelper.resolveIdWithWidgetApi(url);
} catch (Exception e) {
throw new ParsingException("Could not get id of url: " + url + " " + e.getMessage(), e);
}

View File

@ -32,7 +32,7 @@ public class SoundcloudStreamLinkHandlerFactory extends LinkHandlerFactory {
Utils.checkUrl(URL_PATTERN, url);
try {
return SoundcloudParsingHelper.resolveIdWithEmbedPlayer(url);
return SoundcloudParsingHelper.resolveIdWithWidgetApi(url);
} catch (Exception e) {
throw new ParsingException(e.getMessage(), e);
}

View File

@ -7,6 +7,7 @@ import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.*;
import java.util.regex.Pattern;
public class Utils {
@ -14,6 +15,7 @@ public class Utils {
public static final String HTTPS = "https://";
public static final String UTF_8 = "UTF-8";
public static final String EMPTY_STRING = "";
private static final Pattern WWW_PATTERN = Pattern.compile("(https?)?:\\/\\/www\\.");
private Utils() {
//no instance
@ -170,6 +172,13 @@ public class Utils {
return setsNoPort || usesDefaultPort;
}
public static String removeWWWFromUrl(String url) {
if (WWW_PATTERN.matcher(url).find()) {
return url.replace("www.", "");
}
return url;
}
public static String removeUTF8BOM(String s) {
if (s.startsWith("\uFEFF")) {
s = s.substring(1);

View File

@ -29,9 +29,9 @@ public class SoundcloudParsingHelperTest {
}
@Test
public void resolveIdWithEmbedPlayerTest() throws Exception {
Assert.assertEquals("26057743", SoundcloudParsingHelper.resolveIdWithEmbedPlayer("https://soundcloud.com/trapcity"));
Assert.assertEquals("16069159", SoundcloudParsingHelper.resolveIdWithEmbedPlayer("https://soundcloud.com/nocopyrightsounds"));
public void resolveIdWithWidgetApiTest() throws Exception {
Assert.assertEquals("26057743", SoundcloudParsingHelper.resolveIdWithWidgetApi("https://soundcloud.com/trapcity"));
Assert.assertEquals("16069159", SoundcloudParsingHelper.resolveIdWithWidgetApi("https://soundcloud.com/nocopyrightsounds"));
}