From 3dfbb1a2baa2a24226d01493b117b84dbb25ce84 Mon Sep 17 00:00:00 2001 From: Mauricio Colli Date: Sat, 7 Apr 2018 22:05:16 -0300 Subject: [PATCH] Lightweight method to get the client id on SoundCloud - Instead of downloading the whole ~1MB file, download just the first 16KB, with a fallback to the old way in case of failure - I ran into some technical difficulties limiting the download (the current implementation is too basic/simple for that), but in the end a simple HTTP header (Range) was enough --- .../soundcloud/SoundcloudParsingHelper.java | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java index 11c78e241..317ccfb34 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java @@ -22,6 +22,7 @@ import java.net.URLEncoder; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; +import java.util.HashMap; import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps; @@ -35,18 +36,25 @@ public class SoundcloudParsingHelper { if (clientId != null && !clientId.isEmpty()) return clientId; Downloader dl = NewPipe.getDownloader(); - String response = dl.download("https://soundcloud.com"); + Document doc = Jsoup.parse(response); - - // TODO: Find a less heavy way to get the client_id - // Currently we are downloading a 1MB file (!) just to get the client_id, - // youtube-dl don't have a way too, they are just hardcoding and updating it when it becomes invalid. - // The embed mode has a way to get it, but we still have to download a heavy file (~800KB). 
Element jsElement = doc.select("script[src^=https://a-v2.sndcdn.com/assets/app]").first(); - String js = dl.download(jsElement.attr("src")); - return clientId = Parser.matchGroup1(",client_id:\"(.*?)\"", js); + final String clientIdPattern = ",client_id:\"(.*?)\""; + + try { + final HashMap headers = new HashMap<>(); + headers.put("Range", "bytes=0-16384"); + String js = dl.download(jsElement.attr("src"), headers); + + return clientId = Parser.matchGroup1(clientIdPattern, js); + } catch (IOException | RegexException ignored) { + // Ignore it and proceed to download the whole js file + } + + String js = dl.download(jsElement.attr("src")); + return clientId = Parser.matchGroup1(clientIdPattern, js); } public static String toDateString(String time) throws ParsingException {