From 98f49852d79712b4c839c92e7f75c8094315cb6c Mon Sep 17 00:00:00 2001 From: Connectety-W Date: Sun, 13 Jan 2019 12:52:07 +0100 Subject: [PATCH 1/5] refactored YouTube-linkHandler to use less regex and more URL-methods --- .../YoutubeChannelLinkHandlerFactory.java | 48 ++++- .../linkHandler/YoutubeParsingHelper.java | 38 ++++ .../YoutubePlaylistLinkHandlerFactory.java | 35 +++- .../YoutubeStreamLinkHandlerFactory.java | 180 +++++++++++------- .../YoutubeTrendingLinkHandlerFactory.java | 13 +- .../schabi/newpipe/extractor/utils/Utils.java | 43 ++++- .../YoutubeStreamLinkHandlerFactoryTest.java | 12 +- 7 files changed, 277 insertions(+), 92 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java index 950bab2b9..da207b278 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java @@ -1,9 +1,9 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; -import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; import org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.utils.Parser; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import java.net.URL; import java.util.List; /* @@ -29,25 +29,53 @@ import java.util.List; public class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory { private static final YoutubeChannelLinkHandlerFactory instance = new YoutubeChannelLinkHandlerFactory(); - private static final String ID_PATTERN = "/(user/[A-Za-z0-9_-]*|channel/[A-Za-z0-9_-]*)"; public static YoutubeChannelLinkHandlerFactory getInstance() { return instance; } - 
@Override - public String getId(String url) throws ParsingException { - return Parser.matchGroup1(ID_PATTERN, url); - } - @Override public String getUrl(String id, List contentFilters, String searchFilter) { return "https://www.youtube.com/" + id; } + @Override + public String getId(String url) throws ParsingException { + try { + URL urlObj = new URL(url); + String path = urlObj.getPath(); + + if (!(YoutubeParsingHelper.isYoutubeURL(urlObj) || urlObj.getHost().equalsIgnoreCase("hooktube.com"))) { + throw new ParsingException("the URL given is not a Youtube-URL"); + } + + if (!path.startsWith("/user/") && !path.startsWith("/channel/")) { + throw new ParsingException("the URL given is neither a channel nor an user"); + } + + // remove leading "/" + path = path.substring(1); + + String[] splitPath = path.split("/"); + String id = splitPath[1]; + + if (id == null || !id.matches("[A-Za-z0-9_-]+")) { + throw new ParsingException("The given id is not a Youtube-Video-ID"); + } + + return splitPath[0] + "/" + id; + } catch (final Exception exception) { + throw new ParsingException("Error could not parse url :" + exception.getMessage(), exception); + } + } + @Override public boolean onAcceptUrl(String url) { - return (url.contains("youtube") || url.contains("youtu.be") || url.contains("hooktube.com")) - && (url.contains("/user/") || url.contains("/channel/")); + try { + getId(url); + } catch (ParsingException e) { + return false; + } + return true; } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java index 84f1f1351..335bc5bf6 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java @@ -3,6 +3,8 @@ package 
org.schabi.newpipe.extractor.services.youtube.linkHandler; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import java.net.URL; + /* * Created by Christian Schabesberger on 02.03.16. * @@ -28,6 +30,42 @@ public class YoutubeParsingHelper { private YoutubeParsingHelper() { } + private static boolean isHTTP(URL url) { + // make sure its http or https + String protocol = url.getProtocol(); + if (!protocol.equals("http") && !protocol.equals("https")) { + return false; + } + + boolean usesDefaultPort = url.getPort() == url.getDefaultPort(); + boolean setsNoPort = url.getPort() == -1; + + return setsNoPort || usesDefaultPort; + } + + public static boolean isYoutubeURL(URL url) { + // make sure its http or https + if (!isHTTP(url)) + return false; + + // make sure its a known youtube url + String host = url.getHost(); + return host.equalsIgnoreCase("youtube.com") || host.equalsIgnoreCase("www.youtube.com") + || host.equalsIgnoreCase("m.youtube.com"); + } + + public static boolean isYoutubeALikeURL(URL url) { + // make sure its http or https + if (!isHTTP(url)) + return false; + + // make sure its a known youtube url + String host = url.getHost(); + return host.equalsIgnoreCase("youtube.com") || host.equalsIgnoreCase("www.youtube.com") + || host.equalsIgnoreCase("m.youtube.com") || host.equalsIgnoreCase("www.youtube-nocookie.com") + || host.equalsIgnoreCase("youtu.be") || host.equalsIgnoreCase("hooktube.com"); + } + public static long parseDurationString(String input) throws ParsingException, NumberFormatException { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubePlaylistLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubePlaylistLinkHandlerFactory.java index 9954634fc..91944122d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubePlaylistLinkHandlerFactory.java +++ 
b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubePlaylistLinkHandlerFactory.java @@ -1,16 +1,15 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; - -import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; import org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.utils.Parser; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.schabi.newpipe.extractor.utils.Utils; +import java.net.URL; import java.util.List; public class YoutubePlaylistLinkHandlerFactory extends ListLinkHandlerFactory { private static final YoutubePlaylistLinkHandlerFactory instance = new YoutubePlaylistLinkHandlerFactory(); - private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{10,})"; public static YoutubePlaylistLinkHandlerFactory getInstance() { return instance; @@ -24,17 +23,35 @@ public class YoutubePlaylistLinkHandlerFactory extends ListLinkHandlerFactory { @Override public String getId(String url) throws ParsingException { try { - return Parser.matchGroup1("list=" + ID_PATTERN, url); + URL urlObj = new URL(url); + + if (!YoutubeParsingHelper.isYoutubeURL(urlObj)) { + throw new ParsingException("the url given is not a Youtube-URL"); + } + + String listID = Utils.getQueryValue(urlObj, "list"); + + if (listID == null) { + throw new ParsingException("the url given does not include a playlist"); + } + + if (!listID.matches("[a-zA-Z0-9_-]{10,}")) { + throw new ParsingException("the list-ID given in the URL does not match the list pattern"); + } + + return listID; } catch (final Exception exception) { throw new ParsingException("Error could not parse url :" + exception.getMessage(), exception); } } - @Override public boolean onAcceptUrl(final String url) { - final boolean hasNotEmptyUrl = url != null && !url.isEmpty(); - final boolean isYoutubeDomain = hasNotEmptyUrl && (url.contains("youtube") || url.contains("youtu.be")); - return 
isYoutubeDomain && url.contains("list="); + try { + getId(url); + } catch (ParsingException e) { + return false; + } + return true; } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java index 5d07779cf..e479e74c9 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java @@ -1,21 +1,14 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.schabi.newpipe.extractor.Downloader; -import org.schabi.newpipe.extractor.NewPipe; -import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; import org.schabi.newpipe.extractor.exceptions.FoundAdException; import org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; -import org.schabi.newpipe.extractor.utils.Parser; +import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; +import org.schabi.newpipe.extractor.utils.Utils; -import java.io.IOException; -import java.io.UnsupportedEncodingException; +import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; -import java.net.URLDecoder; +import java.net.URL; /* * Created by Christian Schabesberger on 02.02.16. 
@@ -40,7 +33,6 @@ import java.net.URLDecoder; public class YoutubeStreamLinkHandlerFactory extends LinkHandlerFactory { private static final YoutubeStreamLinkHandlerFactory instance = new YoutubeStreamLinkHandlerFactory(); - private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{11})"; private YoutubeStreamLinkHandlerFactory() { } @@ -49,78 +41,138 @@ public class YoutubeStreamLinkHandlerFactory extends LinkHandlerFactory { return instance; } + private static String assertIsID(String id) throws ParsingException { + if (id == null || !id.matches("[a-zA-Z0-9_-]{11}")) { + throw new ParsingException("The given string is not a Youtube-Video-ID"); + } + + return id; + } + @Override public String getUrl(String id) { return "https://www.youtube.com/watch?v=" + id; } @Override - public String getId(String url) throws ParsingException, IllegalArgumentException { - if (url.isEmpty()) { - throw new IllegalArgumentException("The url parameter should not be empty"); - } + public String getId(String urlString) throws ParsingException, IllegalArgumentException { + try { + URI uri = new URI(urlString); - String lowercaseUrl = url.toLowerCase(); - if (lowercaseUrl.contains("youtube")) { - if (lowercaseUrl.contains("list=")) { - throw new ParsingException("Error no suitable url: " + url); - } - if (url.contains("attribution_link")) { - try { - String escapedQuery = Parser.matchGroup1("u=(.[^&|$]*)", url); - String query = URLDecoder.decode(escapedQuery, "UTF-8"); - return Parser.matchGroup1("v=" + ID_PATTERN, query); - } catch (UnsupportedEncodingException uee) { - throw new ParsingException("Could not parse attribution_link", uee); + if (uri.getScheme().equals("vnd.youtube")) { + String scheme = uri.getSchemeSpecificPart(); + if (scheme.startsWith("//")) { + urlString = "https:" + scheme; + } else { + return assertIsID(scheme); } } - if (url.contains("vnd.youtube")) { - return Parser.matchGroup1(ID_PATTERN, url); - } - if (url.contains("embed")) { - return 
Parser.matchGroup1("embed/" + ID_PATTERN, url); - } - if (url.contains("googleads")) { - throw new FoundAdException("Error found add: " + url); - } - return Parser.matchGroup1("[?&]v=" + ID_PATTERN, url); + } catch (URISyntaxException ignored) { } - if (lowercaseUrl.contains("youtu.be")) { - if (lowercaseUrl.contains("list=")) { - throw new ParsingException("Error no suitable url: " + url); - } - if (url.contains("v=")) { - return Parser.matchGroup1("v=" + ID_PATTERN, url); - } - return Parser.matchGroup1("[Yy][Oo][Uu][Tt][Uu]\\.[Bb][Ee]/" + ID_PATTERN, url); + + URL url; + try { + url = new URL(urlString); + } catch (MalformedURLException e) { + throw new IllegalArgumentException("The given URL is not valid"); } - if (lowercaseUrl.contains("hooktube")) { - if (lowercaseUrl.contains("&v=") - || lowercaseUrl.contains("?v=")) { - return Parser.matchGroup1("[?&]v=" + ID_PATTERN, url); + + String host = url.getHost(); + String path = url.getPath(); + // remove leading "/" of URL-path if URL-path is given + if (!path.isEmpty()) { + path = path.substring(1); + } + + if (!YoutubeParsingHelper.isYoutubeALikeURL(url)) { + if (host.equalsIgnoreCase("googleads.g.doubleclick.net")) { + throw new FoundAdException("Error found ad: " + urlString); } - if (url.contains("/embed/")) { - return Parser.matchGroup1("embed/" + ID_PATTERN, url); + + throw new ParsingException("The url is not a Youtube-URL"); + } + + if (YoutubePlaylistLinkHandlerFactory.getInstance().acceptUrl(urlString)) { + throw new ParsingException("Error no suitable url: " + urlString); + } + + // using uppercase instead of lowercase, because toLowercase replaces some unicode characters + // with their lowercase ASCII equivalent. Using toLowercase could result in faultily matching unicode urls. 
+ switch (host.toUpperCase()) { + case "WWW.YOUTUBE-NOCOOKIE.COM": { + if (path.startsWith("embed/")) { + String id = path.split("/")[1]; + + return assertIsID(id); + } } - if (url.contains("/v/")) { - return Parser.matchGroup1("v/" + ID_PATTERN, url); + + case "YOUTUBE.COM": + case "WWW.YOUTUBE.COM": + case "M.YOUTUBE.COM": { + if (path.equals("attribution_link")) { + String uQueryValue = Utils.getQueryValue(url, "u"); + + URL decodedURL; + try { + decodedURL = new URL("http://www.youtube.com" + uQueryValue); + } catch (MalformedURLException e) { + throw new ParsingException("Error no suitable url: " + urlString); + } + + String viewQueryValue = Utils.getQueryValue(decodedURL, "v"); + return assertIsID(viewQueryValue); + } + + if (path.startsWith("embed/")) { + String id = path.split("/")[1]; + + return assertIsID(id); + } + + String viewQueryValue = Utils.getQueryValue(url, "v"); + return assertIsID(viewQueryValue); } - if (url.contains("/watch/")) { - return Parser.matchGroup1("watch/" + ID_PATTERN, url); + + case "YOUTU.BE": { + String viewQueryValue = Utils.getQueryValue(url, "v"); + if (viewQueryValue != null) { + return assertIsID(viewQueryValue); + } + + return assertIsID(path); + } + + case "HOOKTUBE.COM": { + if (path.equals("watch")) { + String viewQueryValue = Utils.getQueryValue(url, "v"); + if (viewQueryValue != null) { + return assertIsID(viewQueryValue); + } + } + if (path.startsWith("embed/")) { + String id = path.substring("embed/".length()); + + return assertIsID(id); + } + if (path.startsWith("v/")) { + String id = path.substring("v/".length()); + + return assertIsID(id); + } + if (path.startsWith("watch/")) { + String id = path.substring("watch/".length()); + + return assertIsID(id); + } } } - throw new ParsingException("Error no suitable url: " + url); + + throw new ParsingException("Error no suitable url: " + urlString); } @Override public boolean onAcceptUrl(final String url) throws FoundAdException { - final String lowercaseUrl = 
url.toLowerCase(); - if (!lowercaseUrl.contains("youtube") && - !lowercaseUrl.contains("youtu.be") && - !lowercaseUrl.contains("hooktube")) { - return false; - // bad programming I know <-- nice meme - } try { getId(url); return true; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeTrendingLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeTrendingLinkHandlerFactory.java index e61693b08..123da9d1a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeTrendingLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeTrendingLinkHandlerFactory.java @@ -21,8 +21,9 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; */ import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; -import org.schabi.newpipe.extractor.utils.Parser; +import java.net.MalformedURLException; +import java.net.URL; import java.util.List; public class YoutubeTrendingLinkHandlerFactory extends ListLinkHandlerFactory { @@ -38,6 +39,14 @@ public class YoutubeTrendingLinkHandlerFactory extends ListLinkHandlerFactory { @Override public boolean onAcceptUrl(final String url) { - return Parser.isMatch("^(https://|http://|)(www.|m.|)youtube.com/feed/trending(|\\?.*)$", url); + URL urlObj; + try { + urlObj = new URL(url); + } catch (MalformedURLException e) { + return false; + } + + String urlPath = urlObj.getPath(); + return YoutubeParsingHelper.isYoutubeURL(urlObj) && urlPath.equals("/feed/trending"); } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java index 663fd093b..65f3ce24c 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java @@ -2,6 +2,9 
@@ package org.schabi.newpipe.extractor.utils; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import java.io.UnsupportedEncodingException; +import java.net.URL; +import java.net.URLDecoder; import java.util.List; public class Utils { @@ -57,5 +60,43 @@ public class Utils { } return url; } -} + /** + * get the value of a URL-query by name. + * if a url-query is given multiple times, only the value of the first query is returned + * + * @param url the url to be used + * @param parameterName the name of the query parameter whose value is returned + * @return a string that contains the value of the query parameter or null if nothing was found + */ + public static String getQueryValue(URL url, String parameterName) { + String urlQuery = url.getQuery(); + + if (urlQuery != null) { + for (String param : urlQuery.split("&")) { + String[] params = param.split("=", 2); + + String query; + try { + query = URLDecoder.decode(params[0], "UTF-8"); + } catch (UnsupportedEncodingException e) { + System.err.println("Cannot decode string with UTF-8. using the string without decoding"); + e.printStackTrace(); + query = params[0]; + } + + if (query.equals(parameterName)) { + try { + return URLDecoder.decode(params[1], "UTF-8"); + } catch (UnsupportedEncodingException e) { + System.err.println("Cannot decode string with UTF-8. 
using the string without decoding"); + e.printStackTrace(); + return params[1]; + } + } + } + } + + return null; + } +} \ No newline at end of file diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamLinkHandlerFactoryTest.java index 519eb0efb..f06ad319d 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamLinkHandlerFactoryTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamLinkHandlerFactoryTest.java @@ -60,9 +60,9 @@ public class YoutubeStreamLinkHandlerFactoryTest { public void getIdfromYt() throws Exception { assertEquals("jZViOEv90dI", linkHandler.fromUrl("https://www.youtube.com/watch?v=jZViOEv90dI").getId()); assertEquals("W-fFHeTX70Q", linkHandler.fromUrl("https://www.youtube.com/watch?v=W-fFHeTX70Q").getId()); - assertEquals("jZViOEv90dI", linkHandler.fromUrl("https://www.youtube.com/watch?v=jZViOEv90dI?t=100").getId()); - assertEquals("jZViOEv90dI", linkHandler.fromUrl("https://WWW.YouTube.com/watch?v=jZViOEv90dI?t=100").getId()); - assertEquals("jZViOEv90dI", linkHandler.fromUrl("HTTPS://www.youtube.com/watch?v=jZViOEv90dI?t=100").getId()); + assertEquals("jZViOEv90dI", linkHandler.fromUrl("https://www.youtube.com/watch?v=jZViOEv90dI&t=100").getId()); + assertEquals("jZViOEv90dI", linkHandler.fromUrl("https://WWW.YouTube.com/watch?v=jZViOEv90dI&t=100").getId()); + assertEquals("jZViOEv90dI", linkHandler.fromUrl("HTTPS://www.youtube.com/watch?v=jZViOEv90dI&t=100").getId()); assertEquals("jZViOEv90dI", linkHandler.fromUrl("https://youtu.be/jZViOEv90dI?t=9s").getId()); assertEquals("jZViOEv90dI", linkHandler.fromUrl("HTTPS://Youtu.be/jZViOEv90dI?t=9s").getId()); assertEquals("uEJuoEs1UxY", linkHandler.fromUrl("http://www.youtube.com/watch_popup?v=uEJuoEs1UxY").getId()); @@ -85,9 +85,9 @@ public class 
YoutubeStreamLinkHandlerFactoryTest { @Test public void testAcceptYtUrl() throws ParsingException { assertTrue(linkHandler.acceptUrl("https://www.youtube.com/watch?v=jZViOEv90dI")); - assertTrue(linkHandler.acceptUrl("https://www.youtube.com/watch?v=jZViOEv90dI?t=100")); - assertTrue(linkHandler.acceptUrl("https://WWW.YouTube.com/watch?v=jZViOEv90dI?t=100")); - assertTrue(linkHandler.acceptUrl("HTTPS://www.youtube.com/watch?v=jZViOEv90dI?t=100")); + assertTrue(linkHandler.acceptUrl("https://www.youtube.com/watch?v=jZViOEv90dI&t=100")); + assertTrue(linkHandler.acceptUrl("https://WWW.YouTube.com/watch?v=jZViOEv90dI&t=100")); + assertTrue(linkHandler.acceptUrl("HTTPS://www.youtube.com/watch?v=jZViOEv90dI&t=100")); assertTrue(linkHandler.acceptUrl("https://youtu.be/jZViOEv90dI?t=9s")); assertTrue(linkHandler.acceptUrl("https://www.youtube.com/embed/jZViOEv90dI")); assertTrue(linkHandler.acceptUrl("https://www.youtube-nocookie.com/embed/jZViOEv90dI")); From 50c4783f71ecf4d8841289d0ef96f2f3e5eca0f7 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Sat, 19 Jan 2019 17:31:21 +0530 Subject: [PATCH 2/5] fixed extracting hls url --- .../extractors/YoutubeStreamExtractor.java | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 4801fa461..3b614fc36 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -5,6 +5,7 @@ import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; import org.jsoup.Jsoup; +import org.jsoup.helper.StringUtil; import 
org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ -386,13 +387,18 @@ public class YoutubeStreamExtractor extends StreamExtractor { public String getHlsUrl() throws ParsingException { assertPageFetched(); try { - String hlsvp; - if (playerArgs != null && playerArgs.isString("hlsvp")) { - hlsvp = playerArgs.getString("hlsvp", ""); - } else { - return ""; + String hlsvp = ""; + if (playerArgs != null) { + if( playerArgs.isString("hlsvp") ) { + hlsvp = playerArgs.getString("hlsvp", ""); + }else { + hlsvp = JsonParser.object() + .from(playerArgs.getString("player_response", "{}")) + .getObject("streamingData", new JsonObject()) + .getString("hlsManifestUrl", ""); + } } - + return hlsvp; } catch (Exception e) { throw new ParsingException("Could not get hls manifest url", e); From c60d6ec0abb0e8fee9ec1f93398d3a69bf643a09 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Sat, 19 Jan 2019 18:20:02 +0530 Subject: [PATCH 3/5] removed unused import --- .../services/youtube/extractors/YoutubeStreamExtractor.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 3b614fc36..4b21a06ae 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -5,7 +5,6 @@ import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; import org.jsoup.Jsoup; -import org.jsoup.helper.StringUtil; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ -398,7 +397,7 @@ public class YoutubeStreamExtractor extends 
StreamExtractor { .getString("hlsManifestUrl", ""); } } - + return hlsvp; } catch (Exception e) { throw new ParsingException("Could not get hls manifest url", e); From a6c972eff8ae69d85c1dd74cb43fdcf59dd142c0 Mon Sep 17 00:00:00 2001 From: Connectety-W Date: Sun, 20 Jan 2019 01:31:30 +0100 Subject: [PATCH 4/5] fixed tests by prepending HTTP to URLs without protocol and adding a check for null. --- .../YoutubeChannelLinkHandlerFactory.java | 3 ++- .../YoutubePlaylistLinkHandlerFactory.java | 2 +- .../YoutubeStreamLinkHandlerFactory.java | 15 +++++------ .../YoutubeTrendingLinkHandlerFactory.java | 3 ++- .../schabi/newpipe/extractor/utils/Utils.java | 25 +++++++++++++++++-- 5 files changed, 36 insertions(+), 12 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java index da207b278..e3522b313 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java @@ -2,6 +2,7 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.schabi.newpipe.extractor.utils.Utils; import java.net.URL; import java.util.List; @@ -42,7 +43,7 @@ public class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory { @Override public String getId(String url) throws ParsingException { try { - URL urlObj = new URL(url); + URL urlObj = Utils.stringToURL(url); String path = urlObj.getPath(); if (!(YoutubeParsingHelper.isYoutubeURL(urlObj) || urlObj.getHost().equalsIgnoreCase("hooktube.com"))) { diff --git 
a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubePlaylistLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubePlaylistLinkHandlerFactory.java index 91944122d..008aeb933 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubePlaylistLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubePlaylistLinkHandlerFactory.java @@ -23,7 +23,7 @@ public class YoutubePlaylistLinkHandlerFactory extends ListLinkHandlerFactory { @Override public String getId(String url) throws ParsingException { try { - URL urlObj = new URL(url); + URL urlObj = Utils.stringToURL(url); if (!YoutubeParsingHelper.isYoutubeURL(urlObj)) { throw new ParsingException("the url given is not a Youtube-URL"); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java index e479e74c9..3e793960f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java @@ -58,13 +58,14 @@ public class YoutubeStreamLinkHandlerFactory extends LinkHandlerFactory { public String getId(String urlString) throws ParsingException, IllegalArgumentException { try { URI uri = new URI(urlString); + String scheme = uri.getScheme(); - if (uri.getScheme().equals("vnd.youtube")) { - String scheme = uri.getSchemeSpecificPart(); - if (scheme.startsWith("//")) { - urlString = "https:" + scheme; + if (scheme != null && scheme.equals("vnd.youtube")) { + String schemeSpecificPart = uri.getSchemeSpecificPart(); + if (schemeSpecificPart.startsWith("//")) 
{ + urlString = "https:" + schemeSpecificPart; } else { - return assertIsID(scheme); + return assertIsID(schemeSpecificPart); } } } catch (URISyntaxException ignored) { @@ -72,7 +73,7 @@ public class YoutubeStreamLinkHandlerFactory extends LinkHandlerFactory { URL url; try { - url = new URL(urlString); + url = Utils.stringToURL(urlString); } catch (MalformedURLException e) { throw new IllegalArgumentException("The given URL is not valid"); } @@ -115,7 +116,7 @@ public class YoutubeStreamLinkHandlerFactory extends LinkHandlerFactory { URL decodedURL; try { - decodedURL = new URL("http://www.youtube.com" + uQueryValue); + decodedURL = Utils.stringToURL("http://www.youtube.com" + uQueryValue); } catch (MalformedURLException e) { throw new ParsingException("Error no suitable url: " + urlString); } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeTrendingLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeTrendingLinkHandlerFactory.java index 123da9d1a..253e9cd8a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeTrendingLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeTrendingLinkHandlerFactory.java @@ -21,6 +21,7 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; */ import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.schabi.newpipe.extractor.utils.Utils; import java.net.MalformedURLException; import java.net.URL; @@ -41,7 +42,7 @@ public class YoutubeTrendingLinkHandlerFactory extends ListLinkHandlerFactory { public boolean onAcceptUrl(final String url) { URL urlObj; try { - urlObj = new URL(url); + urlObj = Utils.stringToURL(url); } catch (MalformedURLException e) { return false; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java 
b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java index 65f3ce24c..d4b8db432 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java @@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.utils; import org.schabi.newpipe.extractor.exceptions.ParsingException; import java.io.UnsupportedEncodingException; +import java.net.MalformedURLException; import java.net.URL; import java.net.URLDecoder; import java.util.List; @@ -43,7 +44,7 @@ public class Utils { } public static void printErrors(List errors) { - for(Throwable e : errors) { + for (Throwable e : errors) { e.printStackTrace(); System.err.println("----------------"); } @@ -55,7 +56,7 @@ public class Utils { public static String replaceHttpWithHttps(final String url) { if (url == null) return null; - if(!url.isEmpty() && url.startsWith(HTTP)) { + if (!url.isEmpty() && url.startsWith(HTTP)) { return HTTPS + url.substring(HTTP.length()); } return url; @@ -99,4 +100,24 @@ public class Utils { return null; } + + /** + * converts a string to a URL-Object. 
+ * defaults to HTTP if no protocol is given + * + * @param url the string to be converted to a URL-Object + * @return a URL-Object containing the url + */ + public static URL stringToURL(String url) throws MalformedURLException { + try { + return new URL(url); + } catch (MalformedURLException e) { + // if no protocol is given try prepending "http://" + if (e.getMessage().equals("no protocol: " + url)) { + return new URL(HTTP + url); + } + + throw e; + } + } } \ No newline at end of file From 19288c1456d667644e3d0ec12bf15d3d21b88447 Mon Sep 17 00:00:00 2001 From: Connectety-W Date: Sun, 20 Jan 2019 14:39:06 +0100 Subject: [PATCH 5/5] added forgotten break statements --- .../youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java index 3e793960f..d9d9e93a0 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java @@ -106,6 +106,8 @@ public class YoutubeStreamLinkHandlerFactory extends LinkHandlerFactory { return assertIsID(id); } + + break; } case "YOUTUBE.COM": @@ -167,6 +169,8 @@ public class YoutubeStreamLinkHandlerFactory extends LinkHandlerFactory { return assertIsID(id); } } + + break; } throw new ParsingException("Error no suitable url: " + urlString);