From 06016d1ae3dc4cf50ab5943c6593dd7cb05ad585 Mon Sep 17 00:00:00 2001
From: TobiGr
Date: Mon, 16 Sep 2019 23:15:54 +0200
Subject: [PATCH] Fix YouTube subscriber count

Modify test to fail on too small subscriber count
---
 .../extractors/YoutubeChannelExtractor.java   |  4 +-
 .../schabi/newpipe/extractor/utils/Utils.java | 35 +++++++++++++++++++
 .../youtube/YoutubeChannelExtractorTest.java  |  2 +
 3 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java
index 624cba670..9ac93deec 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java
@@ -49,7 +49,7 @@ import java.util.ArrayList;
 public class YoutubeChannelExtractor extends ChannelExtractor {
     /*package-private*/ static final String CHANNEL_URL_BASE = "https://www.youtube.com/channel/";
     private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
-    private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";
+    private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000&gl=US&hl=en";

     private Document doc;

@@ -135,10 +135,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor {

     @Override
     public long getSubscriberCount() throws ParsingException {
         final Element el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]").first();
         if (el != null) {
             try {
-                return Long.parseLong(Utils.removeNonDigitCharacters(el.text()));
+                return Utils.mixedNumberWordToLong(el.attr("title"));
             } catch (NumberFormatException e) {
                 throw new ParsingException("Could not get subscriber count", e);
             }
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java
index 3dd01c49c..ecf4017d1 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java
@@ -27,6 +27,41 @@ public class Utils {
         return toRemove.replaceAll("\\D+", "");
     }

+    /**
+     * <p>Convert a mixed number word to a long.</p>
+     * <p>Examples:</p>
+     * <ul>
+     *     <li>123 -&gt; 123</li>
+     *     <li>1.23K -&gt; 1230</li>
+     *     <li>1.23M -&gt; 1230000</li>
+     *     <li>1,5M -&gt; 1500000 (a comma is treated as decimal separator)</li>
+     * </ul>
+     *
+     * @param numberWord string to be converted to a long
+     * @return the number represented by {@code numberWord}, scaled by its K/M suffix if present
+     * @throws NumberFormatException if the matched numeric part cannot be parsed as a double
+     * @throws ParsingException      if no number is found in {@code numberWord} (raised by {@code Parser.matchGroup1})
+     */
+    public static long mixedNumberWordToLong(String numberWord) throws NumberFormatException, ParsingException {
+        String multiplier = "";
+        try {
+            multiplier = Parser.matchGroup("[\\d]+([\\.,][\\d]+)?([KMkm])+", numberWord, 2);
+        } catch (ParsingException ignored) {
+            // No K/M suffix present: keep the empty multiplier and take the number literally.
+        }
+        // Double.parseDouble() only understands '.', but the pattern also accepts a decimal comma.
+        double count = Double.parseDouble(
+                Parser.matchGroup1("([\\d]+([\\.,][\\d]+)?)", numberWord).replace(",", "."));
+        switch (multiplier.toUpperCase()) {
+            case "K":
+                // Round instead of truncating so a product just below an integer does not lose a unit.
+                return Math.round(count * 1e3);
+            case "M":
+                return Math.round(count * 1e6);
+            default:
+                return (long) (count);
+        }
+    }
+
     /**
      * Check if the url matches the pattern.
      *
diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java
index f124bed7c..9e8737722 100644
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java
@@ -105,6 +105,7 @@ public class YoutubeChannelExtractorTest {
         @Test
         public void testSubscriberCount() throws Exception {
             assertTrue("Wrong subscriber count", extractor.getSubscriberCount() >= 0);
+            assertTrue("Subscriber count too small", extractor.getSubscriberCount() >= 4e6);
         }
     }

@@ -195,6 +196,7 @@ public class YoutubeChannelExtractorTest {
         @Test
         public void testSubscriberCount() throws Exception {
             assertTrue("Wrong subscriber count", extractor.getSubscriberCount() >= 0);
+            assertTrue("Subscriber count too small", extractor.getSubscriberCount() >= 10e6);
         }
     }
