Fix YouTube subscriber count

Modify the tests to fail when the extracted subscriber count is too small
This commit is contained in:
TobiGr 2019-09-16 23:15:54 +02:00
parent dbdd9ed083
commit 06016d1ae3
3 changed files with 35 additions and 3 deletions

View File

@ -49,7 +49,7 @@ import java.util.ArrayList;
public class YoutubeChannelExtractor extends ChannelExtractor {
/*package-private*/ static final String CHANNEL_URL_BASE = "https://www.youtube.com/channel/";
private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000&gl=US&hl=en";
private Document doc;
@ -135,10 +135,11 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override
public long getSubscriberCount() throws ParsingException {
final Element el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]").first();
final String el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]")
.first().attr("title");
if (el != null) {
try {
return Long.parseLong(Utils.removeNonDigitCharacters(el.text()));
return Utils.mixedNumberWordToLong(el);
} catch (NumberFormatException e) {
throw new ParsingException("Could not get subscriber count", e);
}

View File

@ -27,6 +27,35 @@ public class Utils {
return toRemove.replaceAll("\\D+", "");
}
/**
 * <p>Convert a mixed number word to a long.</p>
 * <p>The numeric part may be followed directly by a scaling suffix, matched
 * case-insensitively: {@code K} = 10^3, {@code M} = 10^6, {@code B} = 10^9.
 * Both {@code .} and {@code ,} are accepted as the decimal separator; a comma is
 * therefore never treated as a thousands separator. Any fraction left after
 * scaling is truncated.</p>
 * <p>Examples:</p>
 * <ul>
 *     <li>123 -&gt; 123</li>
 *     <li>1.23K -&gt; 1230</li>
 *     <li>1,23K -&gt; 1230</li>
 *     <li>1.23M -&gt; 1230000</li>
 *     <li>1.5B -&gt; 1500000000</li>
 * </ul>
 * @param numberWord string to be converted to a long
 * @return the value of {@code numberWord} as a long
 * @throws NumberFormatException if the extracted numeric part cannot be parsed as a double
 * @throws ParsingException propagated from {@code Parser.matchGroup1} while extracting the numeric part
 */
public static long mixedNumberWordToLong(String numberWord) throws NumberFormatException, ParsingException {
    String multiplier = "";
    try {
        multiplier = Parser.matchGroup("[\\d]+([\\.,][\\d]+)?([KMBkmb])+", numberWord, 2);
    } catch (ParsingException ignored) {
        // Deliberately ignored: presumably raised when no suffix follows the number,
        // in which case the value is taken as-is (empty multiplier).
    }

    // Double.parseDouble only understands '.' as the decimal separator, while the
    // pattern also admits ','; normalise it so that "1,23" does not throw.
    final String number = Parser.matchGroup1("([\\d]+([\\.,][\\d]+)?)", numberWord).replace(',', '.');
    final double count = Double.parseDouble(number);

    switch (multiplier.toUpperCase()) {
        case "K":
            return (long) (count * 1e3);
        case "M":
            return (long) (count * 1e6);
        case "B":
            return (long) (count * 1e9);
        default:
            return (long) (count);
    }
}
/**
* Check if the url matches the pattern.
*

View File

@ -105,6 +105,7 @@ public class YoutubeChannelExtractorTest {
@Test
public void testSubscriberCount() throws Exception {
    // Fetch the value once and check it against both bounds.
    final long subscribers = extractor.getSubscriberCount();

    // A negative count can never be valid.
    assertTrue("Wrong subscriber count", subscribers >= 0);
    // The lower bound makes the test fail when the extracted count comes out too small.
    assertTrue("Subscriber count too small", subscribers >= 4e6);
}
}
@ -195,6 +196,7 @@ public class YoutubeChannelExtractorTest {
@Test
public void testSubscriberCount() throws Exception {
    // Fetch the value once and check it against both bounds.
    final long subscribers = extractor.getSubscriberCount();

    // A negative count can never be valid.
    assertTrue("Wrong subscriber count", subscribers >= 0);
    // The lower bound makes the test fail when the extracted count comes out too small.
    assertTrue("Subscriber count too small", subscribers >= 10e6);
}
}