fix: improve shorts duration parser

This commit is contained in:
ThetaDev 2023-05-08 01:15:38 +02:00
parent 66d80383c5
commit 6c5a225c23
5 changed files with 63 additions and 16 deletions

View File

@ -18,6 +18,8 @@ public class TimeAgoParser {
private final PatternsHolder patternsHolder; private final PatternsHolder patternsHolder;
private final OffsetDateTime now; private final OffsetDateTime now;
private static final Pattern DURATION_PATTERN = Pattern.compile("(?:(\\d+) )?([A-z]+)");
/** /**
* Creates a helper to parse upload dates in the format '2 days ago'. * Creates a helper to parse upload dates in the format '2 days ago'.
* <p> * <p>
@ -60,16 +62,29 @@ public class TimeAgoParser {
return getResultFor(parseTimeAgoAmount(textualDate), parseChronoUnit(textualDate)); return getResultFor(parseTimeAgoAmount(textualDate), parseChronoUnit(textualDate));
} }
public long parseDuration(final String textualDuration) { public long parseDuration(final String textualDuration) throws ParsingException {
final int amount = parseTimeAgoAmount(textualDuration); return DURATION_PATTERN.matcher(textualDuration).results().map(match -> {
ChronoUnit unit; final String digits = match.group(1);
final String word = match.group(2);
int amount;
try { try {
unit = parseChronoUnit(textualDuration); amount = Integer.parseInt(digits);
} catch (final ParsingException e) { } catch (final NumberFormatException ignored) {
unit = ChronoUnit.SECONDS; amount = 1;
}
final ChronoUnit unit;
try {
unit = parseChronoUnit(word);
} catch (final ParsingException ignored) {
return (long) 0;
} }
return amount * unit.getDuration().getSeconds(); return amount * unit.getDuration().getSeconds();
}).filter(n -> n > 0).reduce(Long::sum).orElseThrow(() -> new ParsingException(
String.format("could not parse duration `%s`", textualDuration))
);
} }
private int parseTimeAgoAmount(final String textualDate) { private int parseTimeAgoAmount(final String textualDate) {

View File

@ -123,11 +123,11 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Nonnull @Nonnull
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
final String mdName = initialData.getObject("metadata") final String metadataName = initialData.getObject("metadata")
.getObject("channelMetadataRenderer") .getObject("channelMetadataRenderer")
.getString("title"); .getString("title");
if (!isNullOrEmpty(mdName)) { if (!isNullOrEmpty(metadataName)) {
return mdName; return metadataName;
} }
return getChannelHeader().flatMap(header -> { return getChannelHeader().flatMap(header -> {

View File

@ -149,12 +149,12 @@ public class YoutubeChannelTabExtractor extends ChannelTabExtractor {
} }
protected String getChannelName() { protected String getChannelName() {
final String mdName = initialData final String metadataName = initialData
.getObject("metadata") .getObject("metadata")
.getObject("channelMetadataRenderer") .getObject("channelMetadataRenderer")
.getString("title"); .getString("title");
if (!isNullOrEmpty(mdName)) { if (!isNullOrEmpty(metadataName)) {
return mdName; return metadataName;
} }
return YouTubeChannelHelper.getChannelHeader(initialData) return YouTubeChannelHelper.getChannelHeader(initialData)

View File

@ -175,13 +175,14 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
// Duration of short videos in channel tab // Duration of short videos in channel tab
// example: "simple is best - 49 seconds - play video" // example: "simple is best - 49 seconds - play video"
// "Breakfast at Hawaiian McDonald's - 1 minute, 1 second - play video"
final String accessibilityLabel = videoInfo.getObject("accessibility") final String accessibilityLabel = videoInfo.getObject("accessibility")
.getObject("accessibilityData").getString("label"); .getObject("accessibilityData").getString("label");
if (accessibilityLabel == null || timeAgoParser == null) { if (accessibilityLabel == null || timeAgoParser == null) {
return 0; return 0;
} }
final String[] labelParts = accessibilityLabel.split(" \u2013 "); final String[] labelParts = accessibilityLabel.split(" [\u2013-] ");
if (labelParts.length > 2) { if (labelParts.length > 2) {
final String textualDuration = labelParts[labelParts.length - 2]; final String textualDuration = labelParts[labelParts.length - 2];

View File

@ -0,0 +1,31 @@
package org.schabi.newpipe.extractor.localization;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
/**
 * Unit tests for {@link TimeAgoParser#parseDuration(String)}, run against the parser that
 * {@link TimeAgoPatternsManager} returns for {@link Localization#DEFAULT}.
 */
public class TimeAgoParserTest {
    /** Parser under test; assigned once in {@link #setUp()} and shared by all tests. */
    private static TimeAgoParser timeAgoParser;

    /** Obtains the parser for the default localization before any test runs. */
    @BeforeAll
    static void setUp() {
        timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.DEFAULT);
    }

    /**
     * Checks that well-formed textual durations are converted to the expected number of seconds.
     *
     * @throws ParsingException if the parser rejects one of the inputs, which fails the test
     */
    @Test
    void testGetDuration() throws ParsingException {
        // JUnit's assertEquals takes (expected, actual); keeping that order makes a failure
        // message report the literal as "expected" and the parser output as "actual".

        // A spelled-out amount is expected to still yield one second.
        assertEquals(1, timeAgoParser.parseDuration("one second"));
        // A unit with no amount at all is expected to count as one.
        assertEquals(1, timeAgoParser.parseDuration("second"));
        assertEquals(49, timeAgoParser.parseDuration("49 seconds"));
        // Several "<amount> <unit>" parts are expected to be summed: 60 + 1.
        assertEquals(61, timeAgoParser.parseDuration("1 minute, 1 second"));
    }

    /**
     * Checks that inputs without any recognizable time unit are rejected with a
     * {@link ParsingException}, whether or not a number precedes the unknown word.
     */
    @Test
    void testGetDurationError() {
        assertThrows(ParsingException.class, () -> timeAgoParser.parseDuration("abcd"));
        assertThrows(ParsingException.class, () -> timeAgoParser.parseDuration("12 abcd"));
    }
}