fix: improve shorts duration parser

This commit is contained in:
ThetaDev 2023-05-08 01:15:38 +02:00
parent 66d80383c5
commit 6c5a225c23
5 changed files with 63 additions and 16 deletions

View File

@ -18,6 +18,8 @@ public class TimeAgoParser {
private final PatternsHolder patternsHolder;
private final OffsetDateTime now;
// Matches one duration component: an optional leading count followed by a space, then a
// unit word (e.g. "49 seconds", "second"). Group 1 is the count (may be absent), group 2
// the word. The word class is [A-Za-z] rather than [A-z], because the range A-z also
// covers the ASCII punctuation between 'Z' and 'a' ('[', '\', ']', '^', '_', '`').
private static final Pattern DURATION_PATTERN = Pattern.compile("(?:(\\d+) )?([A-Za-z]+)");
/**
* Creates a helper to parse upload dates in the format '2 days ago'.
* <p>
@ -60,16 +62,29 @@ public class TimeAgoParser {
return getResultFor(parseTimeAgoAmount(textualDate), parseChronoUnit(textualDate));
}
// NOTE(review): this span appears to interleave the previous implementation (first
// signature below, the try/catch defaulting to SECONDS, and the lone
// `return amount * unit...` line after the group lookups) with the new stream-based
// implementation (second signature) — confirm against the actual file before editing.
public long parseDuration(final String textualDuration) {
final int amount = parseTimeAgoAmount(textualDuration);
ChronoUnit unit;
try {
unit = parseChronoUnit(textualDuration);
} catch (final ParsingException e) {
unit = ChronoUnit.SECONDS;
}
/**
 * Parses a textual duration such as "1 minute, 1 second" into a number of seconds.
 * <p>
 * Every match of {@code DURATION_PATTERN} in the input is converted to seconds on its own
 * and the positive results are summed.
 *
 * @param textualDuration the text to scan for duration components
 * @return the sum, in seconds, of all components that produced a positive value
 * @throws ParsingException if no component produced a positive value
 */
public long parseDuration(final String textualDuration) throws ParsingException {
return DURATION_PATTERN.matcher(textualDuration).results().map(match -> {
// group 1: optional count, group 2: the word handed to parseChronoUnit
final String digits = match.group(1);
final String word = match.group(2);
return amount * unit.getDuration().getSeconds();
int amount;
try {
amount = Integer.parseInt(digits);
} catch (final NumberFormatException ignored) {
// no parsable count (including an absent group 1, i.e. null digits): count as 1
amount = 1;
}
final ChronoUnit unit;
try {
unit = parseChronoUnit(word);
} catch (final ParsingException ignored) {
// word is not a recognised unit: contribute 0 so the filter below drops it
return (long) 0;
}
return amount * unit.getDuration().getSeconds();
// keep only positive components; an empty result means nothing could be parsed
}).filter(n -> n > 0).reduce(Long::sum).orElseThrow(() -> new ParsingException(
String.format("could not parse duration `%s`", textualDuration))
);
}
private int parseTimeAgoAmount(final String textualDate) {

View File

@ -123,11 +123,11 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Nonnull
@Override
public String getName() throws ParsingException {
final String mdName = initialData.getObject("metadata")
final String metadataName = initialData.getObject("metadata")
.getObject("channelMetadataRenderer")
.getString("title");
if (!isNullOrEmpty(mdName)) {
return mdName;
if (!isNullOrEmpty(metadataName)) {
return metadataName;
}
return getChannelHeader().flatMap(header -> {

View File

@ -149,12 +149,12 @@ public class YoutubeChannelTabExtractor extends ChannelTabExtractor {
}
protected String getChannelName() {
final String mdName = initialData
final String metadataName = initialData
.getObject("metadata")
.getObject("channelMetadataRenderer")
.getString("title");
if (!isNullOrEmpty(mdName)) {
return mdName;
if (!isNullOrEmpty(metadataName)) {
return metadataName;
}
return YouTubeChannelHelper.getChannelHeader(initialData)

View File

@ -175,13 +175,14 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
// Duration of short videos in channel tab
// example: "simple is best - 49 seconds - play video"
// "Breakfast at Hawaiian McDonald's - 1 minute, 1 second - play video"
final String accessibilityLabel = videoInfo.getObject("accessibility")
.getObject("accessibilityData").getString("label");
if (accessibilityLabel == null || timeAgoParser == null) {
return 0;
}
final String[] labelParts = accessibilityLabel.split(" \u2013 ");
final String[] labelParts = accessibilityLabel.split(" [\u2013-] ");
if (labelParts.length > 2) {
final String textualDuration = labelParts[labelParts.length - 2];

View File

@ -0,0 +1,31 @@
package org.schabi.newpipe.extractor.localization;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
/**
 * Tests for {@link TimeAgoParser#parseDuration(String)}, run against the parser obtained for
 * {@link Localization#DEFAULT}.
 */
public class TimeAgoParserTest {
    /** Parser shared by all tests; initialised once in {@link #setUp()}. */
    private static TimeAgoParser timeAgoParser;

    @BeforeAll
    static void setUp() {
        timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.DEFAULT);
    }

    /**
     * Checks durations with and without a numeric count, and a multi-component duration.
     * Arguments follow JUnit's {@code assertEquals(expected, actual)} order so that failure
     * messages report the expected and actual values the right way round.
     */
    @Test
    void testGetDuration() throws ParsingException {
        assertEquals(1, timeAgoParser.parseDuration("one second"));
        assertEquals(1, timeAgoParser.parseDuration("second"));
        assertEquals(49, timeAgoParser.parseDuration("49 seconds"));
        assertEquals(61, timeAgoParser.parseDuration("1 minute, 1 second"));
    }

    /** Checks that input without a usable duration component is rejected. */
    @Test
    void testGetDurationError() {
        assertThrows(ParsingException.class, () -> timeAgoParser.parseDuration("abcd"));
        assertThrows(ParsingException.class, () -> timeAgoParser.parseDuration("12 abcd"));
    }
}