fix: improve shorts duration parser
This commit is contained in:
parent
66d80383c5
commit
6c5a225c23
|
@ -18,6 +18,8 @@ public class TimeAgoParser {
|
|||
private final PatternsHolder patternsHolder;
|
||||
private final OffsetDateTime now;
|
||||
|
||||
// Matches one component of a textual duration: an optional number followed by a single space,
// then a unit word (e.g. "49 seconds", or just "second"). Group 1 is the number (may be
// absent), group 2 is the word. The ranges are spelled out as A-Za-z because [A-z] would also
// accept the ASCII punctuation located between 'Z' and 'a' ([ \ ] ^ _ `).
private static final Pattern DURATION_PATTERN = Pattern.compile("(?:(\\d+) )?([A-Za-z]+)");
|
||||
|
||||
/**
|
||||
* Creates a helper to parse upload dates in the format '2 days ago'.
|
||||
* <p>
|
||||
|
@ -60,16 +62,29 @@ public class TimeAgoParser {
|
|||
return getResultFor(parseTimeAgoAmount(textualDate), parseChronoUnit(textualDate));
|
||||
}
|
||||
|
||||
public long parseDuration(final String textualDuration) {
|
||||
final int amount = parseTimeAgoAmount(textualDuration);
|
||||
ChronoUnit unit;
|
||||
try {
|
||||
unit = parseChronoUnit(textualDuration);
|
||||
} catch (final ParsingException e) {
|
||||
unit = ChronoUnit.SECONDS;
|
||||
}
|
||||
public long parseDuration(final String textualDuration) throws ParsingException {
|
||||
return DURATION_PATTERN.matcher(textualDuration).results().map(match -> {
|
||||
final String digits = match.group(1);
|
||||
final String word = match.group(2);
|
||||
|
||||
return amount * unit.getDuration().getSeconds();
|
||||
int amount;
|
||||
try {
|
||||
amount = Integer.parseInt(digits);
|
||||
} catch (final NumberFormatException ignored) {
|
||||
amount = 1;
|
||||
}
|
||||
|
||||
final ChronoUnit unit;
|
||||
try {
|
||||
unit = parseChronoUnit(word);
|
||||
} catch (final ParsingException ignored) {
|
||||
return (long) 0;
|
||||
}
|
||||
|
||||
return amount * unit.getDuration().getSeconds();
|
||||
}).filter(n -> n > 0).reduce(Long::sum).orElseThrow(() -> new ParsingException(
|
||||
String.format("could not parse duration `%s`", textualDuration))
|
||||
);
|
||||
}
|
||||
|
||||
private int parseTimeAgoAmount(final String textualDate) {
|
||||
|
|
|
@ -123,11 +123,11 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||
@Nonnull
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
final String mdName = initialData.getObject("metadata")
|
||||
final String metadataName = initialData.getObject("metadata")
|
||||
.getObject("channelMetadataRenderer")
|
||||
.getString("title");
|
||||
if (!isNullOrEmpty(mdName)) {
|
||||
return mdName;
|
||||
if (!isNullOrEmpty(metadataName)) {
|
||||
return metadataName;
|
||||
}
|
||||
|
||||
return getChannelHeader().flatMap(header -> {
|
||||
|
|
|
@ -149,12 +149,12 @@ public class YoutubeChannelTabExtractor extends ChannelTabExtractor {
|
|||
}
|
||||
|
||||
protected String getChannelName() {
|
||||
final String mdName = initialData
|
||||
final String metadataName = initialData
|
||||
.getObject("metadata")
|
||||
.getObject("channelMetadataRenderer")
|
||||
.getString("title");
|
||||
if (!isNullOrEmpty(mdName)) {
|
||||
return mdName;
|
||||
if (!isNullOrEmpty(metadataName)) {
|
||||
return metadataName;
|
||||
}
|
||||
|
||||
return YouTubeChannelHelper.getChannelHeader(initialData)
|
||||
|
|
|
@ -175,13 +175,14 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
|||
|
||||
// Duration of short videos in channel tab
|
||||
// example: "simple is best - 49 seconds - play video"
|
||||
// "Breakfast at Hawaiian McDonald's - 1 minute, 1 second - play video"
|
||||
final String accessibilityLabel = videoInfo.getObject("accessibility")
|
||||
.getObject("accessibilityData").getString("label");
|
||||
if (accessibilityLabel == null || timeAgoParser == null) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
final String[] labelParts = accessibilityLabel.split(" \u2013 ");
|
||||
final String[] labelParts = accessibilityLabel.split(" [\u2013-] ");
|
||||
|
||||
if (labelParts.length > 2) {
|
||||
final String textualDuration = labelParts[labelParts.length - 2];
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
package org.schabi.newpipe.extractor.localization;
|
||||
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
|
||||
public class TimeAgoParserTest {
|
||||
private static TimeAgoParser timeAgoParser;
|
||||
|
||||
@BeforeAll
|
||||
static void setUp() {
|
||||
timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.DEFAULT);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testGetDuration() throws ParsingException {
|
||||
assertEquals(timeAgoParser.parseDuration("one second"), 1);
|
||||
assertEquals(timeAgoParser.parseDuration("second"), 1);
|
||||
assertEquals(timeAgoParser.parseDuration("49 seconds"), 49);
|
||||
assertEquals(timeAgoParser.parseDuration("1 minute, 1 second"), 61);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testGetDurationError() {
|
||||
assertThrows(ParsingException.class, () -> timeAgoParser.parseDuration("abcd"));
|
||||
assertThrows(ParsingException.class, () -> timeAgoParser.parseDuration("12 abcd"));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue