fix: improve shorts duration parser
This commit is contained in:
parent
66d80383c5
commit
6c5a225c23
|
@ -18,6 +18,8 @@ public class TimeAgoParser {
|
||||||
private final PatternsHolder patternsHolder;
|
private final PatternsHolder patternsHolder;
|
||||||
private final OffsetDateTime now;
|
private final OffsetDateTime now;
|
||||||
|
|
||||||
|
/**
 * Matches one part of a textual duration: an optional amount made of digits followed by a
 * single space (group 1, {@code null} when absent) and a word made of ASCII letters (group 2).
 * <p>
 * The letter class is spelled {@code [A-Za-z]} on purpose: the range {@code [A-z]} would also
 * accept the punctuation characters located between {@code 'Z'} and {@code 'a'} in ASCII
 * ({@code [ \ ] ^ _} and the backtick).
 * </p>
 * NOTE(review): words written in non-Latin scripts are not matched by this pattern - confirm
 * whether localized duration labels need to be supported.
 */
private static final Pattern DURATION_PATTERN = Pattern.compile("(?:(\\d+) )?([A-Za-z]+)");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a helper to parse upload dates in the format '2 days ago'.
|
* Creates a helper to parse upload dates in the format '2 days ago'.
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -60,16 +62,29 @@ public class TimeAgoParser {
|
||||||
return getResultFor(parseTimeAgoAmount(textualDate), parseChronoUnit(textualDate));
|
return getResultFor(parseTimeAgoAmount(textualDate), parseChronoUnit(textualDate));
|
||||||
}
|
}
|
||||||
|
|
||||||
public long parseDuration(final String textualDuration) {
|
public long parseDuration(final String textualDuration) throws ParsingException {
|
||||||
final int amount = parseTimeAgoAmount(textualDuration);
|
return DURATION_PATTERN.matcher(textualDuration).results().map(match -> {
|
||||||
ChronoUnit unit;
|
final String digits = match.group(1);
|
||||||
|
final String word = match.group(2);
|
||||||
|
|
||||||
|
int amount;
|
||||||
try {
|
try {
|
||||||
unit = parseChronoUnit(textualDuration);
|
amount = Integer.parseInt(digits);
|
||||||
} catch (final ParsingException e) {
|
} catch (final NumberFormatException ignored) {
|
||||||
unit = ChronoUnit.SECONDS;
|
amount = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
final ChronoUnit unit;
|
||||||
|
try {
|
||||||
|
unit = parseChronoUnit(word);
|
||||||
|
} catch (final ParsingException ignored) {
|
||||||
|
return (long) 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return amount * unit.getDuration().getSeconds();
|
return amount * unit.getDuration().getSeconds();
|
||||||
|
}).filter(n -> n > 0).reduce(Long::sum).orElseThrow(() -> new ParsingException(
|
||||||
|
String.format("could not parse duration `%s`", textualDuration))
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
private int parseTimeAgoAmount(final String textualDate) {
|
private int parseTimeAgoAmount(final String textualDate) {
|
||||||
|
|
|
@ -123,11 +123,11 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public String getName() throws ParsingException {
|
public String getName() throws ParsingException {
|
||||||
final String mdName = initialData.getObject("metadata")
|
final String metadataName = initialData.getObject("metadata")
|
||||||
.getObject("channelMetadataRenderer")
|
.getObject("channelMetadataRenderer")
|
||||||
.getString("title");
|
.getString("title");
|
||||||
if (!isNullOrEmpty(mdName)) {
|
if (!isNullOrEmpty(metadataName)) {
|
||||||
return mdName;
|
return metadataName;
|
||||||
}
|
}
|
||||||
|
|
||||||
return getChannelHeader().flatMap(header -> {
|
return getChannelHeader().flatMap(header -> {
|
||||||
|
|
|
@ -149,12 +149,12 @@ public class YoutubeChannelTabExtractor extends ChannelTabExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String getChannelName() {
|
protected String getChannelName() {
|
||||||
final String mdName = initialData
|
final String metadataName = initialData
|
||||||
.getObject("metadata")
|
.getObject("metadata")
|
||||||
.getObject("channelMetadataRenderer")
|
.getObject("channelMetadataRenderer")
|
||||||
.getString("title");
|
.getString("title");
|
||||||
if (!isNullOrEmpty(mdName)) {
|
if (!isNullOrEmpty(metadataName)) {
|
||||||
return mdName;
|
return metadataName;
|
||||||
}
|
}
|
||||||
|
|
||||||
return YouTubeChannelHelper.getChannelHeader(initialData)
|
return YouTubeChannelHelper.getChannelHeader(initialData)
|
||||||
|
|
|
@ -175,13 +175,14 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||||
|
|
||||||
// Duration of short videos in channel tab
|
// Duration of short videos in channel tab
|
||||||
// example: "simple is best - 49 seconds - play video"
|
// example: "simple is best - 49 seconds - play video"
|
||||||
|
// "Breakfast at Hawaiian McDonald's - 1 minute, 1 second - play video"
|
||||||
final String accessibilityLabel = videoInfo.getObject("accessibility")
|
final String accessibilityLabel = videoInfo.getObject("accessibility")
|
||||||
.getObject("accessibilityData").getString("label");
|
.getObject("accessibilityData").getString("label");
|
||||||
if (accessibilityLabel == null || timeAgoParser == null) {
|
if (accessibilityLabel == null || timeAgoParser == null) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
final String[] labelParts = accessibilityLabel.split(" \u2013 ");
|
final String[] labelParts = accessibilityLabel.split(" [\u2013-] ");
|
||||||
|
|
||||||
if (labelParts.length > 2) {
|
if (labelParts.length > 2) {
|
||||||
final String textualDuration = labelParts[labelParts.length - 2];
|
final String textualDuration = labelParts[labelParts.length - 2];
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
package org.schabi.newpipe.extractor.localization;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||||
|
|
||||||
|
public class TimeAgoParserTest {
|
||||||
|
private static TimeAgoParser timeAgoParser;
|
||||||
|
|
||||||
|
@BeforeAll
|
||||||
|
static void setUp() {
|
||||||
|
timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.DEFAULT);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testGetDuration() throws ParsingException {
|
||||||
|
assertEquals(timeAgoParser.parseDuration("one second"), 1);
|
||||||
|
assertEquals(timeAgoParser.parseDuration("second"), 1);
|
||||||
|
assertEquals(timeAgoParser.parseDuration("49 seconds"), 49);
|
||||||
|
assertEquals(timeAgoParser.parseDuration("1 minute, 1 second"), 61);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testGetDurationError() {
|
||||||
|
assertThrows(ParsingException.class, () -> timeAgoParser.parseDuration("abcd"));
|
||||||
|
assertThrows(ParsingException.class, () -> timeAgoParser.parseDuration("12 abcd"));
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue