2019-04-28 22:03:16 +02:00
|
|
|
package org.schabi.newpipe.extractor.localization;
|
2019-10-02 07:02:01 +02:00
|
|
|
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
2019-04-28 22:03:16 +02:00
|
|
|
import org.schabi.newpipe.extractor.timeago.PatternsHolder;
|
|
|
|
import org.schabi.newpipe.extractor.utils.Parser;
|
2019-10-02 07:02:01 +02:00
|
|
|
|
2020-10-18 05:48:14 +02:00
|
|
|
import java.time.OffsetDateTime;
|
|
|
|
import java.time.ZoneOffset;
|
2020-10-18 04:22:28 +02:00
|
|
|
import java.time.temporal.ChronoUnit;
|
2019-10-02 07:02:01 +02:00
|
|
|
import java.util.Map;
|
2019-04-28 22:03:16 +02:00
|
|
|
import java.util.regex.Pattern;
|
2019-10-02 07:02:01 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* A helper class that is meant to be used by services that need to parse upload dates in the
|
|
|
|
* format '2 days ago' or similar.
|
|
|
|
*/
|
|
|
|
public class TimeAgoParser {
|
2019-04-28 22:03:16 +02:00
|
|
|
private final PatternsHolder patternsHolder;
|
2020-10-18 05:48:14 +02:00
|
|
|
private final OffsetDateTime now;
|
2019-10-02 07:02:01 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Creates a helper to parse upload dates in the format '2 days ago'.
|
|
|
|
* <p>
|
2020-02-08 23:58:46 +01:00
|
|
|
* Instantiate a new {@link TimeAgoParser} every time you extract a new batch of items.
|
2019-10-02 07:02:01 +02:00
|
|
|
* </p>
|
2020-02-08 23:58:46 +01:00
|
|
|
*
|
2022-03-17 14:47:08 +01:00
|
|
|
* @param patternsHolder An object that holds the "time ago" patterns, special cases, and the
|
|
|
|
* language word separator.
|
2019-10-02 07:02:01 +02:00
|
|
|
*/
|
2022-03-17 14:47:08 +01:00
|
|
|
public TimeAgoParser(final PatternsHolder patternsHolder) {
|
2019-04-28 22:03:16 +02:00
|
|
|
this.patternsHolder = patternsHolder;
|
2020-10-18 05:48:14 +02:00
|
|
|
now = OffsetDateTime.now(ZoneOffset.UTC);
|
2019-10-02 07:02:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2022-03-17 14:47:08 +01:00
|
|
|
* Parses a textual date in the format '2 days ago' into a Calendar representation which is then
|
|
|
|
* wrapped in a {@link DateWrapper} object.
|
2019-11-03 19:45:25 +01:00
|
|
|
* <p>
|
|
|
|
* Beginning with days ago, the date is considered as an approximation.
|
2019-04-28 22:03:16 +02:00
|
|
|
*
|
2019-10-02 07:02:01 +02:00
|
|
|
* @param textualDate The original date as provided by the streaming service
|
2019-11-03 19:45:25 +01:00
|
|
|
* @return The parsed time (can be approximated)
|
2019-10-02 07:02:01 +02:00
|
|
|
* @throws ParsingException if the time unit could not be recognized
|
|
|
|
*/
|
2022-03-17 14:47:08 +01:00
|
|
|
public DateWrapper parse(final String textualDate) throws ParsingException {
|
|
|
|
for (final Map.Entry<ChronoUnit, Map<String, Integer>> caseUnitEntry
|
|
|
|
: patternsHolder.specialCases().entrySet()) {
|
2020-10-18 04:22:28 +02:00
|
|
|
final ChronoUnit chronoUnit = caseUnitEntry.getKey();
|
2022-03-17 14:47:08 +01:00
|
|
|
for (final Map.Entry<String, Integer> caseMapToAmountEntry
|
|
|
|
: caseUnitEntry.getValue().entrySet()) {
|
2019-04-28 22:03:16 +02:00
|
|
|
final String caseText = caseMapToAmountEntry.getKey();
|
|
|
|
final Integer caseAmount = caseMapToAmountEntry.getValue();
|
|
|
|
|
|
|
|
if (textualDateMatches(textualDate, caseText)) {
|
2020-10-18 04:22:28 +02:00
|
|
|
return getResultFor(caseAmount, chronoUnit);
|
2019-04-28 22:03:16 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-03-26 21:09:31 +01:00
|
|
|
return getResultFor(parseTimeAgoAmount(textualDate), parseChronoUnit(textualDate));
|
|
|
|
}
|
|
|
|
|
|
|
|
private int parseTimeAgoAmount(final String textualDate) {
|
2019-10-02 07:02:01 +02:00
|
|
|
try {
|
2022-03-26 21:09:31 +01:00
|
|
|
return Integer.parseInt(textualDate.replaceAll("\\D+", ""));
|
|
|
|
} catch (final NumberFormatException ignored) {
|
2019-10-02 07:02:01 +02:00
|
|
|
// If there is no valid number in the textual date,
|
|
|
|
// assume it is 1 (as in 'a second ago').
|
2022-03-26 21:09:31 +01:00
|
|
|
return 1;
|
2019-10-02 07:02:01 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-03-17 14:47:08 +01:00
|
|
|
private ChronoUnit parseChronoUnit(final String textualDate) throws ParsingException {
|
2022-03-26 21:09:31 +01:00
|
|
|
return patternsHolder.asMap().entrySet().stream()
|
|
|
|
.filter(e -> e.getValue().stream()
|
|
|
|
.anyMatch(agoPhrase -> textualDateMatches(textualDate, agoPhrase)))
|
|
|
|
.map(Map.Entry::getKey)
|
|
|
|
.findFirst()
|
|
|
|
.orElseThrow(() ->
|
|
|
|
new ParsingException("Unable to parse the date: " + textualDate));
|
2019-10-02 07:02:01 +02:00
|
|
|
}
|
|
|
|
|
2022-03-17 14:47:08 +01:00
|
|
|
private boolean textualDateMatches(final String textualDate, final String agoPhrase) {
|
2019-04-28 22:03:16 +02:00
|
|
|
if (textualDate.equals(agoPhrase)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (patternsHolder.wordSeparator().isEmpty()) {
|
|
|
|
return textualDate.toLowerCase().contains(agoPhrase.toLowerCase());
|
2022-03-26 21:09:31 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
final String escapedPhrase = Pattern.quote(agoPhrase.toLowerCase());
|
|
|
|
final String escapedSeparator = patternsHolder.wordSeparator().equals(" ")
|
2022-03-17 14:47:08 +01:00
|
|
|
// From JDK8 → \h - Treat horizontal spaces as a normal one
|
|
|
|
// (non-breaking space, thin space, etc.)
|
2022-03-26 21:09:31 +01:00
|
|
|
? "[ \\t\\xA0\\u1680\\u180e\\u2000-\\u200a\\u202f\\u205f\\u3000]"
|
|
|
|
: Pattern.quote(patternsHolder.wordSeparator());
|
2019-04-28 22:03:16 +02:00
|
|
|
|
2022-03-26 21:09:31 +01:00
|
|
|
// (^|separator)pattern($|separator)
|
|
|
|
// Check if the pattern is surrounded by separators or start/end of the string.
|
|
|
|
final String pattern =
|
|
|
|
"(^|" + escapedSeparator + ")" + escapedPhrase + "($|" + escapedSeparator + ")";
|
2019-04-28 22:03:16 +02:00
|
|
|
|
2022-03-26 21:09:31 +01:00
|
|
|
return Parser.isMatch(pattern, textualDate.toLowerCase());
|
2019-04-28 22:03:16 +02:00
|
|
|
}
|
|
|
|
|
2022-03-17 14:47:08 +01:00
|
|
|
private DateWrapper getResultFor(final int timeAgoAmount, final ChronoUnit chronoUnit) {
|
2023-03-25 11:49:14 +01:00
|
|
|
OffsetDateTime offsetDateTime = now.minus(timeAgoAmount, chronoUnit);
|
|
|
|
if (chronoUnit.isDateBased()) {
|
2020-10-18 05:48:14 +02:00
|
|
|
offsetDateTime = offsetDateTime.truncatedTo(ChronoUnit.HOURS);
|
2019-11-03 19:45:25 +01:00
|
|
|
}
|
2023-03-25 11:49:14 +01:00
|
|
|
return new DateWrapper(offsetDateTime, chronoUnit.isDateBased());
|
2019-10-02 07:02:01 +02:00
|
|
|
}
|
|
|
|
}
|