Merge pull request #188 from jimbo1qaz/fix-description-timestamps

Fix timestamp links in Youtube video descriptions
This commit is contained in:
Christian Schabesberger 2019-08-22 11:25:16 +02:00 committed by GitHub
commit cfc72d8dc9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 44 additions and 2 deletions

View File

@ -30,6 +30,8 @@ import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.*; import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/* /*
* Created by Christian Schabesberger on 06.08.15. * Created by Christian Schabesberger on 06.08.15.
@ -162,14 +164,54 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} }
} }
// onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;"
// :00 is NOT recognized as a timestamp in description or comments.
// 0:00 is recognized in both description and comments.
// https://www.youtube.com/watch?v=4cccfDXu1vA
private final static Pattern DESCRIPTION_TIMESTAMP_ONCLICK_REGEX = Pattern.compile(
"seekTo\\("
+ "(?:(\\d+)\\*3600\\+)?" // hours?
+ "(\\d+)\\*60\\+" // minutes
+ "(\\d+)" // seconds
+ "\\)");
@SafeVarargs
private static <T> T coalesce(T... args) {
for (T arg : args) {
if (arg != null) return arg;
}
throw new IllegalArgumentException("all arguments to coalesce() were null");
}
private String parseHtmlAndGetFullLinks(String descriptionHtml) private String parseHtmlAndGetFullLinks(String descriptionHtml)
throws MalformedURLException, UnsupportedEncodingException, ParsingException { throws MalformedURLException, UnsupportedEncodingException, ParsingException {
final Document description = Jsoup.parse(descriptionHtml, getUrl()); final Document description = Jsoup.parse(descriptionHtml, getUrl());
for(Element a : description.select("a")) { for(Element a : description.select("a")) {
final String rawUrl = a.attr("abs:href"); final String rawUrl = a.attr("abs:href");
final URL redirectLink = new URL(rawUrl); final URL redirectLink = new URL(rawUrl);
final String queryString = redirectLink.getQuery();
if(queryString != null) { final Matcher onClickTimestamp;
final String queryString;
if ((onClickTimestamp = DESCRIPTION_TIMESTAMP_ONCLICK_REGEX.matcher(a.attr("onclick")))
.find()) {
a.removeAttr("onclick");
String hours = coalesce(onClickTimestamp.group(1), "0");
String minutes = onClickTimestamp.group(2);
String seconds = onClickTimestamp.group(3);
int timestamp = 0;
timestamp += Integer.parseInt(hours) * 3600;
timestamp += Integer.parseInt(minutes) * 60;
timestamp += Integer.parseInt(seconds);
String setTimestamp = "&t=" + timestamp;
// Even after clicking https://youtu.be/...?t=6,
// getUrl() is https://www.youtube.com/watch?v=..., never youtu.be, never &t=.
a.attr("href", getUrl() + setTimestamp);
} else if((queryString = redirectLink.getQuery()) != null) {
// if the query string is null we are not dealing with a redirect link, // if the query string is null we are not dealing with a redirect link,
// so we don't need to override it. // so we don't need to override it.
final String link = final String link =