fix: use url parser instead of regex for extracting track type
This commit is contained in:
parent
6e5b6b76a2
commit
f2c167f2dd
|
@ -43,6 +43,7 @@ import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
|||
import org.schabi.newpipe.extractor.localization.ContentCountry;
|
||||
import org.schabi.newpipe.extractor.localization.Localization;
|
||||
import org.schabi.newpipe.extractor.playlist.PlaylistInfo;
|
||||
import org.schabi.newpipe.extractor.stream.AudioTrackType;
|
||||
import org.schabi.newpipe.extractor.stream.Description;
|
||||
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
||||
import org.schabi.newpipe.extractor.utils.Parser;
|
||||
|
@ -1483,6 +1484,7 @@ public final class YoutubeParsingHelper {
|
|||
|
||||
/**
|
||||
* Create a map with the required cookie header.
|
||||
*
|
||||
* @return A singleton map containing the header.
|
||||
*/
|
||||
public static Map<String, List<String>> getCookieHeader() {
|
||||
|
@ -1801,4 +1803,52 @@ public final class YoutubeParsingHelper {
|
|||
public static boolean isConsentAccepted() {
|
||||
return consentAccepted;
|
||||
}
|
||||
|
||||
/**
 * Pattern capturing the {@code acont} value from text that contains an {@code &xtags=}
 * parameter, in either plain or percent-encoded form.
 * <p>
 * After {@code &xtags=} it skips any run of word characters or {@code %}, expects
 * {@code acont} followed by {@code =} or {@code %3D}, captures one or more lowercase
 * letters as group 1, and requires the capture to be followed by {@code =}, {@code %3D},
 * {@code :}, {@code %3A}, {@code &} or the end of the input.
 * </p>
 * <p>
 * NOTE(review): {@link #extractAudioTrackType(String)} parses the query string itself and
 * does not reference this pattern in the lines visible here; confirm whether it is still
 * needed.
 * </p>
 */
private static final Pattern AUDIO_STREAM_TYPE_REGEX =
|
||||
Pattern.compile("&xtags=[\\w%]*acont(?:=|%3D)([a-z]+)(?:=|%3D|:|%3A|&|$)");
|
||||
|
||||
/**
|
||||
* Extract the audio track type from a YouTube stream URL.
|
||||
* <p>
|
||||
* The track type is parsed from the {@code xtags} URL parameter
|
||||
* (Example: {@code acont=original:lang=en}).
|
||||
* </p>
|
||||
* @param streamUrl YouTube stream URL
|
||||
* @return {@link AudioTrackType} or {@code null} if no track type was found
|
||||
*/
|
||||
@Nullable
|
||||
public static AudioTrackType extractAudioTrackType(final String streamUrl) {
|
||||
final String xtags;
|
||||
try {
|
||||
xtags = Utils.getQueryValue(new URL(streamUrl), "xtags");
|
||||
} catch (final MalformedURLException e) {
|
||||
return null;
|
||||
}
|
||||
if (xtags == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
String atype = null;
|
||||
for (final String param : xtags.split(":")) {
|
||||
final String[] kv = param.split("=", 2);
|
||||
if (kv.length > 1 && kv[0].equals("acont")) {
|
||||
atype = kv[1];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (atype == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
switch (atype) {
|
||||
case "original":
|
||||
return AudioTrackType.ORIGINAL;
|
||||
case "dubbed":
|
||||
return AudioTrackType.DUBBED;
|
||||
case "descriptive":
|
||||
return AudioTrackType.DESCRIPTIVE;
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -72,7 +72,6 @@ import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
|
|||
import org.schabi.newpipe.extractor.services.youtube.YoutubeThrottlingDecrypter;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.stream.AudioStream;
|
||||
import org.schabi.newpipe.extractor.stream.AudioTrackType;
|
||||
import org.schabi.newpipe.extractor.stream.DeliveryMethod;
|
||||
import org.schabi.newpipe.extractor.stream.Description;
|
||||
import org.schabi.newpipe.extractor.stream.Frameset;
|
||||
|
@ -100,7 +99,6 @@ import java.util.List;
|
|||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
|
@ -812,8 +810,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
"\\bc\\s*&&\\s*d\\.set\\([^,]+\\s*,\\s*(:encodeURIComponent\\s*\\()([a-zA-Z0-9$]+)\\("
|
||||
};
|
||||
private static final String STS_REGEX = "signatureTimestamp[=:](\\d+)";
|
||||
private static final Pattern AUDIO_STREAM_TYPE_REGEX =
|
||||
Pattern.compile("&xtags=[\\w%]*acont(?:=|%3D)([a-z]+)(?:=|%3D|:|%3A|&|$)");
|
||||
|
||||
@Override
|
||||
public void onFetchPage(@Nonnull final Downloader downloader)
|
||||
|
@ -1488,20 +1484,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
itagItem.setAudioLocale(LocaleCompat.forLanguageTag(
|
||||
audioTrackId.substring(0, audioTrackIdLastLocaleCharacter)));
|
||||
}
|
||||
|
||||
try {
|
||||
final String atype = Parser.matchGroup1(AUDIO_STREAM_TYPE_REGEX, streamUrl);
|
||||
switch (atype) {
|
||||
case "original":
|
||||
itagItem.setAudioTrackType(AudioTrackType.ORIGINAL);
|
||||
break;
|
||||
case "dubbed":
|
||||
itagItem.setAudioTrackType(AudioTrackType.DUBBED);
|
||||
break;
|
||||
case "descriptive":
|
||||
itagItem.setAudioTrackType(AudioTrackType.DESCRIPTIVE);
|
||||
}
|
||||
} catch (final Parser.RegexException ignored) { }
|
||||
itagItem.setAudioTrackType(YoutubeParsingHelper.extractAudioTrackType(streamUrl));
|
||||
}
|
||||
|
||||
itagItem.setAudioTrackName(formatData.getObject("audioTrack")
|
||||
|
|
|
@ -1,16 +1,20 @@
|
|||
package org.schabi.newpipe.extractor.services.youtube;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.schabi.newpipe.downloader.DownloaderFactory;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.stream.AudioTrackType;
|
||||
import org.schabi.newpipe.extractor.utils.Parser;
|
||||
import org.schabi.newpipe.extractor.utils.Utils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
public class YoutubeParsingHelperTest {
|
||||
|
||||
|
@ -48,4 +52,17 @@ public class YoutubeParsingHelperTest {
|
|||
assertEquals("https://www.infektionsschutz.de/coronavirus-sars-cov-2.html",
|
||||
YoutubeParsingHelper.extractCachedUrlIfNeeded("https://www.infektionsschutz.de/coronavirus-sars-cov-2.html"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void extractAudioTrackType() {
|
||||
final String originalUrl = "https://rr2---sn-4g5lzned.googlevideo.com/videoplayback?expire=1679429648&ei=sLsZZKrICIuR1gLSnYbgAg&ip=2001%3A638%3A102%3A26%3A1a7c%3A106b%3A6e4a%3Adc09&id=o-ALWn2ZwDxUXEZKzlsT_X9iuDjRMSi__SgRXVrVjKZEhc&itag=251&source=youtube&requiressl=yes&mh=nU&mm=31%2C29&mn=sn-4g5lzned%2Csn-4g5edndz&ms=au%2Crdu&mv=m&mvi=2&pl=40&initcwndbps=1740000&spc=H3gIhgXQzBxvKu2MOEmFaaEenC4DKdVUwudTeu3dtKwmq-Xv5g&vprv=1&xtags=acont%3Doriginal%3Alang%3Den&mime=audio%2Fwebm&ns=-lg0OQZL1LZRQO-dzE0W4E4L&gir=yes&clen=3513412&dur=303.681&lmt=1679342942566207&mt=1679407764&fvip=1&keepalive=yes&fexp=24007246&c=WEB&txp=5532434&n=gDLP5pImH9Vr7v&sparams=expire%2Cei%2Cip%2Cid%2Citag%2Csource%2Crequiressl%2Cspc%2Cvprv%2Cxtags%2Cmime%2Cns%2Cgir%2Cclen%2Cdur%2Clmt&sig=AOq0QJ8wRAIgPFQ1yX8aoc35sz2eV2-wzNIhTQeOHGCsOmIonmo776kCIFo5k6HZ5kAQ6DycRCAG8jJgk9jNyncILGPrGZMZUuuo&lsparams=mh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps&lsig=AG3C_xAwRQIhANODPaBuc32MWI9gF3Bn1iz3byEn7EwUiXpNLuCcQqW9AiBB88Qrrz2fJCzYKg14_nnGxGQH1Uoi7i31OSrHK6_dGw%3D%3D";
|
||||
final String dubbedUrl = "https://rr2---sn-4g5lzned.googlevideo.com/videoplayback?expire=1679429648&ei=sLsZZKrICIuR1gLSnYbgAg&ip=2001%3A638%3A102%3A26%3A1a7c%3A106b%3A6e4a%3Adc09&id=o-ALWn2ZwDxUXEZKzlsT_X9iuDjRMSi__SgRXVrVjKZEhc&itag=251&source=youtube&requiressl=yes&mh=nU&mm=31%2C29&mn=sn-4g5lzned%2Csn-4g5edndz&ms=au%2Crdu&mv=m&mvi=2&pl=40&initcwndbps=1740000&spc=H3gIhgXQzBxvKu2MOEmFaaEenC4DKdVUwudTeu3dtKwmq-Xv5g&vprv=1&xtags=acont%3Ddubbed%3Alang%3Den&mime=audio%2Fwebm&ns=-lg0OQZL1LZRQO-dzE0W4E4L&gir=yes&clen=3884070&dur=303.721&lmt=1679342946044954&mt=1679407764&fvip=1&keepalive=yes&fexp=24007246&c=WEB&txp=5532434&n=gDLP5pImH9Vr7v&sparams=expire%2Cei%2Cip%2Cid%2Citag%2Csource%2Crequiressl%2Cspc%2Cvprv%2Cxtags%2Cmime%2Cns%2Cgir%2Cclen%2Cdur%2Clmt&sig=AOq0QJ8wRQIhAKEMLB8yLZJf2jXAu4P1Q8AVEciYsmjjr2syYAWZfJg6AiAfu-XI11zYpCLqljw_MCegh26pJHYyfatgfFGWfpL-6Q%3D%3D&lsparams=mh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps&lsig=AG3C_xAwRQIhANODPaBuc32MWI9gF3Bn1iz3byEn7EwUiXpNLuCcQqW9AiBB88Qrrz2fJCzYKg14_nnGxGQH1Uoi7i31OSrHK6_dGw%3D%3D";
|
||||
final String descriptiveUrl = "https://rr2---sn-4g5lzned.googlevideo.com/videoplayback?expire=1679429648&ei=sLsZZKrICIuR1gLSnYbgAg&ip=2001%3A638%3A102%3A26%3A1a7c%3A106b%3A6e4a%3Adc09&id=o-ALWn2ZwDxUXEZKzlsT_X9iuDjRMSi__SgRXVrVjKZEhc&itag=251&source=youtube&requiressl=yes&mh=nU&mm=31%2C29&mn=sn-4g5lzned%2Csn-4g5edndz&ms=au%2Crdu&mv=m&mvi=2&pl=40&initcwndbps=1740000&spc=H3gIhgXQzBxvKu2MOEmFaaEenC4DKdVUwudTeu3dtKwmq-Xv5g&vprv=1&xtags=acont%3Ddescriptive%3Alang%3Den&mime=audio%2Fwebm&ns=-lg0OQZL1LZRQO-dzE0W4E4L&gir=yes&clen=4061711&dur=303.721&lmt=1679342946800120&mt=1679407764&fvip=1&keepalive=yes&fexp=24007246&c=WEB&txp=5532434&n=gDLP5pImH9Vr7v&sparams=expire%2Cei%2Cip%2Cid%2Citag%2Csource%2Crequiressl%2Cspc%2Cvprv%2Cxtags%2Cmime%2Cns%2Cgir%2Cclen%2Cdur%2Clmt&sig=AOq0QJ8wRgIhAKFUzoNscV1hbNcPwcnQO3vOy47q69szj7BdLhFYS52pAiEA2oPhLZIZsrUQrx62iH4dHvTBlCloC3NieJw6edo7LL8%3D&lsparams=mh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps&lsig=AG3C_xAwRQIhANODPaBuc32MWI9gF3Bn1iz3byEn7EwUiXpNLuCcQqW9AiBB88Qrrz2fJCzYKg14_nnGxGQH1Uoi7i31OSrHK6_dGw%3D%3D";
|
||||
final String noTrackUrl = "https://rr2---sn-4g5ednz7.googlevideo.com/videoplayback?expire=1679430240&ei=AL4ZZKiXJefYx_APj_6ECA&ip=2001%3A638%3A102%3A26%3A1a7c%3A106b%3A6e4a%3Adc09&id=o-ALKVh9uHVEvurL3bZOZCEMzFod9ZmJJd6GszA6UEIuKy&itag=251&source=youtube&requiressl=yes&mh=8L&mm=31%2C26&mn=sn-4g5ednz7%2Csn-i5heen7z&ms=au%2Conr&mv=m&mvi=2&pl=40&initcwndbps=1793750&spc=H3gIhh2s06nxQJg3zEgY9pw84syUasRiagYDsQ5UHHfcu5bfTA&vprv=1&mime=audio%2Fwebm&ns=VumObYcnTZNicexX7Ek2WakL&gir=yes&clen=3711099&dur=299.201&lmt=1679334484198077&mt=1679408487&fvip=2&keepalive=yes&fexp=24007246&c=WEB&txp=3318224&n=10c-m6ZvG6C7rC&sparams=expire%2Cei%2Cip%2Cid%2Citag%2Csource%2Crequiressl%2Cspc%2Cvprv%2Cmime%2Cns%2Cgir%2Cclen%2Cdur%2Clmt&sig=AOq0QJ8wRQIhAODS0aHRBgdrHm5qwquqGC6zq3rU81W59y4BtV0Y9KStAiAPT8ykXXj_7GzAyZbLPgYKs-B1HWT-4bY0CppmZ2rReg%3D%3D&lsparams=mh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps&lsig=AG3C_xAwRQIhAL8fS6T-V9BNqrx55mdMvve5be2gcjIY8pYfxlUMPY6pAiAgiCMbqR4eSS_HvLu9KBe6cCFZeMcSTc7vzWtL9y0xvw%3D%3D";
|
||||
|
||||
assertEquals(AudioTrackType.ORIGINAL, YoutubeParsingHelper.extractAudioTrackType(originalUrl));
|
||||
assertEquals(AudioTrackType.DUBBED, YoutubeParsingHelper.extractAudioTrackType(dubbedUrl));
|
||||
assertEquals(AudioTrackType.DESCRIPTIVE, YoutubeParsingHelper.extractAudioTrackType(descriptiveUrl));
|
||||
assertNull(YoutubeParsingHelper.extractAudioTrackType(noTrackUrl));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue