Merge pull request #1135 from Stypox/yt-emergency-info

[YouTube] Implement emergency meta info
This commit is contained in:
Tobi 2023-12-29 12:01:40 +01:00 committed by GitHub
commit 1e93b1dc20
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 226 additions and 123 deletions

View File

@ -0,0 +1,200 @@
package org.schabi.newpipe.extractor.services.youtube;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.extractCachedUrlIfNeeded;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObjectOrThrow;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.isGoogleURL;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream.Description;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
public final class YoutubeMetaInfoHelper {
private YoutubeMetaInfoHelper() {
}
@Nonnull
public static List<MetaInfo> getMetaInfo(@Nonnull final JsonArray contents)
throws ParsingException {
final List<MetaInfo> metaInfo = new ArrayList<>();
for (final Object content : contents) {
final JsonObject resultObject = (JsonObject) content;
if (resultObject.has("itemSectionRenderer")) {
for (final Object sectionContentObject
: resultObject.getObject("itemSectionRenderer").getArray("contents")) {
final JsonObject sectionContent = (JsonObject) sectionContentObject;
if (sectionContent.has("infoPanelContentRenderer")) {
metaInfo.add(getInfoPanelContent(sectionContent
.getObject("infoPanelContentRenderer")));
}
if (sectionContent.has("clarificationRenderer")) {
metaInfo.add(getClarificationRenderer(sectionContent
.getObject("clarificationRenderer")
));
}
if (sectionContent.has("emergencyOneboxRenderer")) {
getEmergencyOneboxRenderer(
sectionContent.getObject("emergencyOneboxRenderer"),
metaInfo::add
);
}
}
}
}
return metaInfo;
}
@Nonnull
private static MetaInfo getInfoPanelContent(@Nonnull final JsonObject infoPanelContentRenderer)
throws ParsingException {
final MetaInfo metaInfo = new MetaInfo();
final StringBuilder sb = new StringBuilder();
for (final Object paragraph : infoPanelContentRenderer.getArray("paragraphs")) {
if (sb.length() != 0) {
sb.append("<br>");
}
sb.append(getTextFromObject((JsonObject) paragraph));
}
metaInfo.setContent(new Description(sb.toString(), Description.HTML));
if (infoPanelContentRenderer.has("sourceEndpoint")) {
final String metaInfoLinkUrl = getUrlFromNavigationEndpoint(
infoPanelContentRenderer.getObject("sourceEndpoint"));
try {
metaInfo.addUrl(new URL(Objects.requireNonNull(extractCachedUrlIfNeeded(
metaInfoLinkUrl))));
} catch (final NullPointerException | MalformedURLException e) {
throw new ParsingException("Could not get metadata info URL", e);
}
final String metaInfoLinkText = getTextFromObject(
infoPanelContentRenderer.getObject("inlineSource"));
if (isNullOrEmpty(metaInfoLinkText)) {
throw new ParsingException("Could not get metadata info link text.");
}
metaInfo.addUrlText(metaInfoLinkText);
}
return metaInfo;
}
@Nonnull
private static MetaInfo getClarificationRenderer(
@Nonnull final JsonObject clarificationRenderer) throws ParsingException {
final MetaInfo metaInfo = new MetaInfo();
final String title = getTextFromObject(clarificationRenderer
.getObject("contentTitle"));
final String text = getTextFromObject(clarificationRenderer
.getObject("text"));
if (title == null || text == null) {
throw new ParsingException("Could not extract clarification renderer content");
}
metaInfo.setTitle(title);
metaInfo.setContent(new Description(text, Description.PLAIN_TEXT));
if (clarificationRenderer.has("actionButton")) {
final JsonObject actionButton = clarificationRenderer.getObject("actionButton")
.getObject("buttonRenderer");
try {
final String url = getUrlFromNavigationEndpoint(actionButton
.getObject("command"));
metaInfo.addUrl(new URL(Objects.requireNonNull(extractCachedUrlIfNeeded(url))));
} catch (final NullPointerException | MalformedURLException e) {
throw new ParsingException("Could not get metadata info URL", e);
}
final String metaInfoLinkText = getTextFromObject(
actionButton.getObject("text"));
if (isNullOrEmpty(metaInfoLinkText)) {
throw new ParsingException("Could not get metadata info link text.");
}
metaInfo.addUrlText(metaInfoLinkText);
}
if (clarificationRenderer.has("secondaryEndpoint") && clarificationRenderer
.has("secondarySource")) {
final String url = getUrlFromNavigationEndpoint(clarificationRenderer
.getObject("secondaryEndpoint"));
// Ignore Google URLs, because those point to a Google search about "Covid-19"
if (url != null && !isGoogleURL(url)) {
try {
metaInfo.addUrl(new URL(url));
final String description = getTextFromObject(clarificationRenderer
.getObject("secondarySource"));
metaInfo.addUrlText(description == null ? url : description);
} catch (final MalformedURLException e) {
throw new ParsingException("Could not get metadata info secondary URL", e);
}
}
}
return metaInfo;
}
private static void getEmergencyOneboxRenderer(
@Nonnull final JsonObject emergencyOneboxRenderer,
final Consumer<MetaInfo> addMetaInfo
) throws ParsingException {
final List<JsonObject> supportRenderers = emergencyOneboxRenderer.values()
.stream()
.filter(o -> o instanceof JsonObject
&& ((JsonObject) o).has("singleActionEmergencySupportRenderer"))
.map(o -> ((JsonObject) o).getObject("singleActionEmergencySupportRenderer"))
.collect(Collectors.toList());
if (supportRenderers.isEmpty()) {
throw new ParsingException("Could not extract any meta info from emergency renderer");
}
for (final JsonObject r : supportRenderers) {
final MetaInfo metaInfo = new MetaInfo();
// usually an encouragement like "We are with you"
final String title = getTextFromObjectOrThrow(r.getObject("title"), "title");
// usually a phone number
final String action = getTextFromObjectOrThrow(r.getObject("actionText"), "action");
// usually details about the phone number
final String details = getTextFromObjectOrThrow(r.getObject("detailsText"), "details");
// usually the name of an association
final String urlText = getTextFromObjectOrThrow(r.getObject("navigationText"),
"urlText");
metaInfo.setTitle(title);
metaInfo.setContent(new Description(details + "\n" + action, Description.PLAIN_TEXT));
metaInfo.addUrlText(urlText);
// usually the webpage of the association
final String url = getUrlFromNavigationEndpoint(r.getObject("navigationEndpoint"));
if (url == null) {
throw new ParsingException("Could not extract emergency renderer url");
}
try {
metaInfo.addUrl(new URL(replaceHttpWithHttps(url)));
} catch (final MalformedURLException e) {
throw new ParsingException("Could not parse emergency renderer url", e);
}
addMetaInfo.accept(metaInfo);
}
}
}

View File

@ -32,11 +32,10 @@ import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import com.grack.nanojson.JsonWriter;
import org.jsoup.nodes.Entities;
import org.jsoup.nodes.Entities;
import org.schabi.newpipe.extractor.Image;
import org.schabi.newpipe.extractor.Image.ResolutionLevel;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.exceptions.AccountTerminatedException;
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
@ -47,7 +46,6 @@ import org.schabi.newpipe.extractor.localization.ContentCountry;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.playlist.PlaylistInfo;
import org.schabi.newpipe.extractor.stream.AudioTrackType;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.RandomStringFromAlphabetGenerator;
@ -62,12 +60,10 @@ import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Random;
import java.util.Set;
@ -262,7 +258,7 @@ public final class YoutubeParsingHelper {
private static boolean consentAccepted = false;
private static boolean isGoogleURL(final String url) {
public static boolean isGoogleURL(final String url) {
final String cachedUrl = extractCachedUrlIfNeeded(url);
try {
final URL u = new URL(cachedUrl);
@ -1080,6 +1076,16 @@ public final class YoutubeParsingHelper {
.replaceAll(" {2}", " &nbsp;");
}
/**
 * Same as {@link #getTextFromObject(JsonObject)}, but fails instead of returning
 * {@code null}.
 *
 * @param textObject the JSON object to get the text from
 * @param error      a short description of what was being extracted, appended to the
 *                   exception message
 * @return the extracted text, never {@code null}
 * @throws ParsingException if no text could be extracted from {@code textObject}
 */
@Nonnull
public static String getTextFromObjectOrThrow(final JsonObject textObject, final String error)
        throws ParsingException {
    final String text = getTextFromObject(textObject);
    if (text != null) {
        return text;
    }
    throw new ParsingException("Could not extract text: " + error);
}
@Nullable
public static String getTextFromObject(final JsonObject textObject) {
return getTextFromObject(textObject, false);
@ -1648,120 +1654,6 @@ public final class YoutubeParsingHelper {
}
}
/**
 * Gathers the meta info contained in the {@code itemSectionRenderer}s of the given array.
 *
 * <p>
 * Both {@code infoPanelContentRenderer} and {@code clarificationRenderer} section contents
 * are converted; every other content is ignored.
 * </p>
 *
 * @param contents the array of renderers to look into
 * @return the extracted meta info items in the order they were found
 * @throws ParsingException if one of the handled renderers could not be parsed
 */
@Nonnull
public static List<MetaInfo> getMetaInfo(@Nonnull final JsonArray contents)
        throws ParsingException {
    final List<MetaInfo> collected = new ArrayList<>();

    for (final Object entry : contents) {
        final JsonObject result = (JsonObject) entry;
        if (!result.has("itemSectionRenderer")) {
            continue;
        }

        final JsonArray sectionContents = result.getObject("itemSectionRenderer")
                .getArray("contents");
        for (final Object sectionEntry : sectionContents) {
            final JsonObject section = (JsonObject) sectionEntry;

            if (section.has("infoPanelContentRenderer")) {
                final JsonObject infoPanel = section.getObject("infoPanelContentRenderer");
                collected.add(getInfoPanelContent(infoPanel));
            }

            if (section.has("clarificationRenderer")) {
                final JsonObject clarification = section.getObject("clarificationRenderer");
                collected.add(getClarificationRendererContent(clarification));
            }
        }
    }

    return collected;
}
/**
 * Builds a meta info from an {@code infoPanelContentRenderer}.
 *
 * <p>
 * The texts of the {@code paragraphs} are joined with {@code <br>} into an HTML description.
 * Paragraphs from which no text could be extracted are left out. If a {@code sourceEndpoint}
 * is present, its URL and the {@code inlineSource} link text are both required.
 * </p>
 *
 * @param infoPanelContentRenderer the renderer to parse
 * @return the resulting meta info
 * @throws ParsingException if the source URL or its link text could not be extracted
 */
@Nonnull
private static MetaInfo getInfoPanelContent(@Nonnull final JsonObject infoPanelContentRenderer)
        throws ParsingException {
    final MetaInfo metaInfo = new MetaInfo();
    final StringBuilder sb = new StringBuilder();
    for (final Object paragraph : infoPanelContentRenderer.getArray("paragraphs")) {
        final String paragraphText =
                YoutubeParsingHelper.getTextFromObject((JsonObject) paragraph);
        if (paragraphText == null) {
            // appending a null String would put the literal text "null" in the content
            continue;
        }

        if (sb.length() != 0) {
            sb.append("<br>");
        }
        sb.append(paragraphText);
    }
    metaInfo.setContent(new Description(sb.toString(), Description.HTML));

    if (infoPanelContentRenderer.has("sourceEndpoint")) {
        final String metaInfoLinkUrl = YoutubeParsingHelper.getUrlFromNavigationEndpoint(
                infoPanelContentRenderer.getObject("sourceEndpoint"));
        try {
            metaInfo.addUrl(new URL(Objects.requireNonNull(extractCachedUrlIfNeeded(
                    metaInfoLinkUrl))));
        } catch (final NullPointerException | MalformedURLException e) {
            throw new ParsingException("Could not get metadata info URL", e);
        }

        final String metaInfoLinkText = YoutubeParsingHelper.getTextFromObject(
                infoPanelContentRenderer.getObject("inlineSource"));
        if (isNullOrEmpty(metaInfoLinkText)) {
            throw new ParsingException("Could not get metadata info link text.");
        }
        metaInfo.addUrlText(metaInfoLinkText);
    }

    return metaInfo;
}
/**
 * Converts a {@code clarificationRenderer} into a meta info.
 *
 * <p>
 * The {@code contentTitle} and {@code text} fields are required. The link of the
 * {@code actionButton} is added if that button exists; the secondary link is added if both
 * {@code secondaryEndpoint} and {@code secondarySource} exist and the URL is not a Google
 * one.
 * </p>
 *
 * @param clarificationRenderer the renderer to convert
 * @return the resulting meta info
 * @throws ParsingException if the title, the text, or a present link could not be extracted
 */
@Nonnull
private static MetaInfo getClarificationRendererContent(
        @Nonnull final JsonObject clarificationRenderer) throws ParsingException {
    final MetaInfo info = new MetaInfo();

    final JsonObject titleObject = clarificationRenderer.getObject("contentTitle");
    final String heading = YoutubeParsingHelper.getTextFromObject(titleObject);
    final JsonObject textObject = clarificationRenderer.getObject("text");
    final String body = YoutubeParsingHelper.getTextFromObject(textObject);
    if (heading == null || body == null) {
        throw new ParsingException("Could not extract clarification renderer content");
    }
    info.setTitle(heading);
    info.setContent(new Description(body, Description.PLAIN_TEXT));

    // primary link, taken from the action button
    if (clarificationRenderer.has("actionButton")) {
        final JsonObject button = clarificationRenderer.getObject("actionButton")
                .getObject("buttonRenderer");
        try {
            final String buttonUrl = YoutubeParsingHelper.getUrlFromNavigationEndpoint(
                    button.getObject("command"));
            final String resolvedUrl = Objects.requireNonNull(
                    extractCachedUrlIfNeeded(buttonUrl));
            info.addUrl(new URL(resolvedUrl));
        } catch (final NullPointerException | MalformedURLException e) {
            throw new ParsingException("Could not get metadata info URL", e);
        }

        final String buttonText = YoutubeParsingHelper.getTextFromObject(
                button.getObject("text"));
        if (isNullOrEmpty(buttonText)) {
            throw new ParsingException("Could not get metadata info link text.");
        }
        info.addUrlText(buttonText);
    }

    // secondary link, only used when endpoint and source are both available
    if (clarificationRenderer.has("secondaryEndpoint")
            && clarificationRenderer.has("secondarySource")) {
        final String secondaryUrl = getUrlFromNavigationEndpoint(
                clarificationRenderer.getObject("secondaryEndpoint"));
        // Ignore Google URLs, because those point to a Google search about "Covid-19"
        if (!(secondaryUrl == null || isGoogleURL(secondaryUrl))) {
            try {
                info.addUrl(new URL(secondaryUrl));
                final String secondaryText = getTextFromObject(
                        clarificationRenderer.getObject("secondarySource"));
                if (secondaryText == null) {
                    // no description available: show the URL itself
                    info.addUrlText(secondaryUrl);
                } else {
                    info.addUrlText(secondaryText);
                }
            } catch (final MalformedURLException e) {
                throw new ParsingException("Could not get metadata info secondary URL", e);
            }
        }
    }

    return info;
}
/**
* Sometimes, YouTube provides URLs which use Google's cache. They look like
* {@code https://webcache.googleusercontent.com/search?q=cache:CACHED_URL}

View File

@ -30,7 +30,7 @@ import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.search.SearchExtractor;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.services.youtube.YoutubeMetaInfoHelper;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import java.io.IOException;
@ -151,7 +151,7 @@ public class YoutubeSearchExtractor extends SearchExtractor {
@Nonnull
@Override
public List<MetaInfo> getMetaInfo() throws ParsingException {
return YoutubeParsingHelper.getMetaInfo(
return YoutubeMetaInfoHelper.getMetaInfo(
initialData.getObject("contents")
.getObject("twoColumnSearchResultsRenderer")
.getObject("primaryContents")

View File

@ -67,6 +67,7 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager;
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
import org.schabi.newpipe.extractor.services.youtube.YoutubeJavaScriptPlayerManager;
import org.schabi.newpipe.extractor.services.youtube.YoutubeMetaInfoHelper;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.AudioStream;
@ -1592,7 +1593,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Nonnull
@Override
public List<MetaInfo> getMetaInfo() throws ParsingException {
return YoutubeParsingHelper.getMetaInfo(nextResponse
return YoutubeMetaInfoHelper.getMetaInfo(nextResponse
.getObject("contents")
.getObject("twoColumnWatchNextResults")
.getObject("results")

View File

@ -446,5 +446,15 @@ public class YoutubeSearchExtractorTest {
// The original URL is expected to contain the search results path with the URL-encoded query.
@Override public String expectedOriginalUrlContains() throws Exception { return "youtube.com/results?search_query=" + Utils.encodeUrlUtf8(QUERY); }
// The search string is expected to be the query itself.
@Override public String expectedSearchString() { return QUERY; }
// No search suggestion is expected for this query.
@Nullable @Override public String expectedSearchSuggestion() { return null; }
/**
 * Checks that exactly one meta info item is extracted for this search.
 */
@Test
@Override
public void testMetaInfo() throws Exception {
    // Text and language of the meta info depend on where in the world the connection is
    // established from, so only the number of items can be verified, not their content.
    final int metaInfoCount = extractor().getMetaInfo().size();
    assertEquals(1, metaInfoCount);
}
}
}