From cc9ade962e89418548324899f36176dae27bcd04 Mon Sep 17 00:00:00 2001 From: Stypox Date: Sat, 30 Dec 2023 22:53:27 +0100 Subject: [PATCH 1/3] [MediaCCC] Allow obtaining channel tab extractor from scratch i.e. without needing to pass through the conference/channel extractor This was needed because clients (like NewPipe) might rely on link handlers to hold as little data as possible, since they might be kept around for long or passed around in system transactions, so this commit allows obtaining a standalone link handler that does not hold a JsonObject within itself. --- .../services/media_ccc/MediaCCCService.java | 19 ++-- .../MediaCCCChannelTabExtractor.java | 69 +++++++++++++++ .../MediaCCCConferenceExtractor.java | 87 ++++--------------- .../MediaCCCConferenceLinkHandlerFactory.java | 17 ++++ 4 files changed, 114 insertions(+), 78 deletions(-) create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCChannelTabExtractor.java diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCService.java index 3e3a4726e..78d6e4093 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCService.java @@ -19,6 +19,7 @@ import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory; import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; import org.schabi.newpipe.extractor.search.SearchExtractor; +import org.schabi.newpipe.extractor.services.media_ccc.extractors.MediaCCCChannelTabExtractor; import org.schabi.newpipe.extractor.services.media_ccc.extractors.MediaCCCConferenceExtractor; import org.schabi.newpipe.extractor.services.media_ccc.extractors.MediaCCCConferenceKiosk; 
import org.schabi.newpipe.extractor.services.media_ccc.extractors.MediaCCCLiveStreamExtractor; @@ -57,7 +58,9 @@ public class MediaCCCService extends StreamingService { @Override public ListLinkHandlerFactory getChannelTabLHFactory() { - return null; + // there is just one channel tab in MediaCCC, the one containing conferences, so there is + // no need for a specific channel tab link handler, but we can just use the channel one + return MediaCCCConferenceLinkHandlerFactory.getInstance(); } @Override @@ -86,17 +89,13 @@ public class MediaCCCService extends StreamingService { @Override public ChannelTabExtractor getChannelTabExtractor(final ListLinkHandler linkHandler) { if (linkHandler instanceof ReadyChannelTabListLinkHandler) { + // conference data has already been fetched, let the ReadyChannelTabListLinkHandler + // create a MediaCCCChannelTabExtractor with that data return ((ReadyChannelTabListLinkHandler) linkHandler).getChannelTabExtractor(this); + } else { + // conference data has not been fetched yet, so pass null instead + return new MediaCCCChannelTabExtractor(this, linkHandler, null); } - - /* - Channel tab extractors are only supported in conferences and should only come from a - ReadyChannelTabListLinkHandler instance with a ChannelTabExtractorBuilder instance of the - conferences extractor - - If that's not the case, return null in this case, so no channel tabs support - */ - return null; } @Override diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCChannelTabExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCChannelTabExtractor.java new file mode 100644 index 000000000..aef9daabf --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCChannelTabExtractor.java @@ -0,0 +1,69 @@ +package org.schabi.newpipe.extractor.services.media_ccc.extractors; + +import com.grack.nanojson.JsonObject; + 
+import org.schabi.newpipe.extractor.InfoItem; +import org.schabi.newpipe.extractor.ListExtractor; +import org.schabi.newpipe.extractor.MultiInfoItemsCollector; +import org.schabi.newpipe.extractor.Page; +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.channel.tabs.ChannelTabExtractor; +import org.schabi.newpipe.extractor.downloader.Downloader; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.services.media_ccc.extractors.infoItems.MediaCCCStreamInfoItemExtractor; + +import java.io.IOException; +import java.util.Objects; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +/** + * MediaCCC does not really have channel tabs, but rather a list of videos for each conference, + * so this class just acts as a videos channel tab extractor. + */ +public class MediaCCCChannelTabExtractor extends ChannelTabExtractor { + @Nullable + private JsonObject conferenceData; + + /** + * @param conferenceData will be non-null if conference data has already been fetched by + * {@link MediaCCCConferenceExtractor}. Otherwise, if this parameter is + * {@code null}, conference data will be fetched anew. 
+ */ + public MediaCCCChannelTabExtractor(final StreamingService service, + final ListLinkHandler linkHandler, + @Nullable final JsonObject conferenceData) { + super(service, linkHandler); + this.conferenceData = conferenceData; + } + + @Override + public void onFetchPage(@Nonnull final Downloader downloader) + throws ExtractionException, IOException { + if (conferenceData == null) { + // only fetch conference data if we don't have it already + conferenceData = MediaCCCConferenceExtractor.fetchConferenceData(downloader, getId()); + } + } + + @Nonnull + @Override + public ListExtractor.InfoItemsPage<InfoItem> getInitialPage() { + final MultiInfoItemsCollector collector = + new MultiInfoItemsCollector(getServiceId()); + Objects.requireNonNull(conferenceData) // will surely be != null after onFetchPage + .getArray("events") + .stream() + .filter(JsonObject.class::isInstance) + .map(JsonObject.class::cast) + .forEach(event -> collector.commit(new MediaCCCStreamInfoItemExtractor(event))); + return new ListExtractor.InfoItemsPage<>(collector, null); + } + + @Override + public ListExtractor.InfoItemsPage<InfoItem> getPage(final Page page) { + return ListExtractor.InfoItemsPage.emptyPage(); + } +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCConferenceExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCConferenceExtractor.java index e4574cb88..aab587bf2 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCConferenceExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCConferenceExtractor.java @@ -1,24 +1,20 @@ package org.schabi.newpipe.extractor.services.media_ccc.extractors; +import static org.schabi.newpipe.extractor.services.media_ccc.extractors.MediaCCCParsingHelper.getImageListFromLogoImageUrl; + import com.grack.nanojson.JsonObject; import 
com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; -import org.schabi.newpipe.extractor.InfoItem; -import org.schabi.newpipe.extractor.ListExtractor; -import org.schabi.newpipe.extractor.MultiInfoItemsCollector; import org.schabi.newpipe.extractor.Image; -import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.channel.ChannelExtractor; -import org.schabi.newpipe.extractor.channel.tabs.ChannelTabExtractor; import org.schabi.newpipe.extractor.channel.tabs.ChannelTabs; import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import org.schabi.newpipe.extractor.linkhandler.ReadyChannelTabListLinkHandler; -import org.schabi.newpipe.extractor.services.media_ccc.extractors.infoItems.MediaCCCStreamInfoItemExtractor; import org.schabi.newpipe.extractor.services.media_ccc.linkHandler.MediaCCCConferenceLinkHandlerFactory; import java.io.IOException; @@ -27,8 +23,6 @@ import java.util.List; import javax.annotation.Nonnull; -import static org.schabi.newpipe.extractor.services.media_ccc.extractors.MediaCCCParsingHelper.getImageListFromLogoImageUrl; - public class MediaCCCConferenceExtractor extends ChannelExtractor { private JsonObject conferenceData; @@ -37,6 +31,19 @@ public class MediaCCCConferenceExtractor extends ChannelExtractor { super(service, linkHandler); } + static JsonObject fetchConferenceData(@Nonnull final Downloader downloader, + @Nonnull final String conferenceId) + throws IOException, ExtractionException { + final String conferenceUrl + = MediaCCCConferenceLinkHandlerFactory.CONFERENCE_API_ENDPOINT + conferenceId; + try { + return JsonParser.object().from(downloader.get(conferenceUrl).responseBody()); + } catch (final JsonParserException jpe) { + throw new 
ExtractionException("Could not parse json returned by URL: " + conferenceUrl); + } + } + + @Nonnull @Override public List<Image> getAvatars() { @@ -88,20 +95,15 @@ public class MediaCCCConferenceExtractor extends ChannelExtractor { @Nonnull @Override public List<ListLinkHandler> getTabs() throws ParsingException { - return List.of(new ReadyChannelTabListLinkHandler(getUrl(), getId(), - ChannelTabs.VIDEOS, new VideosTabExtractorBuilder(conferenceData))); + return List.of(new ReadyChannelTabListLinkHandler(getUrl(), getId(), ChannelTabs.VIDEOS, + (service, linkHandler) -> + new MediaCCCChannelTabExtractor(service, linkHandler, conferenceData))); } @Override public void onFetchPage(@Nonnull final Downloader downloader) throws IOException, ExtractionException { - final String conferenceUrl - = MediaCCCConferenceLinkHandlerFactory.CONFERENCE_API_ENDPOINT + getId(); - try { - conferenceData = JsonParser.object().from(downloader.get(conferenceUrl).responseBody()); - } catch (final JsonParserException jpe) { - throw new ExtractionException("Could not parse json returned by URL: " + conferenceUrl); - } + conferenceData = fetchConferenceData(downloader, getId()); } @Nonnull @@ -109,55 +111,4 @@ public class MediaCCCConferenceExtractor extends ChannelExtractor { public String getName() throws ParsingException { return conferenceData.getString("title"); } - - private static final class VideosTabExtractorBuilder - implements ReadyChannelTabListLinkHandler.ChannelTabExtractorBuilder { - - private final JsonObject conferenceData; - - VideosTabExtractorBuilder(final JsonObject conferenceData) { - this.conferenceData = conferenceData; - } - - @Nonnull - @Override - public ChannelTabExtractor build(@Nonnull final StreamingService service, - @Nonnull final ListLinkHandler linkHandler) { - return new VideosChannelTabExtractor(service, linkHandler, conferenceData); - } - } - - private static final class VideosChannelTabExtractor extends ChannelTabExtractor { - private final JsonObject conferenceData; - - 
VideosChannelTabExtractor(final StreamingService service, - final ListLinkHandler linkHandler, - final JsonObject conferenceData) { - super(service, linkHandler); - this.conferenceData = conferenceData; - } - - @Override - public void onFetchPage(@Nonnull final Downloader downloader) { - // Nothing to do here, as data was already fetched - } - - @Nonnull - @Override - public ListExtractor.InfoItemsPage<InfoItem> getInitialPage() { - final MultiInfoItemsCollector collector = - new MultiInfoItemsCollector(getServiceId()); - conferenceData.getArray("events") - .stream() - .filter(JsonObject.class::isInstance) - .map(JsonObject.class::cast) - .forEach(event -> collector.commit(new MediaCCCStreamInfoItemExtractor(event))); - return new InfoItemsPage<>(collector, null); - } - - @Override - public InfoItemsPage<InfoItem> getPage(final Page page) { - return InfoItemsPage.emptyPage(); - } - } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/linkHandler/MediaCCCConferenceLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/linkHandler/MediaCCCConferenceLinkHandlerFactory.java index b6f88c331..edef17d03 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/linkHandler/MediaCCCConferenceLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/linkHandler/MediaCCCConferenceLinkHandlerFactory.java @@ -1,11 +1,17 @@ package org.schabi.newpipe.extractor.services.media_ccc.linkHandler; +import org.schabi.newpipe.extractor.channel.tabs.ChannelTabs; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; import org.schabi.newpipe.extractor.utils.Parser; import java.util.List; +/** + * Since MediaCCC does not really have channel tabs (i.e. 
it only has one single "tab" with videos), + * this link handler acts both as the channel link handler and the channel tab link handler. That's + * why {@link #getAvailableContentFilter()} has been overridden. + */ public final class MediaCCCConferenceLinkHandlerFactory extends ListLinkHandlerFactory { private static final MediaCCCConferenceLinkHandlerFactory INSTANCE @@ -46,4 +52,15 @@ public final class MediaCCCConferenceLinkHandlerFactory extends ListLinkHandlerF return false; } } + + /** + * @see MediaCCCConferenceLinkHandlerFactory + * @return MediaCCC's only channel "tab", i.e. {@link ChannelTabs#VIDEOS} + */ + @Override + public String[] getAvailableContentFilter() { + return new String[]{ + ChannelTabs.VIDEOS, + }; + } } From 137e924035798df37f1e3fdf16ab1105b6dd5590 Mon Sep 17 00:00:00 2001 From: Stypox Date: Sat, 30 Dec 2023 22:53:51 +0100 Subject: [PATCH 2/3] [MediaCCC] Add ChannelTabExtractorTest --- .../MediaCCCChannelTabExtractorTest.java | 49 +++++++++++++++++++ .../MediaCCCConferenceExtractorTest.java | 3 +- 2 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 extractor/src/test/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCChannelTabExtractorTest.java diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCChannelTabExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCChannelTabExtractorTest.java new file mode 100644 index 000000000..77a4dafec --- /dev/null +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCChannelTabExtractorTest.java @@ -0,0 +1,49 @@ +package org.schabi.newpipe.extractor.services.media_ccc; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.schabi.newpipe.extractor.ServiceList.MediaCCC; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.schabi.newpipe.downloader.DownloaderTestImpl; +import 
org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.channel.tabs.ChannelTabExtractor; +import org.schabi.newpipe.extractor.channel.tabs.ChannelTabs; + +/** + * Test that it is possible to create and use a channel tab extractor ({@link + * org.schabi.newpipe.extractor.services.media_ccc.extractors.MediaCCCChannelTabExtractor}) without + * passing through the conference extractor + */ +public class MediaCCCChannelTabExtractorTest { + public static class CCCamp2023 { + private static ChannelTabExtractor extractor; + + @BeforeAll + public static void setUpClass() throws Exception { + NewPipe.init(DownloaderTestImpl.getInstance()); + extractor = MediaCCC.getChannelTabExtractorFromId("camp2023", ChannelTabs.VIDEOS); + extractor.fetchPage(); + } + + @Test + void testName() { + assertEquals(ChannelTabs.VIDEOS, extractor.getName()); + } + + @Test + void testGetUrl() throws Exception { + assertEquals("https://media.ccc.de/c/camp2023", extractor.getUrl()); + } + + @Test + void testGetOriginalUrl() throws Exception { + assertEquals("https://media.ccc.de/c/camp2023", extractor.getOriginalUrl()); + } + + @Test + void testGetInitalPage() throws Exception { + assertEquals(177, extractor.getInitialPage().getItems().size()); + } + } +} diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCConferenceExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCConferenceExtractorTest.java index 86561c971..efee349e6 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCConferenceExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCConferenceExtractorTest.java @@ -13,7 +13,8 @@ import static org.schabi.newpipe.extractor.ExtractorAsserts.assertContainsImageU import static org.schabi.newpipe.extractor.ServiceList.MediaCCC; /** - * Test {@link MediaCCCConferenceExtractor} + * Test {@link 
MediaCCCConferenceExtractor} and {@link + * org.schabi.newpipe.extractor.services.media_ccc.extractors.MediaCCCChannelTabExtractor} */ public class MediaCCCConferenceExtractorTest { public static class FrOSCon2017 { From aaf3231fc75d7b4177549fec4aa7e672bfe84015 Mon Sep 17 00:00:00 2001 From: Stypox Date: Sat, 30 Dec 2023 23:23:19 +0100 Subject: [PATCH 3/3] [MediaCCC] Fix lambda link handler keeping reference to extractor This caused problems in NewPipe, because extractors are not serializable, and well, keeping references to them is a bad idea anyway. --- .../media_ccc/extractors/MediaCCCConferenceExtractor.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCConferenceExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCConferenceExtractor.java index aab587bf2..30cab066f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCConferenceExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCConferenceExtractor.java @@ -95,9 +95,11 @@ public class MediaCCCConferenceExtractor extends ChannelExtractor { @Nonnull @Override public List<ListLinkHandler> getTabs() throws ParsingException { + // avoid keeping a reference to MediaCCCConferenceExtractor inside the lambda + final JsonObject theConferenceData = conferenceData; return List.of(new ReadyChannelTabListLinkHandler(getUrl(), getId(), ChannelTabs.VIDEOS, (service, linkHandler) -> - new MediaCCCChannelTabExtractor(service, linkHandler, conferenceData))); + new MediaCCCChannelTabExtractor(service, linkHandler, theConferenceData))); } @Override