From 2123b3abd3ad9bae4cb9b7069d03d737c7a213e0 Mon Sep 17 00:00:00 2001 From: Zsombor Gegesy Date: Thu, 21 Nov 2019 23:03:14 +0100 Subject: [PATCH 1/4] Fix search result paging - due to the way as the InfoItemsSearchCollector are re-used, the returned item list just grows, which cause that same videos are returned. --- .../services/media_ccc/extractors/MediaCCCSearchExtractor.java | 1 + .../extractor/services/soundcloud/SoundcloudSearchExtractor.java | 1 + .../services/youtube/extractors/YoutubeSearchExtractor.java | 1 + 3 files changed, 3 insertions(+) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCSearchExtractor.java index eab97ce32..429445f24 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/extractors/MediaCCCSearchExtractor.java @@ -48,6 +48,7 @@ public class MediaCCCSearchExtractor extends SearchExtractor { @Override public InfoItemsPage getInitialPage() throws IOException, ExtractionException { InfoItemsSearchCollector searchItems = getInfoItemSearchCollector(); + searchItems.reset(); if(getLinkHandler().getContentFilters().contains(CONFERENCES) || getLinkHandler().getContentFilters().contains(ALL) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudSearchExtractor.java index 97e69dc6f..9ed340fa1 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudSearchExtractor.java @@ -76,6 +76,7 @@ public class SoundcloudSearchExtractor extends SearchExtractor { private InfoItemsCollector collectItems(JsonArray searchCollection) { final InfoItemsSearchCollector collector = getInfoItemSearchCollector(); + collector.reset(); for (Object result : searchCollection) { if (!(result instanceof JsonObject)) continue; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index f6236e93a..2e3aad320 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -106,6 +106,7 @@ public class YoutubeSearchExtractor extends SearchExtractor { private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundException { InfoItemsSearchCollector collector = getInfoItemSearchCollector(); + collector.reset(); Element list = doc.select("ol[class=\"item-section\"]").first(); final TimeAgoParser timeAgoParser = getTimeAgoParser(); From 68b0fd9650b5cd459e142bbf175d48b9cbcfd39f Mon Sep 17 00:00:00 2001 From: Zsombor Gegesy Date: Wed, 4 Dec 2019 23:57:44 +0100 Subject: [PATCH 2/4] Add test for search paging --- .../search/YoutubeSearchPagingTest.java | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchPagingTest.java diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchPagingTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchPagingTest.java new file mode 100644 index 000000000..b4596633d --- /dev/null +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchPagingTest.java @@ -0,0 +1,78 @@ +package org.schabi.newpipe.extractor.services.youtube.search; + +import static java.util.Collections.singletonList; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.schabi.newpipe.extractor.ServiceList.YouTube; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.schabi.newpipe.DownloaderTestImpl; +import org.schabi.newpipe.extractor.InfoItem; +import org.schabi.newpipe.extractor.ListExtractor; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory; + +public class YoutubeSearchPagingTest { + private static ListExtractor.InfoItemsPage page1; + private static ListExtractor.InfoItemsPage page2; + private static Set urlList1; + private static Set urlList2; + + @BeforeClass + public static void setUpClass() throws Exception { + NewPipe.init(DownloaderTestImpl.getInstance()); + + YoutubeSearchExtractor extractor = (YoutubeSearchExtractor) YouTube.getSearchExtractor("cirque du soleil", + singletonList(YoutubeSearchQueryHandlerFactory.VIDEOS), null); + + extractor.fetchPage(); + page1 = extractor.getInitialPage(); + urlList1 = extractUrls(page1.getItems()); + assertEquals("page with items loaded", 20, page1.getItems().size()); + assertEquals("distinct videos", 20, urlList1.size()); + + assertTrue("has more page", page1.hasNextPage()); + assertNotNull("has next page url", page1.getNextPageUrl()); + page2 = extractor.getPage(page1.getNextPageUrl()); + urlList2 = extractUrls(page2.getItems()); + } + + private static Set extractUrls(List list) { + Set result = new HashSet<>(); + for (InfoItem item : list) { + result.add(item.getUrl()); + } + return result; + } + + @Test + public void firstPageOk() { + assertEquals("page with items loaded", 20, page1.getItems().size()); + assertEquals("distinct videos", 20, urlList1.size()); + } + + @Test + public void secondPageLength() { + assertEquals("one page", 20, page2.getItems().size()); + } + + @Test + public void secondPageUniqueVideos() { + assertEquals("distinct videos", 20, urlList2.size()); + } + + @Test + public void noRepeatingVideosInPages() { + Set intersection = new HashSet<>(urlList2); + intersection.retainAll(urlList1); + assertEquals("empty intersection", 0, intersection.size()); + } + +} \ No newline at end of file From 64729e535723ccbc15b656d6f732a79ea97b388d Mon Sep 17 00:00:00 2001 From: Zsombor Gegesy Date: Sat, 7 Dec 2019 22:58:01 +0100 Subject: [PATCH 3/4] Improve the tests --- .../search/YoutubeSearchPagingTest.java | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchPagingTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchPagingTest.java index b4596633d..50776abc9 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchPagingTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchPagingTest.java @@ -24,6 +24,7 @@ public class YoutubeSearchPagingTest { private static ListExtractor.InfoItemsPage page2; private static Set urlList1; private static Set urlList2; + private static int pageSize; @BeforeClass public static void setUpClass() throws Exception { @@ -35,10 +36,11 @@ public class YoutubeSearchPagingTest { extractor.fetchPage(); page1 = extractor.getInitialPage(); urlList1 = extractUrls(page1.getItems()); - assertEquals("page with items loaded", 20, page1.getItems().size()); - assertEquals("distinct videos", 20, urlList1.size()); + assertTrue("page with items loaded", 15 < page1.getItems().size()); + pageSize = page1.getItems().size(); + assertEquals("they are all distinct, no repetition", pageSize, urlList1.size()); - assertTrue("has more page", page1.hasNextPage()); + assertTrue("has more than one page of results", page1.hasNextPage()); assertNotNull("has next page url", page1.getNextPageUrl()); page2 = extractor.getPage(page1.getNextPageUrl()); urlList2 = extractUrls(page2.getItems()); @@ -54,25 +56,25 @@ public class YoutubeSearchPagingTest { @Test public void firstPageOk() { - assertEquals("page with items loaded", 20, page1.getItems().size()); - assertEquals("distinct videos", 20, urlList1.size()); + assertTrue("first page contains the expected number of items", 15 < page1.getItems().size()); + assertEquals("they are all distinct, no repetition", pageSize, urlList1.size()); } @Test public void secondPageLength() { - assertEquals("one page", 20, page2.getItems().size()); + assertEquals("second page contains only the expected number of items", pageSize, page2.getItems().size()); } @Test public void secondPageUniqueVideos() { - assertEquals("distinct videos", 20, urlList2.size()); + assertEquals("they are all distinct, no repetition", pageSize, urlList2.size()); } @Test public void noRepeatingVideosInPages() { Set intersection = new HashSet<>(urlList2); intersection.retainAll(urlList1); - assertEquals("empty intersection", 0, intersection.size()); + assertEquals("Found a duplicated video on second search page", 0, intersection.size()); } } \ No newline at end of file From 0b6e37e71f6679cecf983f5be65e6a16bf7fe525 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Tue, 31 Dec 2019 00:18:53 +0100 Subject: [PATCH 4/4] Improve YouTubeSearchPagingtest --- .../search/YoutubeSearchPagingTest.java | 29 +++++++------------ 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchPagingTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchPagingTest.java index 50776abc9..7df7b0cb3 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchPagingTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchPagingTest.java @@ -24,7 +24,8 @@ public class YoutubeSearchPagingTest { private static ListExtractor.InfoItemsPage page2; private static Set urlList1; private static Set urlList2; - private static int pageSize; + private static int page1Size; + private static int page2Size; @BeforeClass public static void setUpClass() throws Exception { @@ -36,14 +37,15 @@ public class YoutubeSearchPagingTest { extractor.fetchPage(); page1 = extractor.getInitialPage(); urlList1 = extractUrls(page1.getItems()); - assertTrue("page with items loaded", 15 < page1.getItems().size()); - pageSize = page1.getItems().size(); - assertEquals("they are all distinct, no repetition", pageSize, urlList1.size()); + assertTrue("failed to load search result page one: too few items", 15 < page1.getItems().size()); + page1Size = page1.getItems().size(); + assertEquals("duplicated items in search result on page one", page1Size, urlList1.size()); - assertTrue("has more than one page of results", page1.hasNextPage()); - assertNotNull("has next page url", page1.getNextPageUrl()); + assertTrue("search result has no second page", page1.hasNextPage()); + assertNotNull("next page url is null", page1.getNextPageUrl()); page2 = extractor.getPage(page1.getNextPageUrl()); urlList2 = extractUrls(page2.getItems()); + page2Size = page2.getItems().size(); } private static Set extractUrls(List list) { @@ -54,27 +56,16 @@ public class YoutubeSearchPagingTest { return result; } - @Test - public void firstPageOk() { - assertTrue("first page contains the expected number of items", 15 < page1.getItems().size()); - assertEquals("they are all distinct, no repetition", pageSize, urlList1.size()); - } - - @Test - public void secondPageLength() { - assertEquals("second page contains only the expected number of items", pageSize, page2.getItems().size()); - } - @Test public void secondPageUniqueVideos() { - assertEquals("they are all distinct, no repetition", pageSize, urlList2.size()); + assertEquals("Second search result page has duplicated items", page2Size, urlList2.size()); } @Test public void noRepeatingVideosInPages() { Set intersection = new HashSet<>(urlList2); intersection.retainAll(urlList1); - assertEquals("Found a duplicated video on second search page", 0, intersection.size()); + assertEquals("Found the same item on first AND second search page", 0, intersection.size()); } } \ No newline at end of file