From b0888c6ca3527464332cd3cbbe50d04798951254 Mon Sep 17 00:00:00 2001 From: Allen <64094914+allendema@users.noreply.github.com> Date: Sun, 10 Oct 2021 20:52:06 +0200 Subject: [PATCH] [enh] Add Pagination to Wiby / Fix Kaufland (#3000) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [fix] Fix Kaufland engine Changed Xpath expressions * [enh] Remove tracking params from Kaufland results * [enh] Add pagination to Wiby * [fix] Properly select title_xpath Co-authored-by: Noémi Ványi --- searx/plugins/tracker_url_remover.py | 2 +- searx/settings.yml | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py index 98ddddbc..e8735a21 100644 --- a/searx/plugins/tracker_url_remover.py +++ b/searx/plugins/tracker_url_remover.py @@ -21,7 +21,7 @@ from urllib.parse import urlunparse, parse_qsl, urlencode regexes = {re.compile(r'utm_[^&]+'), re.compile(r'(wkey|wemail)[^&]*'), - re.compile(r'(_hsenc|_hsmi|hsCtaTracking|__hssc|__hstc|__hsfp)[^&]*'), + re.compile(r'(_hsenc|_hsmi|hsCtaTracking|__hssc|__hstc|__hsfp|search_value)[^&]*'), re.compile(r'&$')} name = gettext('Tracker URL remover') diff --git a/searx/settings.yml b/searx/settings.yml index 79e0faaf..564d774d 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1231,7 +1231,8 @@ engines: - name : wiby engine : json_engine - search_url : https://wiby.me/json/?q={query} + paging : True + search_url : https://wiby.me/json/?q={query}&o={pageno}0 url_query : URL title_query : Title content_query : Snippet @@ -1533,8 +1534,8 @@ engines: timeout : 3.0 paging : True search_url : https://www.kaufland.de/item/search/?search_value={query}&page={pageno} - title_xpath : //li[@class="product__title"] - url_xpath : //div[@class="product"]//a/@href + title_xpath : //div[@class="product__title"]/text() + url_xpath : //article[@class="product"]//a/@href content_xpath : //div[@class="price"] #thumbnail_xpath : '//div[@class="product__image-container"]//img/@data-src' categories : general