From f00d9e0ec4b307fb62bd5effc7458fcfdda8fda5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Mon, 6 Jun 2022 00:01:27 +0200 Subject: [PATCH] Pick minor fixes from searxng (#3251) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [fix] Rename ccengine engine to openverse The CC engine was merged with WordPress and renamed to Openverse Source: https://wordpress.org/news/2021/05/welcome-to-openverse/ * [fix] ccengine engine - avoid unwanted redirects api.openverse.engineering is a little picky and wants to have a trailing slash in the path: /v1/images? --> /v1/images/? otherwise it redirects, here is the debug log: DEBUG searx.network.openverse : HTTP Request: GET https://api.openverse.engineering/v1/images?&page=1&page_size=20&format=json&q=foo "HTTP/2 301 Moved Permanently" (text/html; charset=utf-8) DEBUG searx.network.openverse : HTTP Request: GET https://api.openverse.engineering/v1/images/?&page=1&page_size=20&format=json&q=foo "HTTP/2 200 OK" (application/json) WARNING searx.engines.openverse : ErrorContext('searx/search/processors/online.py', 105, 'count_error(', None, '1 redirects, maximum: 0', ('200', 'OK', 'api.openverse.engineering')) True Signed-off-by: Markus Heiser * [fix] FutureWarning from lxml In case content is None, the original code will skip extract_text(), and just append the None value to 'content'. So just add allow_none=True, and this will return None without raising a ValueError in extract_text(). * [enh] Add pagination to Brave Also added ```&spellcheck=1``` because now it is disabled by default, not returning any ```suggestion_xpath```. 
Co-authored-by: Léon Tiekötter Co-authored-by: Markus Heiser Co-authored-by: capric98 <42015599+capric98@users.noreply.github.com> Co-authored-by: Allen <64094914+allendema@users.noreply.github.com> --- searx/engines/{ccengine.py => openverse.py} | 6 +++--- searx/engines/yahoo.py | 3 +-- searx/settings.yml | 13 +++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) rename searx/engines/{ccengine.py => openverse.py} (84%) diff --git a/searx/engines/ccengine.py b/searx/engines/openverse.py similarity index 84% rename from searx/engines/ccengine.py rename to searx/engines/openverse.py index 6f3a5adb..e3fc0827 100644 --- a/searx/engines/ccengine.py +++ b/searx/engines/openverse.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """ - Creative Commons search engine (Images) + Openverse (formerly known as: Creative Commons search engine) [Images] """ @@ -23,8 +23,8 @@ categories = ['images'] paging = True nb_per_page = 20 -base_url = 'https://api.creativecommons.engineering/v1/images?' 
-search_string = '&page={page}&page_size={nb_per_page}&format=json&{query}' +base_url = 'https://api.openverse.engineering/v1/images/' +search_string = '?page={page}&page_size={nb_per_page}&format=json&{query}' def request(query, params): diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index 591dd501..a6b77904 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -135,8 +135,7 @@ def response(resp): content = eval_xpath_getindex( result, './/div[contains(@class, "compText")]', 0, default='' ) - if content: - content = extract_text(content) + content = extract_text(content, allow_none=True) # append result results.append({'url': url, diff --git a/searx/settings.yml b/searx/settings.yml index ff4c6593..cc955311 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -295,10 +295,10 @@ engines: require_api_key: false results: HTML - - name : ccengine - engine : ccengine - categories : images - shortcut : cce + - name: openverse + engine: openverse + categories: images + shortcut: opv # - name : core.ac.uk # engine : core @@ -1638,8 +1638,9 @@ engines: - name: brave shortcut: brave engine: xpath - paging: false - search_url: https://search.brave.com/search?q={query} + paging: true + first_page_num: 0 + search_url: https://search.brave.com/search?q={query}&offset={pageno}&spellcheck=1 url_xpath: //div[@class="snippet fdb"]/a/@href title_xpath: //span[@class="snippet-title"] content_xpath: //p[1][@class="snippet-description"]