From 8e00249633165772ed705375782ed806ce5d4d06 Mon Sep 17 00:00:00 2001 From: Israel Yago Pereira Date: Fri, 29 Oct 2021 17:02:14 -0300 Subject: [PATCH 1/7] WIP: onesearch engine --- searx/settings.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/searx/settings.yml b/searx/settings.yml index 591c819d..0f79041a 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1621,6 +1621,22 @@ engines: require_api_key: false results: HTML + - name: onesearch + shortcut: onesearch + engine: xpath + paging: false + search_url: https://www.onesearch.com/yhs/search;?p={query} + url_xpath: //div[contains(@class, "algo")]//h3[contains(@class, "title")]/a/@href + title_xpath: //div[contains(@class, "algo")]//h3[contains(@class, "title")] + content_xpath: //div[contains(@class, "algo")]/div[contains(@class, "compText")]/p//text() + categories: general + about: + website: https://www.onesearch.com/ + wikidata_id: None + use_official_api: false + require_api_key: false + results: HTML + # Doku engine lets you access to any Doku wiki instance: # A public one or a private/corporate one. # - name : ubuntuwiki From 258c6fbd5a8b60a848c65c5b3276d382b67459ed Mon Sep 17 00:00:00 2001 From: Israel Yago Pereira Date: Mon, 1 Nov 2021 16:17:01 -0300 Subject: [PATCH 2/7] Onesearch engine without pagination --- searx/engines/onesearch.py | 56 ++++++++++++++++++++++++++++++++++++++ searx/settings.yml | 6 +--- 2 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 searx/engines/onesearch.py diff --git a/searx/engines/onesearch.py b/searx/engines/onesearch.py new file mode 100644 index 00000000..19fabe42 --- /dev/null +++ b/searx/engines/onesearch.py @@ -0,0 +1,56 @@ + +"""Onesearch +""" + +from lxml.html import fromstring + +import re + +from searx.utils import ( + eval_xpath, + extract_text, +) + +from urllib.parse import unquote + +# about +about = { + "website": 'https://www.onesearch.com/', + "wikidata_id": None, + "use_official_api": False, + "require_api_key": False, + "results": 'HTML', +} + +# engine dependent config +categories = ['general'] + +# search-url +URL = 'https://www.onesearch.com/yhs/search;?p=%s' + +def request(query, params): + params['url'] = URL % query + return params + + +# get response from search-request +def response(resp): + + results = [] + doc = fromstring(resp.text) + + titles_tags = eval_xpath(doc, '//div[contains(@class, "algo")]//h3[contains(@class, "title")]') + contents = eval_xpath(doc, '//div[contains(@class, "algo")]/div[contains(@class, "compText")]/p') + onesearch_urls = eval_xpath(doc, '//div[contains(@class, "algo")]//h3[contains(@class, "title")]/a/@href') + + for title_tag, content, onesearch_url in zip(titles_tags, contents, onesearch_urls): + print(f"{title_tag.text_content()} ---> {onesearch_url}") + matches = re.search(r'RU=(.*?)\/', onesearch_url) + results.append({ + 'title': title_tag.text_content(), + 'content': extract_text(content), + 'url': unquote(matches.group(1)), + }) + + return results + diff --git a/searx/settings.yml b/searx/settings.yml index 0f79041a..7c7d4620 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1623,12 +1623,8 @@ engines: - name: onesearch shortcut: onesearch - engine: xpath + engine: onesearch paging: false - search_url: https://www.onesearch.com/yhs/search;?p={query} - url_xpath: //div[contains(@class, "algo")]//h3[contains(@class, "title")]/a/@href - title_xpath: //div[contains(@class, "algo")]//h3[contains(@class, "title")] - content_xpath: //div[contains(@class, "algo")]/div[contains(@class, "compText")]/p//text() categories: general about: website: https://www.onesearch.com/ From 51530bc39409340d9f28f00dade4a2a0a59eda62 Mon Sep 17 00:00:00 2001 From: Israel Yago Pereira Date: Mon, 1 Nov 2021 16:45:45 -0300 Subject: [PATCH 3/7] Fix code style --- searx/engines/onesearch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/searx/engines/onesearch.py b/searx/engines/onesearch.py index 19fabe42..7e2f2913 100644 --- a/searx/engines/onesearch.py +++ b/searx/engines/onesearch.py @@ -28,6 +28,7 @@ categories = ['general'] # search-url URL = 'https://www.onesearch.com/yhs/search;?p=%s' + def request(query, params): params['url'] = URL % query return params @@ -53,4 +54,3 @@ def response(resp): }) return results - From 4b785677d84b050053ad337a702a73ea2270e75b Mon Sep 17 00:00:00 2001 From: Israel Yago Pereira Date: Tue, 2 Nov 2021 13:41:20 -0300 Subject: [PATCH 4/7] Onesearch pagination --- searx/engines/onesearch.py | 6 ++++-- searx/settings.yml | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/searx/engines/onesearch.py b/searx/engines/onesearch.py index 7e2f2913..17a009c2 100644 --- a/searx/engines/onesearch.py +++ b/searx/engines/onesearch.py @@ -24,13 +24,15 @@ about = { # engine dependent config categories = ['general'] +paging = True # search-url -URL = 'https://www.onesearch.com/yhs/search;?p=%s' +URL = 'https://www.onesearch.com/yhs/search;?p=%s&b=%d' def request(query, params): - params['url'] = URL % query + starting_from = (params['pageno'] * 10) - 9 + params['url'] = URL % (query, starting_from) return params diff --git a/searx/settings.yml b/searx/settings.yml index 7c7d4620..164ad9cf 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1624,7 +1624,7 @@ engines: - name: onesearch shortcut: onesearch engine: onesearch - paging: false + paging: true categories: general about: website: https://www.onesearch.com/ From f1f3ad97d9675bb5cb84a7477da3137d69afd2ee Mon Sep 17 00:00:00 2001 From: Israel Yago Pereira Date: Wed, 17 Nov 2021 15:15:17 -0300 Subject: [PATCH 5/7] Remove debug log from onesearch engine --- searx/engines/onesearch.py | 1 - 1 file changed, 1 deletion(-) diff --git a/searx/engines/onesearch.py b/searx/engines/onesearch.py index 17a009c2..dc3cc1af 100644 --- a/searx/engines/onesearch.py +++ b/searx/engines/onesearch.py @@ -47,7 +47,6 @@ def response(resp): onesearch_urls = eval_xpath(doc, '//div[contains(@class, "algo")]//h3[contains(@class, "title")]/a/@href') for title_tag, content, onesearch_url in zip(titles_tags, contents, onesearch_urls): - print(f"{title_tag.text_content()} ---> {onesearch_url}") matches = re.search(r'RU=(.*?)\/', onesearch_url) results.append({ 'title': title_tag.text_content(), From 6b3915a2dc5eb78a95aaf47d9d6e11ac4aed8bf7 Mon Sep 17 00:00:00 2001 From: israelyago Date: Thu, 18 Nov 2021 08:18:50 -0300 Subject: [PATCH 6/7] Removed paging from onesearch config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Noémi Ványi --- searx/settings.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/searx/settings.yml b/searx/settings.yml index 2d8b8d72..4ef3a7d9 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1625,7 +1625,6 @@ engines: - name: onesearch shortcut: onesearch engine: onesearch - paging: true categories: general about: website: https://www.onesearch.com/ From b90616a25fa0d0f51f7f506132abe567bdde6d98 Mon Sep 17 00:00:00 2001 From: israelyago Date: Thu, 18 Nov 2021 08:19:19 -0300 Subject: [PATCH 7/7] Remove categories from onesearch config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Noémi Ványi --- searx/settings.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/searx/settings.yml b/searx/settings.yml index 4ef3a7d9..5d993ec2 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1625,7 +1625,6 @@ engines: - name: onesearch shortcut: onesearch engine: onesearch - categories: general about: website: https://www.onesearch.com/ wikidata_id: None