
Merge branch 'master' into update_data_update_languages.py

Noémi Ványi 2023-04-04 22:51:07 +02:00 committed by GitHub
commit a0ed0006fa
14 changed files with 12793 additions and 27543 deletions

View File

@@ -16,7 +16,7 @@
## Author's checklist
-<!-- additional notes for reviewiers -->
+<!-- additional notes for reviewers -->
## Related issues

View File

@@ -85,15 +85,15 @@ of turning it off.
What is the difference between searx and SearxNG?
#################################################
-TL;DR: If you want to run a public instance, go with SearxNG. If you want to
-self host your own instance, choose searx.
+TL;DR: SearXNG is for users that want more features and bugs getting fixed quicker.
+If you prefer a minimalist software and stable experience, use searx.
SearxNG is a fork of searx, created by a former maintainer of searx. The fork
was created because the majority of the maintainers at the time did not find
the new proposed features privacy respecting enough. The most significant issue is with
engine metrics.
-Searx is built for privacy conscious users. It comes a unique set of
+Searx is built for privacy conscious users. It comes with a unique set of
challenges. One of the problems we face is that users rather not report bugs,
because they do not want to publicly share what engines they use or what search
query triggered a problem. It is a challenge we accepted.

manage
View File

@@ -284,9 +284,6 @@ node.env() {
which npm &> /dev/null || die 1 'node.env - npm is not found!'
( set -e
-# shellcheck disable=SC2030
-PATH="$(npm bin):$PATH"
-export PATH
build_msg INSTALL "npm install $NPM_PACKAGES"
# shellcheck disable=SC2086

View File

@@ -1,13 +1,13 @@
-mock==4.0.3
+mock==5.0.1
nose2[coverage_plugin]==0.12.0
cov-core==1.15.0
-pycodestyle==2.9.1
-pylint==2.15.5
-splinter==0.18.1
+pycodestyle==2.10.0
+pylint==2.15.9
+splinter==0.19.0
transifex-client==0.14.3; python_version < '3.10'
transifex-client==0.12.5; python_version == '3.10'
-selenium==4.5.0
-twine==4.0.1
+selenium==4.7.2
+twine==4.0.2
Pallets-Sphinx-Themes==2.0.2
docutils==0.18
Sphinx==5.3.0
@@ -16,4 +16,4 @@ sphinx-jinja==2.0.2
sphinx-tabs==3.4.1
sphinxcontrib-programoutput==0.17
sphinx-autobuild==2021.3.14
-linuxdoc==20221025
+linuxdoc==20221127

View File

@@ -1,13 +1,13 @@
Brotli==1.0.9
babel==2.11.0
-certifi==2022.9.24
+certifi==2022.12.7
flask-babel==2.0.0
flask==2.2.2
jinja2==3.1.2
langdetect==1.0.9
-lxml==4.9.1
+lxml==4.9.2
pygments==2.12.0
python-dateutil==2.8.2
pyyaml==6.0
-requests[socks]==2.28.1
+requests[socks]==2.28.2
setproctitle==1.3.2

File diff suppressed because it is too large.

File diff suppressed because it is too large.

View File

@@ -1,13 +1,9 @@
{
"versions": [
"106.0.3",
"106.0.2",
"106.0.1",
"106.0",
"105.0.3",
"105.0.2",
"105.0.1",
"105.0"
"111.0.1",
"111.0",
"110.0.1",
"110.0"
],
"os": [
"Windows NT 10.0; WOW64",

View File

@@ -52,8 +52,7 @@ def request(query, params):
offset=offset)
params['url'] = base_url + search_path
-params['headers']['User-Agent'] = ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
-'(KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36')
+params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
return params
@@ -82,11 +81,13 @@ def response(resp):
for result in eval_xpath(dom, '//li[@class="b_algo"]'):
link = eval_xpath(result, './/h2/a')[0]
url = link.attrib.get('href')
+pretty_url = extract_text(eval_xpath(result, './/cite'))
title = extract_text(link)
content = extract_text(eval_xpath(result, './/p'))
# append result
results.append({'url': url,
+'pretty_url': pretty_url,
'title': title,
'content': content})
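The `pretty_url` field added above is read straight out of each result's `<cite>` element. As a sanity check, here is a minimal, self-contained lxml sketch of that extraction; the sample markup and the `parse_results` helper are illustrative stand-ins, not Bing's real HTML or the engine's actual code.

```python
from lxml import html

# Illustrative markup only -- a toy page shaped like a Bing result list.
SAMPLE_PAGE = """
<html><body><ol id="b_results">
  <li class="b_algo">
    <h2><a href="https://example.org/page">Example title</a></h2>
    <div class="b_caption">
      <cite>example.org/page</cite>
      <p>Example snippet text.</p>
    </div>
  </li>
</ol></body></html>
"""

def parse_results(text):
    dom = html.fromstring(text)
    results = []
    for result in dom.xpath('//li[@class="b_algo"]'):
        link = result.xpath('.//h2/a')[0]
        results.append({
            'url': link.get('href'),
            # text_content() flattens nested tags, much like searx's extract_text()
            'pretty_url': result.xpath('.//cite')[0].text_content(),
            'title': link.text_content(),
            'content': result.xpath('.//p')[0].text_content(),
        })
    return results

print(parse_results(SAMPLE_PAGE))
```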

View File

@@ -18,7 +18,7 @@ from searx.poolrequests import get
# about
about = {
"website": 'https://lite.duckduckgo.com/lite',
"website": 'https://lite.duckduckgo.com/lite/',
"wikidata_id": 'Q12805',
"official_api_documentation": 'https://duckduckgo.com/api',
"use_official_api": False,
@@ -45,7 +45,7 @@ language_aliases = {
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
# search-url
-url = 'https://lite.duckduckgo.com/lite'
+url = 'https://lite.duckduckgo.com/lite/'
url_ping = 'https://duckduckgo.com/t/sl_l'
@@ -73,6 +73,9 @@ def request(query, params):
# link again and again ..
params['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
+params['headers']['Origin'] = 'https://lite.duckduckgo.com'
+params['headers']['Referer'] = 'https://lite.duckduckgo.com/'
+params['headers']['User-Agent'] = 'Mozilla/5.0'
# initial page does not have an offset
if params['pageno'] == 2:
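The three added headers, together with the existing Content-Type, can be reproduced outside the engine to see what the request looks like on the wire. A rough sketch with `requests`; only the URL and headers mirror the diff, while the form field `q` and the timeout are assumptions for illustration.

```python
import requests

# Headers mirror the diff; the form payload below is an assumption.
headers = {
    'Content-Type': 'application/x-www-form-urlencoded',
    'Origin': 'https://lite.duckduckgo.com',
    'Referer': 'https://lite.duckduckgo.com/',
    'User-Agent': 'Mozilla/5.0',
}

resp = requests.post(
    'https://lite.duckduckgo.com/lite/',
    headers=headers,
    data={'q': 'searx'},  # assumed field name, not taken from the diff
    timeout=10,
)
print(resp.status_code, len(resp.text))
```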

View File

@@ -112,7 +112,7 @@ filter_mapping = {
results_xpath = '//div[contains(@class, "MjjYud")]'
title_xpath = './/h3[1]'
href_xpath = './/a/@href'
-content_xpath = './/div[@data-content-feature=1]'
+content_xpath = './/div[@data-sncf]'
results_xpath_mobile_ui = '//div[contains(@class, "g ")]'
# google *sections* are no usual *results*, we ignore them

View File

@@ -91,14 +91,13 @@ def get_sc_code(headers):
dom = html.fromstring(resp.text)
try:
-href = eval_xpath(dom, '//input[@name="sc"]')[0].get('value')
+sc_code = eval_xpath(dom, '//input[@name="sc"]')[0].get('value')
except IndexError as exc:
# suspend startpage API --> https://github.com/searxng/searxng/pull/695
raise SearxEngineResponseException(
suspended_time=7 * 24 * 3600, message="PR-695: query new sc time-stamp failed!"
) from exc
-sc_code = href[5:]
sc_code_ts = time()
logger.debug("new value is: %s", sc_code)

View File

@@ -1291,7 +1291,7 @@ engines:
- name : wiby
engine : json_engine
paging : True
-search_url : https://wiby.me/json/?q={query}&o={pageno}0
+search_url : https://wiby.me/json/?q={query}&p={pageno}
url_query : URL
title_query : Title
content_query : Snippet
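The old template appended a literal `0` to the page number, so page 2 became `o=20`, while the new one passes the page number directly as `p`. A quick sketch of how the placeholders expand, using plain `str.format`, which is close to how `json_engine` builds `search_url` (the exact substitution in the engine may differ slightly):

```python
old = 'https://wiby.me/json/?q={query}&o={pageno}0'
new = 'https://wiby.me/json/?q={query}&p={pageno}'

for pageno in (1, 2, 3):
    print(old.format(query='searx', pageno=pageno))  # o=10, o=20, o=30
    print(new.format(query='searx', pageno=pageno))  # p=1,  p=2,  p=3
```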

View File

@@ -5,7 +5,7 @@ import requests
import re
from os.path import dirname, join
from urllib.parse import urlparse, urljoin
-from distutils.version import LooseVersion, StrictVersion
+from packaging.version import Version, parse
from lxml import html
from searx import searx_dir
@@ -39,7 +39,7 @@ def fetch_firefox_versions():
if path.startswith(RELEASE_PATH):
version = path[len(RELEASE_PATH):-1]
if NORMAL_REGEX.match(version):
-versions.append(LooseVersion(version))
+versions.append(Version(version))
list.sort(versions, reverse=True)
return versions
@@ -49,12 +49,12 @@ def fetch_firefox_last_versions():
versions = fetch_firefox_versions()
result = []
-major_last = versions[0].version[0]
+major_last = versions[0].major
major_list = (major_last, major_last - 1)
for version in versions:
-major_current = version.version[0]
+major_current = version.major
if major_current in major_list:
-result.append(version.vstring)
+result.append(str(version))
return result
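`distutils` is deprecated and removed in Python 3.12, which is why the script moves to `packaging.version`. A minimal sketch of the replacement API, independent of the script itself:

```python
from packaging.version import Version

versions = sorted((Version(v) for v in ('105.0', '111.0', '110.0.1', '111.0.1')),
                  reverse=True)

print(versions[0].major)           # 111 -- replaces LooseVersion(...).version[0]
print([str(v) for v in versions])  # str(v) replaces LooseVersion(...).vstring
```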