Pick pass cookies from searxng (#3252)

* [enh] Allow passing headers/cookies from settings.yml

Example:

   - engine: xpath
   - search_url: example.org
   - headers: {'example_header': 'example_header'}
   - cookies: {'safesearch': 'off'}

* [fix] Update only cookies/headers

* [enh] XPath engine - add time range support

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

* [enh] XPath engine - add safe-search support

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

Co-authored-by: Allen <64094914+allendema@users.noreply.github.com>
Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Noémi Ványi 2022-06-06 00:18:33 +02:00 committed by GitHub
parent f00d9e0ec4
commit 2719fd2526
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 90 additions and 6 deletions

View File

@ -5,6 +5,7 @@ from urllib.parse import urlencode
from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list
search_url = None
lang_all = 'en'
url_xpath = None
content_xpath = None
title_xpath = None
@ -16,15 +17,71 @@ cached_xpath = ''
cached_url = ''
soft_max_redirects = 0
# parameters for engines with paging support
#
# number of results on each page
# (only needed if the site requires not a page number, but an offset)
cookies = {}
headers = {}
'''Some engines might offer different results based on cookies or headers.
Possible use-case: setting a safesearch cookie or header to moderate.'''
paging = False
'''Engine supports paging [True or False].'''
page_size = 1
# number of the first page (usually 0 or 1)
first_page_num = 1
time_range_support = False
'''Engine supports search time range.'''
time_range_url = '&hours={time_range_val}'
'''Time range URL parameter in the :py:obj:`search_url`. If no time range is
requested by the user, the URL parameter is an empty string. The
``{time_range_val}`` replacement is taken from the :py:obj:`time_range_map`.
.. code:: yaml
time_range_url : '&days={time_range_val}'
'''
time_range_map = {
'day': 24,
'week': 24 * 7,
'month': 24 * 30,
'year': 24 * 365,
}
'''Maps time range value from user to ``{time_range_val}`` in
:py:obj:`time_range_url`.
.. code:: yaml
time_range_map:
day: 1
week: 7
month: 30
year: 365
'''
safe_search_support = False
'''Engine supports safe-search.'''
safe_search_map = {
0: '&filter=none',
1: '&filter=moderate',
2: '&filter=strict'
}
'''Maps safe-search value to ``{safe_search}`` in :py:obj:`search_url`.
.. code:: yaml
safesearch: true
safe_search_map:
0: '&filter=none'
1: '&filter=moderate'
2: '&filter=strict'
'''
def request(query, params):
query = urlencode({'q': query})[2:]
@ -32,8 +89,35 @@ def request(query, params):
if paging and search_url.find('{pageno}') >= 0:
fp['pageno'] = (params['pageno'] - 1) * page_size + first_page_num
params['url'] = search_url.format(**fp)
params['query'] = query
safe_search = ''
if params['safesearch']:
safe_search = safe_search_map[params['safesearch']]
lang = lang_all
if params['language'] != 'all':
lang = params['language'][:2]
time_range = ''
if params.get('time_range'):
time_range_val = time_range_map.get(params.get('time_range'))
time_range = time_range_url.format(time_range_val=time_range_val)
safe_search = ''
if params['safesearch']:
safe_search = safe_search_map[params['safesearch']]
fargs = {
'query': urlencode({'q': query})[2:],
'lang': lang,
'pageno': (params['pageno'] - 1) * page_size + first_page_num,
'time_range': time_range,
'safe_search': safe_search,
}
params['cookies'].update(cookies)
params['headers'].update(headers)
params['url'] = search_url.format(**fargs)
params['soft_max_redirects'] = soft_max_redirects
return params