[enh] add ability to send engine data to subsequent requests

This commit is contained in:
Adam Tauber 2021-03-02 14:24:55 +01:00
parent 87f4cc4a9e
commit 44f4a9d49a
9 changed files with 50 additions and 5 deletions

View File

@ -327,6 +327,7 @@ def _set_https_support_for_engine(engine):
'is_test': True,
'category': 'files',
'raise_for_status': True,
'engine_data': {},
})
if 'url' not in params:

View File

@ -1,4 +1,5 @@
import re
from collections import defaultdict
from operator import itemgetter
from threading import RLock
from urllib.parse import urlparse, unquote
@ -144,7 +145,7 @@ class ResultContainer:
"""docstring for ResultContainer"""
__slots__ = '_merged_results', 'infoboxes', 'suggestions', 'answers', 'corrections', '_number_of_results',\
'_ordered', 'paging', 'unresponsive_engines', 'timings', 'redirect_url'
'_ordered', 'paging', 'unresponsive_engines', 'timings', 'redirect_url', 'engine_data'
def __init__(self):
super().__init__()
@ -154,6 +155,7 @@ class ResultContainer:
self.answers = {}
self.corrections = set()
self._number_of_results = []
self.engine_data = defaultdict(dict)
self._ordered = False
self.paging = False
self.unresponsive_engines = set()
@ -175,6 +177,8 @@ class ResultContainer:
self._merge_infobox(result)
elif 'number_of_results' in result:
self._number_of_results.append(result['number_of_results'])
elif 'engine_data' in result:
self.engine_data[engine_name][result['key']] = result['engine_data']
else:
# standard result (url, title, content)
if 'url' in result and not isinstance(result['url'], str):

View File

@ -111,6 +111,8 @@ class Search:
if request_params is None:
continue
request_params['engine_data'] = self.search_query.engine_data.get(engineref.name, {})
with threading.RLock():
processor.engine.stats['sent_search_count'] += 1

View File

@ -25,7 +25,7 @@ class SearchQuery:
"""container for all the search parameters (query, language, etc...)"""
__slots__ = 'query', 'engineref_list', 'lang', 'safesearch', 'pageno', 'time_range',\
'timeout_limit', 'external_bang'
'timeout_limit', 'external_bang', 'engine_data'
def __init__(self,
query: str,
@ -35,7 +35,8 @@ class SearchQuery:
pageno: int=1,
time_range: typing.Optional[str]=None,
timeout_limit: typing.Optional[float]=None,
external_bang: typing.Optional[str]=None):
external_bang: typing.Optional[str]=None,
engine_data: typing.Optional[dict]=None):
self.query = query
self.engineref_list = engineref_list
self.lang = lang
@ -44,6 +45,9 @@ class SearchQuery:
self.time_range = time_range
self.timeout_limit = timeout_limit
self.external_bang = external_bang
self.engine_data = engine_data
if engine_data is None:
self.engine_data = {}
@property
def categories(self):

View File

@ -7,6 +7,13 @@
<input type="hidden" name="language" value="{{ current_language }}" />{{- "" -}}
{% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" />{% endif -%}
{%- endmacro %}
{#- Render one hidden <input> per (engine, key) pair of `engine_data`, named "engine_data-<engine>-<key>", so the data is submitted again by the search forms that call this macro. Every interpolated part is escaped explicitly. -#}
{% macro engine_data_form(engine_data) -%}
    {% for engine_name, kv_data in engine_data.items() %}
        {% for k, v in kv_data.items() %}
            <input type="hidden" name="engine_data-{{ engine_name|e }}-{{ k|e }}" value="{{ v|e }}" />
        {% endfor %}
    {% endfor %}
{%- endmacro %}
{%- macro search_url() %}{{ url_for('search', _external=True) }}?q={{ q|urlencode }}{% if selected_categories %}&amp;categories={{ selected_categories|join(",") | replace(' ','+') }}{% endif %}{% if pageno > 1 %}&amp;pageno={{ pageno }}{% endif %}{% if time_range %}&amp;time_range={{ time_range }}{% endif %}{% if current_language != 'all' %}&amp;language={{ current_language }}{% endif %}{% endmacro -%}
{% block title %}{{ q|e }} - {% endblock %}
@ -142,12 +149,14 @@
<div class="pull-left">{{- "" -}}
<form method="{{ method or 'POST' }}" action="{{ url_for('search') }}" class="pull-left">
{{- search_form_attrs(pageno+1) -}}
{{- engine_data_form(engine_data) -}}
<button type="submit" class="btn btn-default"><span class="glyphicon glyphicon-backward"></span> {{ _('next page') }}</button>{{- "" -}}
</form>{{- "" -}}
</div>
<div class="pull-right">{{- "" -}}
<form method="{{ method or 'POST' }}" action="{{ url_for('search') }}" class="pull-left">
{{- search_form_attrs(pageno-1) -}}
{{- engine_data_form(engine_data) -}}
<button type="submit" class="btn btn-default" {% if pageno == 1 %}disabled{% endif %}><span class="glyphicon glyphicon-forward"></span> {{ _('previous page') }}</button>{{- "" -}}
</form>{{- "" -}}
</div>
@ -158,12 +167,14 @@
<div class="pull-left">{{- "" -}}
<form method="{{ method or 'POST' }}" action="{{ url_for('search') }}" class="pull-left">
{{- search_form_attrs(pageno-1) -}}
{{- engine_data_form(engine_data) -}}
<button type="submit" class="btn btn-default" {% if pageno == 1 %}disabled{% endif %}><span class="glyphicon glyphicon-backward"></span> {{ _('previous page') }}</button>{{- "" -}}
</form>{{- "" -}}
</div>
<div class="pull-right">{{- "" -}}
<form method="{{ method or 'POST' }}" action="{{ url_for('search') }}" class="pull-left">
{{- search_form_attrs(pageno+1) -}}
{{- engine_data_form(engine_data) -}}
<button type="submit" class="btn btn-default"><span class="glyphicon glyphicon-forward"></span> {{ _('next page') }}</button>{{- "" -}}
</form>{{- "" -}}
</div>

View File

@ -1,5 +1,12 @@
{% extends "simple/base.html" %}
{% from 'simple/macros.html' import icon, icon_small %}
{#- Render one hidden <input> per (engine, key) pair of `engine_data`, named "engine_data-<engine>-<key>", so the data is submitted again by the search forms that call this macro. Every interpolated part is escaped explicitly. -#}
{% macro engine_data_form(engine_data) -%}
    {% for engine_name, kv_data in engine_data.items() %}
        {% for k, v in kv_data.items() %}
            <input type="hidden" name="engine_data-{{ engine_name|e }}-{{ k|e }}" value="{{ v|e }}" />
        {% endfor %}
    {% endfor %}
{%- endmacro %}
{% block title %}{% if method == 'GET' %}{{- q|e -}} -{% endif %}{% endblock %}
{% block meta %}<link rel="alternate" type="application/rss+xml" title="Searx search: {{ q|e }}" href="{{ url_for('search', _external=True) }}?q={{ q|urlencode }}&amp;categories={{ selected_categories|join(",") | replace(' ','+') }}&amp;pageno={{ pageno }}&amp;time_range={{ time_range }}&amp;language={{ current_language }}&amp;safesearch={{ safesearch }}&amp;format=rss">{% endblock %}
{% block content %}
@ -136,6 +143,7 @@
<form method="{{ method or 'POST' }}" action="{{ url_for('search') }}">
<div class="{% if rtl %}right{% else %}left{% endif %}">
<input type="hidden" name="q" value="{{ q|e }}" >
{{- engine_data_form(engine_data) -}}
{% for category in selected_categories %}
<input type="hidden" name="category_{{ category }}" value="1" >
{% endfor %}
@ -152,6 +160,7 @@
<form method="{{ method or 'POST' }}" action="{{ url_for('search') }}">
<div class="{% if rtl %}left{% else %}right{% endif %}">
<input type="hidden" name="q" value="{{ q|e }}" >
{{- engine_data_form(engine_data) -}}
{% for category in selected_categories %}
<input type="hidden" name="category_{{ category }}" value="1" >
{% endfor %}

View File

@ -1,3 +1,4 @@
from collections import defaultdict
from typing import Dict, List, Optional, Tuple
from searx.exceptions import SearxParameterException
from searx.webutils import VALID_LANGUAGE_CODE
@ -196,6 +197,15 @@ def parse_generic(preferences: Preferences, form: Dict[str, str], disabled_engin
return query_engineref_list
def parse_engine_data(form):
    """Group ``engine_data-<engine>-<key>`` form fields by engine.

    Field names come from the client, so names that do not have the
    expected three-part shape are skipped instead of raising ``ValueError``
    during unpacking.

    Args:
        form: mapping of form field names to their values.

    Returns:
        A ``defaultdict(dict)`` mapping engine name -> {key: value}.
    """
    engine_data = defaultdict(dict)
    for field_name, value in form.items():
        # Require the separator too, so a field that merely begins with
        # "engine_data" (and has no engine/key parts) is ignored.
        if not field_name.startswith("engine_data-"):
            continue
        # Split at most twice: any further dash is kept as part of the key.
        # NOTE(review): an engine name that itself contains '-' is still
        # ambiguous with this field naming scheme; its first segment is
        # taken as the engine name.
        parts = field_name.split('-', 2)
        if len(parts) != 3:
            continue
        _, engine, key = parts
        engine_data[engine][key] = value
    return engine_data
def get_search_query_from_webapp(preferences: Preferences, form: Dict[str, str])\
-> Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef]]:
# no text for the query ?
@ -217,6 +227,7 @@ def get_search_query_from_webapp(preferences: Preferences, form: Dict[str, str])
query_time_range = parse_time_range(form)
query_timeout = parse_timeout(form, raw_text_query)
external_bang = raw_text_query.external_bang
engine_data = parse_engine_data(form)
if not is_locked('categories') and raw_text_query.enginerefs and raw_text_query.specific:
# if engines are calculated from query,
@ -232,7 +243,8 @@ def get_search_query_from_webapp(preferences: Preferences, form: Dict[str, str])
validate_engineref_list(query_engineref_list, preferences)
return (SearchQuery(query, query_engineref_list, query_lang, query_safesearch, query_pageno,
query_time_range, query_timeout, external_bang=external_bang),
query_time_range, query_timeout, external_bang=external_bang,
engine_data=engine_data),
raw_text_query,
query_engineref_list_unknown,
query_engineref_list_notoken)

View File

@ -730,6 +730,7 @@ def search():
answers=result_container.answers,
corrections=correction_urls,
infoboxes=result_container.infoboxes,
engine_data=result_container.engine_data,
paging=result_container.paging,
unresponsive_engines=__get_translated_errors(result_container.unresponsive_engines),
current_language=match_language(search_query.lang,

View File

@ -57,7 +57,8 @@ class ViewsTestCase(SearxTestCase):
results_number=lambda: 3,
results_length=lambda: len(test_results),
get_timings=lambda: timings,
redirect_url=None)
redirect_url=None,
engine_data={})
self.setattr4test(Search, 'search', search_mock)