From d793c2733c7aac3aacf40f3f5cf9fc0919305e76 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Tue, 15 Oct 2013 19:11:43 +0200 Subject: [PATCH] [enh] engine types --- examples/basic_engine.py | 2 +- searx/__init__.py | 7 ------- searx/engines/__init__.py | 14 ++++++++------ searx/engines/duckduckgo.py | 19 ++++++++++++------- searx/engines/duckduckgo_definitions.py | 12 ++++++------ searx/static/css/style.css | 4 +++- searx/templates/results.html | 5 ++++- 7 files changed, 34 insertions(+), 29 deletions(-) diff --git a/examples/basic_engine.py b/examples/basic_engine.py index 1ed5a338..36fb6260 100644 --- a/examples/basic_engine.py +++ b/examples/basic_engine.py @@ -17,5 +17,5 @@ def response(resp): '''post-response callback resp: requests response object ''' - return [resp.text] + return [{'url': '', 'title': '', 'content': ''}] diff --git a/searx/__init__.py b/searx/__init__.py index 91d85647..e69de29b 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -1,7 +0,0 @@ - -base_result_template = """ -
-

{title}

-

{content}
{url}

-
-""" diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index ced673bd..86fa50d2 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -6,7 +6,7 @@ import grequests engine_dir = dirname(realpath(__file__)) -engines = [] +engines = {} for filename in listdir(engine_dir): modname = splitext(filename)[0] @@ -16,14 +16,16 @@ for filename in listdir(engine_dir): engine = load_source(modname, filepath) if not hasattr(engine, 'request') or not hasattr(engine, 'response'): continue - engines.append(engine) + engines[modname] = engine def default_request_params(): return {'method': 'GET', 'headers': {}, 'data': {}, 'url': ''} -def make_callback(results, callback): +def make_callback(engine_name, results, callback): def process_callback(response, **kwargs): - results.extend(callback(response)) + for result in callback(response): + result['engine'] = engine_name + results.append(result) return process_callback def search(query, request): @@ -31,11 +33,11 @@ def search(query, request): requests = [] results = [] user_agent = request.headers.get('User-Agent', '') - for engine in engines: + for ename, engine in engines.items(): headers = default_request_params() headers['User-Agent'] = user_agent request_params = engine.request(query, headers) - callback = make_callback(results, engine.response) + callback = make_callback(ename, results, engine.response) if request_params['method'] == 'GET': req = grequests.get(request_params['url'] ,headers=headers diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index ed93829b..74c17a31 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -1,14 +1,19 @@ -from lxml import html +from json import loads def request(query, params): - params['method'] = 'POST' - params['url'] = 'https://duckduckgo.com/html' - params['data']['q'] = query + params['url'] = 'https://duckduckgo.com/d.js?q=%s&l=us-en&p=1&s=0' % query return params def response(resp): - dom = html.fromstring(resp.text) - results = dom.xpath('//div[@class="results_links results_links_deep web-result"]') - return [html.tostring(x) for x in results] + results = [] + search_res = loads(resp.text[resp.text.find('[{'):-2])[:-1] + for r in search_res: + if not r.get('t'): + continue + results.append({'title': r['t'] + ,'content': r['a'] + ,'url': r['u'] + }) + return results diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index de694e02..531b53cc 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -1,5 +1,4 @@ import json -from searx import base_result_template def request(query, params): params['url'] = 'http://api.duckduckgo.com/?q=%s&format=json&pretty=0' % query @@ -10,10 +9,11 @@ def response(resp): search_res = json.loads(resp.text) results = [] if 'Definition' in search_res: - res = {'title' : search_res.get('Heading', '') - ,'content' : search_res.get('Definition', '') - ,'url' : search_res.get('AbstractURL', '') - } - results.append(base_result_template.format(**res)) + if search_res.get('AbstractURL'): + res = {'title' : search_res.get('Heading', '') + ,'content' : search_res.get('Definition', '') + ,'url' : search_res.get('AbstractURL', '') + } + results.append(res) return results diff --git a/searx/static/css/style.css b/searx/static/css/style.css index 0eaa4b59..d9705578 100644 --- a/searx/static/css/style.css +++ b/searx/static/css/style.css @@ -8,7 +8,9 @@ html { h1 { font-size: 5em; } -input { border: 2px solid #8888FF; padding: 8px; background-color: #FFFFFF; font-size: 1.6em; } +input { border: 2px solid #8888FF; padding: 8px; background-color: #FFFFFF; font-size: 1.3em; } + +a { text-decoration: none; } .result_title { margin-bottom: 0; } diff --git a/searx/templates/results.html b/searx/templates/results.html index 10562403..5665bc50 100644 --- a/searx/templates/results.html +++ b/searx/templates/results.html @@ -5,6 +5,9 @@ {% for result in results %} -

{{ result|safe }}

+
+

{{ result.title|safe }}

+

{{ result.engine }}
{% if result.content %}{{ result.content|safe }}
{% endif %}{{ result.url }}

+
{% endfor %} {% endblock %}