From 3a8ab5880a7e48470f56ddf18f80ff626c4f31c7 Mon Sep 17 00:00:00 2001
From: Alexandre Flament
Date: Fri, 30 Dec 2016 17:16:53 +0100
Subject: [PATCH 1/3] [mod] result_container.extend sets result['engine'] = engine_name for each result

---
 searx/results.py | 1 +
 searx/search.py  | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/searx/results.py b/searx/results.py
index 73a96c08..6062f801 100644
--- a/searx/results.py
+++ b/searx/results.py
@@ -133,6 +133,7 @@ class ResultContainer(object):
 
     def extend(self, engine_name, results):
         for result in list(results):
+            result['engine'] = engine_name
             if 'suggestion' in result:
                 self.suggestions.add(result['suggestion'])
                 results.remove(result)
diff --git a/searx/search.py b/searx/search.py
index c592c655..18bda334 100644
--- a/searx/search.py
+++ b/searx/search.py
@@ -108,8 +108,6 @@ def search_one_request_safe(engine_name, query, request_params, result_container
         search_results = search_one_request(engine, query, request_params, timeout_limit)
 
         # add results
-        for result in search_results:
-            result['engine'] = engine_name
         result_container.extend(engine_name, search_results)
 
         # update engine time when there is no exception

From 28d51fd063242f0a439760d4484059ba4a9dfbe2 Mon Sep 17 00:00:00 2001
From: Alexandre Flament
Date: Fri, 30 Dec 2016 17:37:46 +0100
Subject: [PATCH 2/3] [mod] timeouts in the log are readable (the timeouts are compared to the start_time of the request).

---
 searx/search.py | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/searx/search.py b/searx/search.py
index 18bda334..021dba03 100644
--- a/searx/search.py
+++ b/searx/search.py
@@ -37,7 +37,7 @@ logger = logger.getChild('search')
 number_of_searches = 0
 
 
-def send_http_request(engine, request_params, timeout_limit):
+def send_http_request(engine, request_params, start_time, timeout_limit):
     # for page_load_time stats
     time_before_request = time()
 
@@ -62,7 +62,8 @@
 
     # is there a timeout (no parsing in this case)
     timeout_overhead = 0.2  # seconds
-    search_duration = time() - request_params['started']
+    time_after_request = time()
+    search_duration = time_after_request - start_time
     if search_duration > timeout_limit + timeout_overhead:
         raise Timeout(response=response)
 
@@ -72,14 +73,14 @@
         engine.suspend_end_time = 0
         # update stats with current page-load-time
         # only the HTTP request
-        engine.stats['page_load_time'] += time() - time_before_request
+        engine.stats['page_load_time'] += time_after_request - time_before_request
         engine.stats['page_load_count'] += 1
 
     # everything is ok : return the response
     return response
 
 
-def search_one_request(engine, query, request_params, timeout_limit):
+def search_one_request(engine, query, request_params, start_time, timeout_limit):
     # update request parameters dependent on
     # search-engine (contained in engines folder)
     engine.request(query, request_params)
@@ -92,20 +93,19 @@
         return []
 
     # send request
-    response = send_http_request(engine, request_params, timeout_limit)
+    response = send_http_request(engine, request_params, start_time, timeout_limit)
 
     # parse the response
     response.search_params = request_params
     return engine.response(response)
 
 
-def search_one_request_safe(engine_name, query, request_params, result_container, timeout_limit):
-    start_time = time()
+def search_one_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
     engine = engines[engine_name]
 
     try:
         # send requests and parse the results
-        search_results = search_one_request(engine, query, request_params, timeout_limit)
+        search_results = search_one_request(engine, query, request_params, start_time, timeout_limit)
 
         # add results
         result_container.extend(engine_name, search_results)
@@ -149,14 +149,13 @@ def search_one_request_safe(engine_name, query, request_params, result_container
     return False
 
 
-def search_multiple_requests(requests, result_container, timeout_limit):
-    start_time = time()
+def search_multiple_requests(requests, result_container, start_time, timeout_limit):
     search_id = uuid4().__str__()
 
     for engine_name, query, request_params in requests:
         th = threading.Thread(
             target=search_one_request_safe,
-            args=(engine_name, query, request_params, result_container, timeout_limit),
+            args=(engine_name, query, request_params, result_container, start_time, timeout_limit),
             name=search_id,
         )
         th._engine_name = engine_name
@@ -366,7 +365,6 @@ class Search(object):
             request_params = default_request_params()
             request_params['headers']['User-Agent'] = user_agent
             request_params['category'] = selected_engine['category']
-            request_params['started'] = start_time
             request_params['pageno'] = search_query.pageno
 
             if hasattr(engine, 'language') and engine.language:
@@ -386,7 +384,7 @@ class Search(object):
 
         if requests:
             # send all search-request
-            search_multiple_requests(requests, self.result_container, timeout_limit - (time() - start_time))
+            search_multiple_requests(requests, self.result_container, start_time, timeout_limit)
             start_new_thread(gc.collect, tuple())
 
         # return results, suggestions, answers and infoboxes

From 5a9551925d4e3fea0ea5ec6d50733803a22b6445 Mon Sep 17 00:00:00 2001
From: Alexandre Flament
Date: Fri, 30 Dec 2016 18:08:48 +0100
Subject: [PATCH 3/3] [fix] fix bugs introduced by commit 0a2fde19d031acef80ce5991bc6b41528dbfbf5b from dalf/searchpy4

---
 searx/search.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/searx/search.py b/searx/search.py
index 021dba03..e0f0cfd6 100644
--- a/searx/search.py
+++ b/searx/search.py
@@ -65,7 +65,7 @@ def send_http_request(engine, request_params, start_time, timeout_limit):
     time_after_request = time()
     search_duration = time_after_request - start_time
     if search_duration > timeout_limit + timeout_overhead:
-        raise Timeout(response=response)
+        raise requests.exceptions.Timeout(response=response)
 
     with threading.RLock():
         # no error : reset the suspend variables
@@ -129,7 +129,7 @@ def search_one_request_safe(engine_name, query, request_params, result_container
                          "(search duration : {1} s, timeout: {2} s) : {3}"
                          .format(engine_name, search_duration, timeout_limit, e.__class__.__name__))
             requests_exception = True
-        if (issubclass(e.__class__, requests.exceptions.RequestException)):
+        elif (issubclass(e.__class__, requests.exceptions.RequestException)):
             # other requests exception
             logger.exception("engine {0} : requests exception"
                              "(search duration : {1} s, timeout: {2} s) : {3}"