mirror of https://github.com/searx/searx
Pick image_proxy changes from searxng (#2965)
* [mod] /image_proxy: don't decompress images * [fix] image_proxy: always close the httpx response. Previously, when the content type was not an image or some other error occurred, the httpx response was not closed * [mod] /image_proxy: use HTTP/1 instead of HTTP/2 httpx: HTTP/2 is slow when a lot of data is downloaded. https://github.com/dalf/pyhttp-benchmark Also, the usage of HTTP/1 decreases the load average * [mod] searx.utils.dict_subset: rewrite with comprehension Co-authored-by: Alexandre Flament <alex@al-f.net>
This commit is contained in:
parent
ad7e00ad03
commit
ea38fea711
|
@ -5,6 +5,7 @@ import threading
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
from time import time
|
from time import time
|
||||||
from queue import SimpleQueue
|
from queue import SimpleQueue
|
||||||
|
from types import MethodType
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
import h2.exceptions
|
import h2.exceptions
|
||||||
|
@ -134,15 +135,27 @@ async def stream_chunk_to_queue(network, q, method, url, **kwargs):
|
||||||
try:
|
try:
|
||||||
async with await network.stream(method, url, **kwargs) as response:
|
async with await network.stream(method, url, **kwargs) as response:
|
||||||
q.put(response)
|
q.put(response)
|
||||||
|
# aiter_raw: access the raw bytes on the response without applying any HTTP content decoding
|
||||||
|
# https://www.python-httpx.org/quickstart/#streaming-responses
|
||||||
async for chunk in response.aiter_bytes(65536):
|
async for chunk in response.aiter_bytes(65536):
|
||||||
if len(chunk) > 0:
|
if len(chunk) > 0:
|
||||||
q.put(chunk)
|
q.put(chunk)
|
||||||
|
except httpx.ResponseClosed as e:
|
||||||
|
# the response was closed
|
||||||
|
pass
|
||||||
except (httpx.HTTPError, OSError, h2.exceptions.ProtocolError) as e:
|
except (httpx.HTTPError, OSError, h2.exceptions.ProtocolError) as e:
|
||||||
q.put(e)
|
q.put(e)
|
||||||
finally:
|
finally:
|
||||||
q.put(None)
|
q.put(None)
|
||||||
|
|
||||||
|
|
||||||
|
def _close_response_method(self):
|
||||||
|
asyncio.run_coroutine_threadsafe(
|
||||||
|
self.aclose(),
|
||||||
|
get_loop()
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def stream(method, url, **kwargs):
|
def stream(method, url, **kwargs):
|
||||||
"""Replace httpx.stream.
|
"""Replace httpx.stream.
|
||||||
|
|
||||||
|
@ -158,10 +171,18 @@ def stream(method, url, **kwargs):
|
||||||
q = SimpleQueue()
|
q = SimpleQueue()
|
||||||
future = asyncio.run_coroutine_threadsafe(stream_chunk_to_queue(get_network(), q, method, url, **kwargs),
|
future = asyncio.run_coroutine_threadsafe(stream_chunk_to_queue(get_network(), q, method, url, **kwargs),
|
||||||
get_loop())
|
get_loop())
|
||||||
|
# yield response
|
||||||
|
response = q.get()
|
||||||
|
if isinstance(response, Exception):
|
||||||
|
raise response
|
||||||
|
response.close = MethodType(_close_response_method, response)
|
||||||
|
yield response
|
||||||
|
|
||||||
|
# yield chunks
|
||||||
chunk_or_exception = q.get()
|
chunk_or_exception = q.get()
|
||||||
while chunk_or_exception is not None:
|
while chunk_or_exception is not None:
|
||||||
if isinstance(chunk_or_exception, Exception):
|
if isinstance(chunk_or_exception, Exception):
|
||||||
raise chunk_or_exception
|
raise chunk_or_exception
|
||||||
yield chunk_or_exception
|
yield chunk_or_exception
|
||||||
chunk_or_exception = q.get()
|
chunk_or_exception = q.get()
|
||||||
return future.result()
|
future.result()
|
||||||
|
|
|
@ -326,6 +326,14 @@ def initialize(settings_engines=None, settings_outgoing=None):
|
||||||
if isinstance(network, str):
|
if isinstance(network, str):
|
||||||
NETWORKS[engine_name] = NETWORKS[network]
|
NETWORKS[engine_name] = NETWORKS[network]
|
||||||
|
|
||||||
|
# the /image_proxy endpoint has a dedicated network.
|
||||||
|
# same parameters as the default network, but HTTP/2 is disabled.
|
||||||
|
# It decreases the CPU load average, and the total time is more or less the same
|
||||||
|
if 'image_proxy' not in NETWORKS:
|
||||||
|
image_proxy_params = default_params.copy()
|
||||||
|
image_proxy_params['enable_http2'] = False
|
||||||
|
NETWORKS['image_proxy'] = new_network(image_proxy_params)
|
||||||
|
|
||||||
|
|
||||||
@atexit.register
|
@atexit.register
|
||||||
def done():
|
def done():
|
||||||
|
|
|
@ -272,11 +272,7 @@ def dict_subset(d, properties):
|
||||||
>>> >> dict_subset({'A': 'a', 'B': 'b', 'C': 'c'}, ['A', 'D'])
|
>>> >> dict_subset({'A': 'a', 'B': 'b', 'C': 'c'}, ['A', 'D'])
|
||||||
{'A': 'a'}
|
{'A': 'a'}
|
||||||
"""
|
"""
|
||||||
result = {}
|
return {k: d[k] for k in properties if k in d}
|
||||||
for k in properties:
|
|
||||||
if k in d:
|
|
||||||
result[k] = d[k]
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
def get_torrent_size(filesize, filesize_multiplier):
|
def get_torrent_size(filesize, filesize_multiplier):
|
||||||
|
|
|
@ -94,7 +94,7 @@ from searx.plugins import plugins
|
||||||
from searx.plugins.oa_doi_rewrite import get_doi_resolver
|
from searx.plugins.oa_doi_rewrite import get_doi_resolver
|
||||||
from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
|
from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
|
||||||
from searx.answerers import answerers
|
from searx.answerers import answerers
|
||||||
from searx.network import stream as http_stream
|
from searx.network import stream as http_stream, set_context_network_name
|
||||||
from searx.answerers import ask
|
from searx.answerers import ask
|
||||||
from searx.metrology.error_recorder import errors_per_engines
|
from searx.metrology.error_recorder import errors_per_engines
|
||||||
from searx.settings_loader import get_default_settings_path
|
from searx.settings_loader import get_default_settings_path
|
||||||
|
@ -921,6 +921,8 @@ def _is_selected_language_supported(engine, preferences):
|
||||||
|
|
||||||
@app.route('/image_proxy', methods=['GET'])
|
@app.route('/image_proxy', methods=['GET'])
|
||||||
def image_proxy():
|
def image_proxy():
|
||||||
|
# pylint: disable=too-many-return-statements, too-many-branches
|
||||||
|
|
||||||
url = request.args.get('url')
|
url = request.args.get('url')
|
||||||
|
|
||||||
if not url:
|
if not url:
|
||||||
|
@ -932,14 +934,21 @@ def image_proxy():
|
||||||
return '', 400
|
return '', 400
|
||||||
|
|
||||||
maximum_size = 5 * 1024 * 1024
|
maximum_size = 5 * 1024 * 1024
|
||||||
|
forward_resp = False
|
||||||
|
resp = None
|
||||||
try:
|
try:
|
||||||
headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
|
request_headers = {
|
||||||
headers['User-Agent'] = gen_useragent()
|
'User-Agent': gen_useragent(),
|
||||||
|
'Accept': 'image/webp,*/*',
|
||||||
|
'Accept-Encoding': 'gzip, deflate',
|
||||||
|
'Sec-GPC': '1',
|
||||||
|
'DNT': '1',
|
||||||
|
}
|
||||||
|
set_context_network_name('image_proxy')
|
||||||
stream = http_stream(
|
stream = http_stream(
|
||||||
method='GET',
|
method='GET',
|
||||||
url=url,
|
url=url,
|
||||||
headers=headers,
|
headers=request_headers,
|
||||||
timeout=settings['outgoing']['request_timeout'],
|
timeout=settings['outgoing']['request_timeout'],
|
||||||
follow_redirects=True,
|
follow_redirects=True,
|
||||||
max_redirects=20)
|
max_redirects=20)
|
||||||
|
@ -949,25 +958,37 @@ def image_proxy():
|
||||||
if content_length and content_length.isdigit() and int(content_length) > maximum_size:
|
if content_length and content_length.isdigit() and int(content_length) > maximum_size:
|
||||||
return 'Max size', 400
|
return 'Max size', 400
|
||||||
|
|
||||||
if resp.status_code == 304:
|
|
||||||
return '', resp.status_code
|
|
||||||
|
|
||||||
if resp.status_code != 200:
|
if resp.status_code != 200:
|
||||||
logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
|
logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
|
||||||
if resp.status_code >= 400:
|
if resp.status_code >= 400:
|
||||||
return '', resp.status_code
|
return '', resp.status_code
|
||||||
return '', 400
|
return '', 400
|
||||||
|
|
||||||
if not resp.headers.get('content-type', '').startswith('image/'):
|
if not resp.headers.get('Content-Type', '').startswith('image/'):
|
||||||
logger.debug('image-proxy: wrong content-type: {0}'.format(resp.headers.get('content-type')))
|
logger.debug('image-proxy: wrong content-type: %s', resp.headers.get('Content-Type', ''))
|
||||||
return '', 400
|
return '', 400
|
||||||
|
|
||||||
headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'})
|
forward_resp = True
|
||||||
|
except httpx.HTTPError:
|
||||||
|
logger.exception('HTTP error')
|
||||||
|
return '', 400
|
||||||
|
finally:
|
||||||
|
if resp and not forward_resp:
|
||||||
|
# the code is about to return an HTTP 400 error to the browser
|
||||||
|
# we make sure to close the response between searxng and the HTTP server
|
||||||
|
try:
|
||||||
|
resp.close()
|
||||||
|
except httpx.HTTPError:
|
||||||
|
logger.exception('HTTP error on closing')
|
||||||
|
|
||||||
total_length = 0
|
try:
|
||||||
|
headers = dict_subset(
|
||||||
|
resp.headers,
|
||||||
|
{'Content-Type', 'Content-Encoding', 'Content-Length', 'Length'}
|
||||||
|
)
|
||||||
|
|
||||||
def forward_chunk():
|
def forward_chunk():
|
||||||
nonlocal total_length
|
total_length = 0
|
||||||
for chunk in stream:
|
for chunk in stream:
|
||||||
total_length += len(chunk)
|
total_length += len(chunk)
|
||||||
if total_length > maximum_size:
|
if total_length > maximum_size:
|
||||||
|
@ -1148,6 +1169,13 @@ def run():
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def patch_application(app):
|
||||||
|
# serve pages with HTTP/1.1
|
||||||
|
WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server']['http_protocol_version'])
|
||||||
|
# patch app to handle non root url-s behind proxy & wsgi
|
||||||
|
app.wsgi_app = ReverseProxyPathFix(ProxyFix(app.wsgi_app))
|
||||||
|
|
||||||
|
|
||||||
class ReverseProxyPathFix:
|
class ReverseProxyPathFix:
|
||||||
'''Wrap the application in this middleware and configure the
|
'''Wrap the application in this middleware and configure the
|
||||||
front-end server to add these headers, to let you quietly bind
|
front-end server to add these headers, to let you quietly bind
|
||||||
|
|
Loading…
Reference in New Issue