diff --git a/.gitignore b/.gitignore
index b1286ea6..5c0cfe14 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,5 @@ dist/
 local/
 gh-pages/
 searx.egg-info/
+.env
+geckodriver.log
diff --git a/docs/dev/engine_overview.rst b/docs/dev/engine_overview.rst
index 42c205d9..d7a8c77b 100644
--- a/docs/dev/engine_overview.rst
+++ b/docs/dev/engine_overview.rst
@@ -171,7 +171,7 @@ headers             set   HTTP header information
 data                set   HTTP data information
 cookies             set   HTTP cookies
 verify              bool  Performing SSL-Validity check
-allow_redirects     bool  Follow redirects
+follow_redirects    bool  Follow redirects
 max_redirects       int   maximum redirects, hard limit
 soft_max_redirects  int   maximum redirects, soft limit. Record an error but don't stop the engine
 raise_for_httperror bool  True by default: raise an exception if the HTTP code of response is >= 300
diff --git a/requirements.txt b/requirements.txt
index 11ac41a1..3221deac 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,10 +7,10 @@ lxml==4.6.3
 pygments==2.8.0
 python-dateutil==2.8.2
 pyyaml==6.0
-httpx[http2]==0.19.0
+httpx[http2]==0.21.3
 Brotli==1.0.9
 uvloop==0.16.0; python_version >= '3.7'
 uvloop==0.14.0; python_version < '3.7'
-httpx-socks[asyncio]==0.4.1
+httpx-socks[asyncio]==0.7.2
 langdetect==1.0.9
 setproctitle==1.2.2
diff --git a/searx/engines/yggtorrent.py b/searx/engines/yggtorrent.py
index f5af91f4..c2789a6a 100644
--- a/searx/engines/yggtorrent.py
+++ b/searx/engines/yggtorrent.py
@@ -39,7 +39,7 @@ cookies = dict()
 def init(engine_settings=None):
     global cookies
     # initial cookies
-    resp = http_get(url, allow_redirects=False)
+    resp = http_get(url, follow_redirects=False)
     if resp.ok:
         for r in resp.history:
             cookies.update(r.cookies)
diff --git a/searx/network/__init__.py b/searx/network/__init__.py
index 2e1895cf..f55e4d84 100644
--- a/searx/network/__init__.py
+++ b/searx/network/__init__.py
@@ -122,17 +122,17 @@ def request(method, url, **kwargs):
 
 
 def get(url, **kwargs):
-    kwargs.setdefault('allow_redirects', True)
+    kwargs.setdefault('follow_redirects', True)
     return request('get', url, **kwargs)
 
 
 def options(url, **kwargs):
-    kwargs.setdefault('allow_redirects', True)
+    kwargs.setdefault('follow_redirects', True)
     return request('options', url, **kwargs)
 
 
 def head(url, **kwargs):
-    kwargs.setdefault('allow_redirects', False)
+    kwargs.setdefault('follow_redirects', False)
     return request('head', url, **kwargs)
 
 
diff --git a/searx/network/client.py b/searx/network/client.py
index 47b2a981..4bf7fb74 100644
--- a/searx/network/client.py
+++ b/searx/network/client.py
@@ -30,17 +30,15 @@ logger = logger.getChild('searx.http.client')
 LOOP = None
 SSLCONTEXTS = {}
 TRANSPORT_KWARGS = {
-    'backend': 'asyncio',
     'trust_env': False,
 }
 
 
-async def close_connections_for_url(connection_pool: httpcore.AsyncConnectionPool, url: httpcore._utils.URL):
-    origin = httpcore._utils.url_to_origin(url)
-    logger.debug('Drop connections for %r', origin)
-    connections_to_close = connection_pool._connections_for_origin(origin)
+async def close_connections_for_url(connection_pool: httpcore.AsyncConnectionPool, url: httpx._models.URL):
+    logger.debug('Drop connections for %r', url.host)
+    connections_to_close = [conn for conn in connection_pool._pool if conn._origin == url.host]
     for connection in connections_to_close:
-        await connection_pool._remove_from_pool(connection)
+        connection_pool._pool.remove(connection)
         try:
             await connection.aclose()
         except httpx.NetworkError as e:
@@ -76,12 +74,12 @@ class AsyncProxyTransportFixed(AsyncProxyTransport):
     Note: AsyncProxyTransport inherit from AsyncConnectionPool
     """
 
-    async def handle_async_request(self, method, url, headers=None, stream=None, extensions=None):
+    async def handle_async_request(self, request: httpx.Request):
         retry = 2
         while retry > 0:
             retry -= 1
             try:
-                return await super().handle_async_request(method, url, headers, stream, extensions)
+                return await super().handle_async_request(request)
             except (ProxyConnectionError, ProxyTimeoutError, ProxyError) as e:
                 raise httpx.ProxyError(e)
             except OSError as e:
@@ -89,25 +87,25 @@ class AsyncProxyTransportFixed(AsyncProxyTransport):
                 raise httpx.NetworkError(e)
             except httpx.RemoteProtocolError as e:
                 # in case of httpx.RemoteProtocolError: Server disconnected
-                await close_connections_for_url(self, url)
+                await close_connections_for_url(self, request.url)
                 logger.warning('httpx.RemoteProtocolError: retry', exc_info=e)
                 # retry
             except (httpx.NetworkError, httpx.ProtocolError) as e:
                 # httpx.WriteError on HTTP/2 connection leaves a new opened stream
                 # then each new request creates a new stream and raise the same WriteError
-                await close_connections_for_url(self, url)
+                await close_connections_for_url(self, request.url)
                 raise e
 
 
 class AsyncHTTPTransportFixed(httpx.AsyncHTTPTransport):
     """Fix httpx.AsyncHTTPTransport"""
 
-    async def handle_async_request(self, method, url, headers=None, stream=None, extensions=None):
+    async def handle_async_request(self, request: httpx.Request):
         retry = 2
         while retry > 0:
             retry -= 1
             try:
-                return await super().handle_async_request(method, url, headers, stream, extensions)
+                return await super().handle_async_request(request)
             except OSError as e:
                 # socket.gaierror when DNS resolution fails
                 raise httpx.ConnectError(e)
@@ -115,16 +113,16 @@ class AsyncHTTPTransportFixed(httpx.AsyncHTTPTransport):
                 # httpx.CloseError: [Errno 104] Connection reset by peer
                 # raised by _keepalive_sweep()
                 # from https://github.com/encode/httpcore/blob/4b662b5c42378a61e54d673b4c949420102379f5/httpcore/_backends/asyncio.py#L198  # noqa
-                await close_connections_for_url(self._pool, url)
+                await close_connections_for_url(self._pool, request.url)
                 logger.warning('httpx.CloseError: retry', exc_info=e)
                 # retry
             except httpx.RemoteProtocolError as e:
                 # in case of httpx.RemoteProtocolError: Server disconnected
-                await close_connections_for_url(self._pool, url)
+                await close_connections_for_url(self._pool, request.url)
                 logger.warning('httpx.RemoteProtocolError: retry', exc_info=e)
                 # retry
             except (httpx.ProtocolError, httpx.NetworkError) as e:
-                await close_connections_for_url(self._pool, url)
+                await close_connections_for_url(self._pool, request.url)
                 raise e
 
 
diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py
index e54b3f68..6cc89704 100644
--- a/searx/search/checker/impl.py
+++ b/searx/search/checker/impl.py
@@ -76,7 +76,7 @@ def _is_url_image(image_url):
     a = time()
     try:
         network.set_timeout_for_thread(10.0, time())
-        r = network.get(image_url, timeout=10.0, allow_redirects=True, headers={
+        r = network.get(image_url, timeout=10.0, follow_redirects=True, headers={
             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
             'Accept-Language': 'en-US;q=0.5,en;q=0.3',
diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py
index 66719ea9..59471d14 100644
--- a/searx/search/processors/online.py
+++ b/searx/search/processors/online.py
@@ -69,9 +69,10 @@ class OnlineProcessor(EngineProcessor):
         if max_redirects:
             request_args['max_redirects'] = max_redirects
 
-        # allow_redirects
-        if 'allow_redirects' in params:
-            request_args['allow_redirects'] = params['allow_redirects']
+        # follow_redirects
+        if 'follow_redirects' in params:
+            # httpx has renamed this parameter to 'follow_redirects'
+            request_args['follow_redirects'] = params['follow_redirects']
 
         # soft_max_redirects
         soft_max_redirects = params.get('soft_max_redirects', max_redirects or 0)
diff --git a/searx/webapp.py b/searx/webapp.py
index bf9d6130..20431dcb 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -941,7 +941,7 @@ def image_proxy():
             url=url,
             headers=headers,
             timeout=settings['outgoing']['request_timeout'],
-            allow_redirects=True,
+            follow_redirects=True,
             max_redirects=20)
 
         resp = next(stream)
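
For reference, a minimal sketch (not part of the patch) of how callers pass the renamed keyword after this upgrade: httpx 0.20+ renamed `allow_redirects` to `follow_redirects`, and the changes above forward the new name through `searx.network` and engine `params`. The URL and the engine `request()` body below are illustrative placeholders, not code from this change.

    # searx.network helpers now forward `follow_redirects` to httpx.
    from searx import network

    # Equivalent of the old `allow_redirects=False` call.
    resp = network.get('https://example.org/', follow_redirects=False)

    # Engines set the same key on `params`; OnlineProcessor copies it into the
    # arguments passed to searx.network.request() (see online.py above).
    def request(query, params):
        params['url'] = 'https://example.org/search?q=' + query
        params['follow_redirects'] = False  # formerly params['allow_redirects']
        return params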