diff --git a/searx/engines/google.py b/searx/engines/google.py
index 7fc022ea..9a4124b2 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -235,6 +235,7 @@ def request(query, params):
     params['url'] = query_url
 
     logger.debug("HTTP header Accept-Language --> %s", lang_info.get('Accept-Language'))
+    params['cookies']['CONSENT'] = "YES+"
     params['headers'].update(lang_info['headers'])
     if use_mobile_ui:
         params['headers']['Accept'] = '*/*'
diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py
index 8c204b29..83c27b71 100644
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@@ -121,6 +121,7 @@ def request(query, params):
     params['url'] = query_url
 
     logger.debug("HTTP header Accept-Language --> %s", lang_info.get('Accept-Language'))
+    params['cookies']['CONSENT'] = "YES+"
     params['headers'].update(lang_info['headers'])
     params['headers']['Accept'] = (
         'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index eb074ebc..3d2022df 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -111,6 +111,8 @@ def request(query, params):
     params['url'] = query_url
 
     logger.debug("HTTP header Accept-Language --> %s", lang_info.get('Accept-Language'))
+
+    params['cookies']['CONSENT'] = "YES+"
     params['headers'].update(lang_info['headers'])
     params['headers']['Accept'] = (
         'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
diff --git a/searx/engines/google_play_apps.py b/searx/engines/google_play_apps.py
new file mode 100644
index 00000000..226e48da
--- /dev/null
+++ b/searx/engines/google_play_apps.py
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Google Play Apps
+"""
+
+from urllib.parse import urlencode
+from lxml import html
+from searx.utils import (
+    eval_xpath,
+    extract_url,
+    extract_text,
+    eval_xpath_list,
+    eval_xpath_getindex,
+)
+
+about = {
+    "website": "https://play.google.com/",
+    "wikidata_id": "Q79576",
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": "HTML",
+}
+
+categories = ["files", "apps"]
+search_url = "https://play.google.com/store/search?{query}&c=apps"
+
+
+def request(query, params):
+    params["url"] = search_url.format(query=urlencode({"q": query}))
+    params['cookies']['CONSENT'] = "YES+"
+
+    return params
+
+
+def response(resp):
+    results = []
+
+    dom = html.fromstring(resp.text)
+
+    if eval_xpath(dom, '//div[@class="v6DsQb"]'):
+        return []
+
+    spot = eval_xpath_getindex(dom, '//div[@class="ipRz4"]', 0, None)
+    if spot is not None:
+        url = extract_url(eval_xpath(spot, './a[@class="Qfxief"]/@href'), search_url)
+        title = extract_text(eval_xpath(spot, './/div[@class="vWM94c"]'))
+        content = extract_text(eval_xpath(spot, './/div[@class="LbQbAe"]'))
+        img = extract_text(eval_xpath(spot, './/img[@class="T75of bzqKMd"]/@src'))
+
+        results.append({"url": url, "title": title, "content": content, "img_src": img})
+
+    more = eval_xpath_list(dom, '//c-wiz[@jsrenderer="RBsfwb"]//div[@role="listitem"]', min_len=1)
+    for result in more:
+        url = extract_url(eval_xpath(result, ".//a/@href"), search_url)
+        title = extract_text(eval_xpath(result, './/span[@class="DdYX5"]'))
+        content = extract_text(eval_xpath(result, './/span[@class="wMUdtb"]'))
+        img = extract_text(
+            eval_xpath(
+                result,
+                './/img[@class="T75of stzEZd" or @class="T75of etjhNc Q8CSx "]/@src',
+            )
+        )
+
+        results.append({"url": url, "title": title, "content": content, "img_src": img})
+
+    for suggestion in eval_xpath_list(dom, '//c-wiz[@jsrenderer="qyd4Kb"]//div[@class="ULeU3b neq64b"]'):
+        results.append({"suggestion": extract_text(eval_xpath(suggestion, './/div[@class="Epkrse "]'))})
+
+    return results
diff --git a/searx/engines/google_scholar.py b/searx/engines/google_scholar.py
index 960219aa..ec764465 100644
--- a/searx/engines/google_scholar.py
+++ b/searx/engines/google_scholar.py
@@ -85,13 +85,13 @@ def request(query, params):
     # subdomain is: scholar.google.xy
     lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.")
 
-    query_url = 'https://'+ lang_info['subdomain'] + '/scholar' + "?" + urlencode({
-        'q': query,
-        **lang_info['params'],
-        'ie': "utf8",
-        'oe': "utf8",
-        'start' : offset,
-    })
+    query_url = (
+        'https://'
+        + lang_info['subdomain']
+        + '/scholar'
+        + "?"
+        + urlencode({'q': query, **lang_info['params'], 'ie': "utf8", 'oe': "utf8", 'start': offset})
+    )
 
     query_url += time_range_url(params)
 
@@ -99,6 +99,7 @@ def request(query, params):
     params['url'] = query_url
 
     logger.debug("HTTP header Accept-Language --> %s", lang_info.get('Accept-Language'))
+    params['cookies']['CONSENT'] = "YES+"
     params['headers'].update(lang_info['headers'])
     params['headers']['Accept'] = (
         'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py
index 40c7f2b9..26498fbe 100644
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -138,6 +138,7 @@ def request(query, params):
     params['url'] = query_url
 
     logger.debug("HTTP header Accept-Language --> %s", lang_info.get('Accept-Language'))
+    params['cookies']['CONSENT'] = "YES+"
     params['headers'].update(lang_info['headers'])
     params['headers']['Accept'] = (
         'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py
index 68b75bc7..fda03b9c 100644
--- a/searx/engines/youtube_noapi.py
+++ b/searx/engines/youtube_noapi.py
@@ -44,6 +44,7 @@ base_youtube_url = 'https://www.youtube.com/watch?v='
 
 # do search-request
 def request(query, params):
+    params['cookies']['CONSENT'] = "YES+"
     if not params['engine_data'].get('next_page_token'):
         params['url'] = search_url.format(query=quote_plus(query), page=params['pageno'])
         if params['time_range'] in time_range_dict:
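
Note on the change this diff repeats across engines: each request() now sets a CONSENT=YES+ cookie, which marks Google's GDPR consent dialog as already answered, so instances in affected regions get a results page instead of a redirect to consent.google.com. The standalone sketch below illustrates that mechanism outside searx; it is a minimal demo, not part of the patch, assuming httpx and with the URL, query, and User-Agent chosen purely for illustration:

    # sketch.py - hypothetical demo of the CONSENT cookie, not part of this patch
    import httpx

    def fetch_results(query: str, consent: bool) -> httpx.Response:
        # With consent=True, pre-set the cookie exactly as the patch does;
        # redirects are not followed so any consent redirect stays visible.
        cookies = {'CONSENT': 'YES+'} if consent else {}
        return httpx.get(
            'https://www.google.com/search',
            params={'q': query},
            cookies=cookies,
            headers={'User-Agent': 'Mozilla/5.0'},
            follow_redirects=False,
        )

    # From a GDPR-region address, the first call typically answers 30x with a
    # Location header pointing at consent.google.com; the second returns 200.
    for consent in (False, True):
        resp = fetch_results('searxng', consent)
        print(consent, resp.status_code, resp.headers.get('location', ''))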