From 6513a5606431b5ab4fbe6eefb70510ba47ab5e04 Mon Sep 17 00:00:00 2001
From: spongebob33
Date: Fri, 26 Mar 2021 12:22:49 +0100
Subject: [PATCH] add core.ac.uk engine

---
 manage                |   1 +
 searx/engines/core.py | 103 ++++++++++++++++++++++++++++++++++++++++++
 searx/settings.yml    |   7 +++
 3 files changed, 111 insertions(+)
 create mode 100644 searx/engines/core.py

diff --git a/manage b/manage
index 7c89d8a7..fe02bf01 100755
--- a/manage
+++ b/manage
@@ -35,6 +35,7 @@ PYLINT_FILES=(
     searx/engines/google_scholar.py
     searx/engines/yahoo_news.py
     searx/engines/apkmirror.py
+    searx/engines/core.py
     searx_extra/update/update_external_bangs.py
 )
 
diff --git a/searx/engines/core.py b/searx/engines/core.py
new file mode 100644
index 00000000..ee2443b5
--- /dev/null
+++ b/searx/engines/core.py
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""CORE (science)
+
+"""
+# pylint: disable=missing-function-docstring
+
+from json import loads
+from datetime import datetime
+from urllib.parse import urlencode
+
+from searx import logger
+from searx.exceptions import SearxEngineAPIException
+
+logger = logger.getChild('CORE engine')
+
+about = {
+    "website": 'https://core.ac.uk',
+    "wikidata_id": None,
+    "official_api_documentation": 'https://core.ac.uk/documentation/api/',
+    "use_official_api": True,
+    "require_api_key": True,
+    "results": 'JSON',
+}
+
+categories = ['science']
+paging = True
+nb_per_page = 10
+
+# placeholder; request() raises until a real key is configured
+api_key = 'unset'
+
+base_url = 'https://core.ac.uk:443/api-v2/search/'
+search_string = '{query}?page={page}&pageSize={nb_per_page}&apiKey={apikey}'
+
+
+def request(query, params):
+    """Store the CORE search URL for ``query`` in ``params['url']``.
+
+    Raises SearxEngineAPIException while ``api_key`` is still ``'unset'``.
+    Returns the updated ``params``.
+    """
+    if api_key == 'unset':
+        raise SearxEngineAPIException('missing CORE API key')
+
+    args = {
+        'query': urlencode({'q': query}),
+        'nb_per_page': nb_per_page,
+        'page': params['pageno'],
+    }
+    params['url'] = base_url + search_string.format(apikey=api_key, **args)
+
+    # the URL carries the API key: log it with the key masked so that the
+    # secret does not end up in the debug log
+    logger.debug(
+        "query_url --> %s",
+        base_url + search_string.format(apikey='***', **args),
+    )
+    return params
+
+
+def response(resp):
+    """Convert the JSON body of a CORE search response into searx results.
+
+    Hits without a URL are skipped; other fields that are missing from a
+    hit are reported as ``None`` or left out of the metadata line.
+    """
+    results = []
+    json_data = loads(resp.text)
+
+    # NOTE(review): 'data' is assumed to be null or absent when there are
+    # no hits -- confirm against the API; both are treated as "no results"
+    for result in json_data.get('data') or []:
+
+        source = result['_source']
+
+        urls = source.get('urls')
+        if not urls:
+            # without a URL there is nothing to link the result to
+            continue
+
+        # the value is divided by 1000 before conversion, i.e. it is
+        # handled as milliseconds since the epoch
+        timestamp = source.get('publishedDate') or source.get('depositedDate')
+        date = datetime.fromtimestamp(timestamp / 1000) if timestamp else None
+
+        metadata = []
+        publisher = source.get('publisher')
+        if publisher and len(publisher) > 3:
+            metadata.append(publisher)
+        if source.get('topics'):
+            metadata.append(source['topics'][0])
+        if source.get('doi'):
+            metadata.append(source['doi'])
+
+        results.append({
+            'url': urls[0].replace('http://', 'https://', 1),
+            'title': source.get('title'),
+            'content': source.get('description'),
+            'publishedDate': date,
+            'metadata': ' / '.join(metadata),
+        })
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 8b4f9972..df106be5 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -281,6 +281,13 @@ engines:
     categories : images
     shortcut : cce
 
+#  - name : core.ac.uk
+#    engine : core
+#    categories : science
+#    shortcut : cor
+#    # get your API key from: https://core.ac.uk/api-keys/register/
+#    api_key : 'unset'
+
   - name : crossref
     engine : json_engine
     paging : True