From d39e2f7e36bf9b7fb2fca7e7201d9af860027a2f Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sun, 6 Jun 2021 12:58:53 +0200 Subject: [PATCH] [pylint] searx_extra/update/update_osm_keys_tags.py BTW: move some comments into script's doc-string Signed-off-by: Markus Heiser --- searx_extra/update/update_osm_keys_tags.py | 101 +++++++++++---------- 1 file changed, 53 insertions(+), 48 deletions(-) diff --git a/searx_extra/update/update_osm_keys_tags.py b/searx_extra/update/update_osm_keys_tags.py index 98c7617f..f803d0c3 100755 --- a/searx_extra/update/update_osm_keys_tags.py +++ b/searx_extra/update/update_osm_keys_tags.py @@ -1,12 +1,43 @@ #!/usr/bin/env python -""" -Fetch OSM keys and tags +# lint: pylint +# pylint: disable=missing-function-docstring +"""Fetch OSM keys and tags. -to get the i18n names, the scripts uses query.wikidata.org -instead of for example https://taginfo.openstreetmap.org/taginfo/apidoc +To get the i18n names, the scripts uses `Wikidata Query Service`_ instead of for +example `OSM tags API`_ (sidenote: the actual change log from +map.atownsend.org.uk_ might be useful to normalize OSM tags) + +.. _Wikidata Query Service: https://query.wikidata.org/ +.. _OSM tags API: https://taginfo.openstreetmap.org/taginfo/apidoc +.. _map.atownsend.org.uk: https://map.atownsend.org.uk/maps/map/changelog.html + +:py:obj:`SPARQL_TAGS_REQUEST` : + Wikidata SPARQL query that returns *type-categories* and *types*. The + returned tag is ``Tag:{category}={type}`` (see :py:func:`get_tags`). + Example: + + - https://taginfo.openstreetmap.org/tags/building=house#overview + - https://wiki.openstreetmap.org/wiki/Tag:building%3Dhouse + at the bottom of the infobox (right side), there is a link to wikidata: + https://www.wikidata.org/wiki/Q3947 + see property "OpenStreetMap tag or key" (P1282) + - https://wiki.openstreetmap.org/wiki/Tag%3Abuilding%3Dbungalow + https://www.wikidata.org/wiki/Q850107 + +:py:obj:`SPARQL_KEYS_REQUEST` : + Wikidata SPARQL query that returns *keys*. Example with "payment": + + - https://wiki.openstreetmap.org/wiki/Key%3Apayment + at the bottom of infobox (right side), there is a link to wikidata: + https://www.wikidata.org/wiki/Q1148747 + link made using the "OpenStreetMap tag or key" property (P1282) + to be confirm: there is a one wiki page per key ? + - https://taginfo.openstreetmap.org/keys/payment#values + - https://taginfo.openstreetmap.org/keys/payment:cash#values + + ``rdfs:label`` get all the labels without language selection + (as opposed to SERVICE ``wikibase:label``). -https://map.atownsend.org.uk/maps/map/changelog.html (the actual change log) -might be useful to normalize OSM tags """ import json @@ -19,50 +50,28 @@ from searx.engines.wikidata import send_wikidata_query from searx.languages import language_codes from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK - -# nominatim return type category and type -# the tag is "Tag:{category}={type}" -# Example: -# * https://taginfo.openstreetmap.org/tags/building=house#overview -# * https://wiki.openstreetmap.org/wiki/Tag:building%3Dhouse -# at the bottom of the infobox (right side), there is a link to wikidata: -# https://www.wikidata.org/wiki/Q3947 -# see property "OpenStreetMap tag or key" (P1282) -# * https://wiki.openstreetmap.org/wiki/Tag%3Abuilding%3Dbungalow -# https://www.wikidata.org/wiki/Q850107 -SARQL_TAGS_REQUEST = """ +SPARQL_TAGS_REQUEST = """ SELECT ?tag ?item ?itemLabel WHERE { ?item wdt:P1282 ?tag . ?item rdfs:label ?itemLabel . FILTER(STRSTARTS(?tag, 'Tag')) } -GROUP BY ?tag ?item ?itemLabel +GROUP BY ?tag ?item ?itemLabel ORDER BY ?tag ?item ?itemLabel """ -# keys -# Example with "payment"": -# * https://wiki.openstreetmap.org/wiki/Key%3Apayment -# at the bottom of infobox (right side), there is a link to wikidata: -# https://www.wikidata.org/wiki/Q1148747 -# link made using the "OpenStreetMap tag or key" property (P1282) -# to be confirm: there is a one wiki page per key ? -# * https://taginfo.openstreetmap.org/keys/payment#values -# * https://taginfo.openstreetmap.org/keys/payment:cash#values -# -# rdfs:label get all the labels without language selection -# (as opposed to SERVICE wikibase:label) -SARQL_KEYS_REQUEST = """ +SPARQL_KEYS_REQUEST = """ SELECT ?key ?item ?itemLabel WHERE { ?item wdt:P1282 ?key . ?item rdfs:label ?itemLabel . FILTER(STRSTARTS(?key, 'Key')) } -GROUP BY ?key ?item ?itemLabel +GROUP BY ?key ?item ?itemLabel ORDER BY ?key ?item ?itemLabel """ LANGUAGES = [l[0].lower() for l in language_codes] + PRESET_KEYS = { ('wikidata',): {'en': 'Wikidata'}, ('wikipedia',): {'en': 'Wikipedia'}, @@ -71,11 +80,11 @@ PRESET_KEYS = { ('fax',): {'en': 'Fax'}, ('internet_access', 'ssid'): {'en': 'Wi-Fi'}, } + INCLUDED_KEYS = { ('addr', ) } - def get_preset_keys(): results = collections.OrderedDict() for keys, value in PRESET_KEYS.items(): @@ -85,10 +94,9 @@ def get_preset_keys(): r.setdefault('*', value) return results - def get_keys(): results = get_preset_keys() - response = send_wikidata_query(SARQL_KEYS_REQUEST) + response = send_wikidata_query(SPARQL_KEYS_REQUEST) for key in response['results']['bindings']: keys = key['key']['value'].split(':')[1:] @@ -136,7 +144,7 @@ def get_keys(): def get_tags(): results = collections.OrderedDict() - response = send_wikidata_query(SARQL_TAGS_REQUEST) + response = send_wikidata_query(SPARQL_TAGS_REQUEST) for tag in response['results']['bindings']: tag_names = tag['tag']['value'].split(':')[1].split('=') if len(tag_names) == 2: @@ -149,7 +157,6 @@ def get_tags(): results.setdefault(tag_category, {}).setdefault(tag_type, {}).setdefault(lang, label) return results - def optimize_data_lang(translations): language_to_delete = [] # remove "zh-hk" entry if the value is the same as "zh" @@ -174,14 +181,12 @@ def optimize_data_lang(translations): for language in language_to_delete: del translations[language] - def optimize_tags(data): for v in data.values(): for translations in v.values(): optimize_data_lang(translations) return data - def optimize_keys(data): for k, v in data.items(): if k == '*': @@ -190,15 +195,15 @@ def optimize_keys(data): optimize_keys(v) return data - def get_osm_tags_filename(): return Path(searx_dir) / "data" / "osm_keys_tags.json" +if __name__ == '__main__': -set_timeout_for_thread(60) -result = { - 'keys': optimize_keys(get_keys()), - 'tags': optimize_tags(get_tags()), -} -with open(get_osm_tags_filename(), 'w') as f: - json.dump(result, f, indent=4, ensure_ascii=False) + set_timeout_for_thread(60) + result = { + 'keys': optimize_keys(get_keys()), + 'tags': optimize_tags(get_tags()), + } + with open(get_osm_tags_filename(), 'w') as f: + json.dump(result, f, indent=4, ensure_ascii=False)