|
|
@ -1,12 +1,43 @@ |
|
|
|
#!/usr/bin/env python |
|
|
|
""" |
|
|
|
Fetch OSM keys and tags |
|
|
|
|
|
|
|
to get the i18n names, the scripts uses query.wikidata.org |
|
|
|
instead of for example https://taginfo.openstreetmap.org/taginfo/apidoc |
|
|
|
# lint: pylint |
|
|
|
# pylint: disable=missing-function-docstring |
|
|
|
"""Fetch OSM keys and tags. |
|
|
|
|
|
|
|
To get the i18n names, the scripts uses `Wikidata Query Service`_ instead of for |
|
|
|
example `OSM tags API`_ (sidenote: the actual change log from |
|
|
|
map.atownsend.org.uk_ might be useful to normalize OSM tags) |
|
|
|
|
|
|
|
.. _Wikidata Query Service: https://query.wikidata.org/ |
|
|
|
.. _OSM tags API: https://taginfo.openstreetmap.org/taginfo/apidoc |
|
|
|
.. _map.atownsend.org.uk: https://map.atownsend.org.uk/maps/map/changelog.html |
|
|
|
|
|
|
|
:py:obj:`SPARQL_TAGS_REQUEST` : |
|
|
|
Wikidata SPARQL query that returns *type-categories* and *types*. The |
|
|
|
returned tag is ``Tag:{category}={type}`` (see :py:func:`get_tags`). |
|
|
|
Example: |
|
|
|
|
|
|
|
- https://taginfo.openstreetmap.org/tags/building=house#overview |
|
|
|
- https://wiki.openstreetmap.org/wiki/Tag:building%3Dhouse |
|
|
|
at the bottom of the infobox (right side), there is a link to wikidata: |
|
|
|
https://www.wikidata.org/wiki/Q3947 |
|
|
|
see property "OpenStreetMap tag or key" (P1282) |
|
|
|
- https://wiki.openstreetmap.org/wiki/Tag%3Abuilding%3Dbungalow |
|
|
|
https://www.wikidata.org/wiki/Q850107 |
|
|
|
|
|
|
|
:py:obj:`SPARQL_KEYS_REQUEST` : |
|
|
|
Wikidata SPARQL query that returns *keys*. Example with "payment": |
|
|
|
|
|
|
|
- https://wiki.openstreetmap.org/wiki/Key%3Apayment |
|
|
|
at the bottom of infobox (right side), there is a link to wikidata: |
|
|
|
https://www.wikidata.org/wiki/Q1148747 |
|
|
|
link made using the "OpenStreetMap tag or key" property (P1282) |
|
|
|
to be confirm: there is a one wiki page per key ? |
|
|
|
- https://taginfo.openstreetmap.org/keys/payment#values |
|
|
|
- https://taginfo.openstreetmap.org/keys/payment:cash#values |
|
|
|
|
|
|
|
``rdfs:label`` get all the labels without language selection |
|
|
|
(as opposed to SERVICE ``wikibase:label``). |
|
|
|
|
|
|
|
https://map.atownsend.org.uk/maps/map/changelog.html (the actual change log) |
|
|
|
might be useful to normalize OSM tags |
|
|
|
""" |
|
|
|
|
|
|
|
import json |
|
|
@ -19,50 +50,28 @@ from searx.engines.wikidata import send_wikidata_query |
|
|
|
from searx.languages import language_codes |
|
|
|
from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK |
|
|
|
|
|
|
|
|
|
|
|
# nominatim return type category and type |
|
|
|
# the tag is "Tag:{category}={type}" |
|
|
|
# Example: |
|
|
|
# * https://taginfo.openstreetmap.org/tags/building=house#overview |
|
|
|
# * https://wiki.openstreetmap.org/wiki/Tag:building%3Dhouse |
|
|
|
# at the bottom of the infobox (right side), there is a link to wikidata: |
|
|
|
# https://www.wikidata.org/wiki/Q3947 |
|
|
|
# see property "OpenStreetMap tag or key" (P1282) |
|
|
|
# * https://wiki.openstreetmap.org/wiki/Tag%3Abuilding%3Dbungalow |
|
|
|
# https://www.wikidata.org/wiki/Q850107 |
|
|
|
SARQL_TAGS_REQUEST = """ |
|
|
|
SPARQL_TAGS_REQUEST = """ |
|
|
|
SELECT ?tag ?item ?itemLabel WHERE { |
|
|
|
?item wdt:P1282 ?tag . |
|
|
|
?item rdfs:label ?itemLabel . |
|
|
|
FILTER(STRSTARTS(?tag, 'Tag')) |
|
|
|
} |
|
|
|
GROUP BY ?tag ?item ?itemLabel |
|
|
|
GROUP BY ?tag ?item ?itemLabel |
|
|
|
ORDER BY ?tag ?item ?itemLabel |
|
|
|
""" |
|
|
|
|
|
|
|
# keys |
|
|
|
# Example with "payment"": |
|
|
|
# * https://wiki.openstreetmap.org/wiki/Key%3Apayment |
|
|
|
# at the bottom of infobox (right side), there is a link to wikidata: |
|
|
|
# https://www.wikidata.org/wiki/Q1148747 |
|
|
|
# link made using the "OpenStreetMap tag or key" property (P1282) |
|
|
|
# to be confirm: there is a one wiki page per key ? |
|
|
|
# * https://taginfo.openstreetmap.org/keys/payment#values |
|
|
|
# * https://taginfo.openstreetmap.org/keys/payment:cash#values |
|
|
|
# |
|
|
|
# rdfs:label get all the labels without language selection |
|
|
|
# (as opposed to SERVICE wikibase:label) |
|
|
|
SARQL_KEYS_REQUEST = """ |
|
|
|
SPARQL_KEYS_REQUEST = """ |
|
|
|
SELECT ?key ?item ?itemLabel WHERE { |
|
|
|
?item wdt:P1282 ?key . |
|
|
|
?item rdfs:label ?itemLabel . |
|
|
|
FILTER(STRSTARTS(?key, 'Key')) |
|
|
|
} |
|
|
|
GROUP BY ?key ?item ?itemLabel |
|
|
|
GROUP BY ?key ?item ?itemLabel |
|
|
|
ORDER BY ?key ?item ?itemLabel |
|
|
|
""" |
|
|
|
|
|
|
|
LANGUAGES = [l[0].lower() for l in language_codes] |
|
|
|
|
|
|
|
PRESET_KEYS = { |
|
|
|
('wikidata',): {'en': 'Wikidata'}, |
|
|
|
('wikipedia',): {'en': 'Wikipedia'}, |
|
|
@ -71,11 +80,11 @@ PRESET_KEYS = { |
|
|
|
('fax',): {'en': 'Fax'}, |
|
|
|
('internet_access', 'ssid'): {'en': 'Wi-Fi'}, |
|
|
|
} |
|
|
|
|
|
|
|
INCLUDED_KEYS = { |
|
|
|
('addr', ) |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
def get_preset_keys(): |
|
|
|
results = collections.OrderedDict() |
|
|
|
for keys, value in PRESET_KEYS.items(): |
|
|
@ -85,10 +94,9 @@ def get_preset_keys(): |
|
|
|
r.setdefault('*', value) |
|
|
|
return results |
|
|
|
|
|
|
|
|
|
|
|
def get_keys(): |
|
|
|
results = get_preset_keys() |
|
|
|
response = send_wikidata_query(SARQL_KEYS_REQUEST) |
|
|
|
response = send_wikidata_query(SPARQL_KEYS_REQUEST) |
|
|
|
|
|
|
|
for key in response['results']['bindings']: |
|
|
|
keys = key['key']['value'].split(':')[1:] |
|
|
@ -136,7 +144,7 @@ def get_keys(): |
|
|
|
|
|
|
|
def get_tags(): |
|
|
|
results = collections.OrderedDict() |
|
|
|
response = send_wikidata_query(SARQL_TAGS_REQUEST) |
|
|
|
response = send_wikidata_query(SPARQL_TAGS_REQUEST) |
|
|
|
for tag in response['results']['bindings']: |
|
|
|
tag_names = tag['tag']['value'].split(':')[1].split('=') |
|
|
|
if len(tag_names) == 2: |
|
|
@ -149,7 +157,6 @@ def get_tags(): |
|
|
|
results.setdefault(tag_category, {}).setdefault(tag_type, {}).setdefault(lang, label) |
|
|
|
return results |
|
|
|
|
|
|
|
|
|
|
|
def optimize_data_lang(translations): |
|
|
|
language_to_delete = [] |
|
|
|
# remove "zh-hk" entry if the value is the same as "zh" |
|
|
@ -174,14 +181,12 @@ def optimize_data_lang(translations): |
|
|
|
for language in language_to_delete: |
|
|
|
del translations[language] |
|
|
|
|
|
|
|
|
|
|
|
def optimize_tags(data): |
|
|
|
for v in data.values(): |
|
|
|
for translations in v.values(): |
|
|
|
optimize_data_lang(translations) |
|
|
|
return data |
|
|
|
|
|
|
|
|
|
|
|
def optimize_keys(data): |
|
|
|
for k, v in data.items(): |
|
|
|
if k == '*': |
|
|
@ -190,15 +195,15 @@ def optimize_keys(data): |
|
|
|
optimize_keys(v) |
|
|
|
return data |
|
|
|
|
|
|
|
|
|
|
|
def get_osm_tags_filename(): |
|
|
|
return Path(searx_dir) / "data" / "osm_keys_tags.json" |
|
|
|
|
|
|
|
if __name__ == '__main__': |
|
|
|
|
|
|
|
set_timeout_for_thread(60) |
|
|
|
result = { |
|
|
|
'keys': optimize_keys(get_keys()), |
|
|
|
'tags': optimize_tags(get_tags()), |
|
|
|
} |
|
|
|
with open(get_osm_tags_filename(), 'w') as f: |
|
|
|
json.dump(result, f, indent=4, ensure_ascii=False) |
|
|
|
set_timeout_for_thread(60) |
|
|
|
result = { |
|
|
|
'keys': optimize_keys(get_keys()), |
|
|
|
'tags': optimize_tags(get_tags()), |
|
|
|
} |
|
|
|
with open(get_osm_tags_filename(), 'w') as f: |
|
|
|
json.dump(result, f, indent=4, ensure_ascii=False) |
|
|
|