searx/searx/engines/tineye.py

205 lines
6.1 KiB
Python

"""
Tineye - Reverse search images
"""
from urllib.parse import urlencode
from datetime import datetime
from flask_babel import gettext
from searx import logger
about = {
"website": "https://tineye.com",
"wikidata_id": "Q2382535",
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
categories = ['images']
paging = True
safesearch = False
base_url = 'https://tineye.com'
search_string = '/result_json/?page={page}&{query}'
logger = logger.getChild('tineye')
FORMAT_NOT_SUPPORTED = gettext(
"Could not read that image url. This may be due to an unsupported file"
" format. TinEye only supports images that are JPEG, PNG, GIF, BMP, TIFF or WebP."
)
"""TinEye error message"""
NO_SIGNATURE_ERROR = gettext(
"The image is too simple to find matches. TinEye requires a basic level of"
" visual detail to successfully identify matches."
)
"""TinEye error message"""
DOWNLOAD_ERROR = gettext("The image could not be downloaded.")
"""TinEye error message"""
def request(query, params):
params['url'] = base_url +\
search_string.format(
query=urlencode({'url': query}),
page=params['pageno'])
params['headers'].update({
'Connection': 'keep-alive',
'Accept-Encoding': 'gzip, defalte, br',
'Host': 'tineye.com',
'DNT': '1',
'TE': 'trailers',
})
query = urlencode({'url': query})
# see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py
params['url'] = base_url + search_string.format(query=query, page=params['pageno'])
return params
def parse_tineye_match(match_json):
"""Takes parsed JSON from the API server and turns it into a :py:obj:`dict`
object.
Attributes `(class Match) <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__
- `image_url`, link to the result image.
- `domain`, domain this result was found on.
- `score`, a number (0 to 100) that indicates how closely the images match.
- `width`, image width in pixels.
- `height`, image height in pixels.
- `size`, image area in pixels.
- `format`, image format.
- `filesize`, image size in bytes.
- `overlay`, overlay URL.
- `tags`, whether this match belongs to a collection or stock domain.
- `backlinks`, a list of Backlink objects pointing to the original websites
and image URLs. List items are instances of :py:obj:`dict`, (`Backlink
<https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__):
- `url`, the image URL to the image.
- `backlink`, the original website URL.
- `crawl_date`, the date the image was crawled.
"""
# HINT: there exists an alternative backlink dict in the domains list / e.g.::
#
# match_json['domains'][0]['backlinks']
backlinks = []
if "backlinks" in match_json:
for backlink_json in match_json["backlinks"]:
if not isinstance(backlink_json, dict):
continue
crawl_date = backlink_json.get("crawl_date")
if crawl_date:
crawl_date = datetime.fromisoformat(crawl_date[:-3])
else:
crawl_date = datetime.min
backlinks.append({
'url': backlink_json.get("url"),
'backlink': backlink_json.get("backlink"),
'crawl_date': crawl_date,
'image_name': backlink_json.get("image_name")}
)
return {
'image_url': match_json.get("image_url"),
'domain': match_json.get("domain"),
'score': match_json.get("score"),
'width': match_json.get("width"),
'height': match_json.get("height"),
'size': match_json.get("size"),
'image_format': match_json.get("format"),
'filesize': match_json.get("filesize"),
'overlay': match_json.get("overlay"),
'tags': match_json.get("tags"),
'backlinks': backlinks,
}
def response(resp):
"""Parse HTTP response from TinEye."""
results = []
try:
json_data = resp.json()
except Exception as exc: # pylint: disable=broad-except
msg = "can't parse JSON response // %s" % exc
logger.error(msg)
json_data = {'error': msg}
# handle error codes from Tineye
if resp.is_error:
if resp.status_code in (400, 422):
message = 'HTTP status: %s' % resp.status_code
error = json_data.get('error')
s_key = json_data.get('suggestions', {}).get('key', '')
if error and s_key:
message = "%s (%s)" % (error, s_key)
elif error:
message = error
if s_key == "Invalid image URL":
# test https://docs.searxng.org/_static/searxng-wordmark.svg
message = FORMAT_NOT_SUPPORTED
elif s_key == 'NO_SIGNATURE_ERROR':
# test https://pngimg.com/uploads/dot/dot_PNG4.png
message = NO_SIGNATURE_ERROR
elif s_key == 'Download Error':
# test https://notexists
message = DOWNLOAD_ERROR
logger.error(message)
return results
resp.raise_for_status()
# append results from matches
for match_json in json_data['matches']:
tineye_match = parse_tineye_match(match_json)
if not tineye_match['backlinks']:
continue
backlink = tineye_match['backlinks'][0]
results.append(
{
'template': 'images.html',
'url': backlink['backlink'],
'thumbnail_src': tineye_match['image_url'],
'source': backlink['url'],
'title': backlink['image_name'],
'img_src': backlink['url'],
'format': tineye_match['image_format'],
'widht': tineye_match['width'],
'height': tineye_match['height'],
'publishedDate': backlink['crawl_date'],
}
)
# append number of results
number_of_results = json_data.get('num_matches')
if number_of_results:
results.append({'number_of_results': number_of_results})
return results