645198d58b
Current Linux distros ship version 5 of the tesseract OCR software, so the nextcube screen test is ignored there. Let's make the check more flexible to allow newer versions, too, and remove the old v3 test since most Linux distros don't ship this version anymore. Message-ID: <20231101204323.35533-1-huth@tuxfamily.org> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> Signed-off-by: Thomas Huth <huth@tuxfamily.org>
47 lines
1.4 KiB
Python
47 lines
1.4 KiB
Python
# ...
|
|
#
|
|
# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
|
|
#
|
|
# This work is licensed under the terms of the GNU GPL, version 2 or
|
|
# later. See the COPYING file in the top-level directory.
|
|
|
|
import re
|
|
import logging
|
|
|
|
from avocado.utils import process
|
|
from avocado.utils.path import find_command, CmdNotFoundError
|
|
|
|
def tesseract_available(expected_version):
    """Tell whether a sufficiently recent tesseract binary is installed.

    Looks up ``tesseract`` on the PATH, runs ``tesseract --version`` and
    compares the major version it reports against *expected_version*.

    :param expected_version: minimum acceptable major version (an int).
    :returns: True if tesseract is installed and its major version is
              greater than or equal to *expected_version*; False if the
              binary is missing or no version number could be parsed
              from its output.
    """
    try:
        find_command('tesseract')
    except CmdNotFoundError:
        return False
    # This is an availability probe: a failing "--version" call should
    # result in False below rather than in an exception.
    res = process.run('tesseract --version', ignore_status=True)
    # The version banner may be printed on stderr rather than stdout,
    # so look at both streams in turn.
    for output in (res.stdout_text, res.stderr_text):
        # Accept multi-digit majors and an optional "v" prefix.
        match = re.search(r'tesseract\s+v?(\d+)', output)
        if match is not None:
            return int(match.group(1)) >= expected_version
    return False
|
|
|
|
|
|
def tesseract_ocr(image_path, tesseract_args='', tesseract_version=3):
    """Run tesseract on an image and return the recognised text lines.

    :param image_path: path of the image handed to tesseract.
    :param tesseract_args: extra command-line arguments placed before
                           the image path.
    :param tesseract_version: major version of the installed tesseract;
                              from version 4 on, ``--oem 1`` is appended
                              to the arguments.
    :returns: list of the non-empty output lines, stripped of
              surrounding whitespace, in output order.
    """
    console_logger = logging.getLogger('tesseract')
    console_logger.debug(image_path)
    # "--oem" exists since tesseract 4 and is still accepted by later
    # releases, so newer versions must get it as well (an exact "== 4"
    # comparison would silently drop it for version 5 and up).
    if tesseract_version >= 4:
        tesseract_args += ' --oem 1'
    proc = process.run("tesseract {} {} stdout".format(tesseract_args,
                                                       image_path))
    lines = []
    for line in proc.stdout_text.split('\n'):
        sline = line.strip()
        # Skip blank lines (and the page-separator residue tesseract
        # may leave at the end of its output).
        if sline:
            console_logger.debug(sline)
            lines.append(sline)
    return lines
|