qemu-e2k/tests/acceptance/tesseract_utils.py
Philippe Mathieu-Daudé ca82244928 tests/acceptance: Introduce tesseract_ocr() helper
We are going to reuse the tesseract OCR code.
Create a new tesseract_ocr() helper and use it.

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20201021105035.2477784-5-f4bug@amsat.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2021-02-08 12:37:33 +01:00

47 lines
1.4 KiB
Python

# ...
#
# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
#
# This work is licensed under the terms of the GNU GPL, version 2 or
# later. See the COPYING file in the top-level directory.
import logging
import re
import shlex

from avocado.utils import process
from avocado.utils.path import find_command, CmdNotFoundError
def tesseract_available(expected_version):
    """Report whether tesseract with the expected major version is installed.

    :param expected_version: major version number (for example 3 or 4) that
                             the installed ``tesseract`` binary must report
    :returns: True when the ``tesseract`` command is found and the major
              version printed by ``tesseract --version`` equals
              ``expected_version``; False when the command is missing or no
              version number can be parsed from its output
    """
    try:
        find_command('tesseract')
    except CmdNotFoundError:
        return False
    res = process.run('tesseract --version')
    # The version banner may show up on stdout or on stderr, so look at
    # stdout first and fall back to stderr.
    for output in (res.stdout_text, res.stderr_text):
        # Accept an optional "v" prefix ("tesseract v4.0.0") and capture the
        # whole major number, not just its first digit.
        match = re.match(r'\s*tesseract\s+v?(\d+)', output)
        if match is not None:
            # The captured group is guaranteed to be all digits.
            return int(match.group(1)) == expected_version
    return False
def tesseract_ocr(image_path, tesseract_args='', tesseract_version=3):
    """Run tesseract on an image and return the recognized text lines.

    The image path and every returned line are also sent to the
    ``tesseract`` logger at debug level.

    :param image_path: path of the image file handed to ``tesseract``
    :param tesseract_args: extra command-line options, given as one string
    :param tesseract_version: major tesseract version; when it is 4,
                              ``--oem 1`` is appended to the options
    :returns: list of the non-empty lines tesseract wrote to stdout, each
              stripped of leading and trailing whitespace
    """
    console_logger = logging.getLogger('tesseract')
    console_logger.debug(image_path)
    if tesseract_version == 4:
        tesseract_args += ' --oem 1'
    # Quote the path so whitespace or quote characters in it cannot break it
    # into several arguments when the command string is split into argv.
    # NOTE(review): relies on process.run() tokenizing the string
    # shell-style (shlex) -- confirm against the avocado version in use.
    cmd = "tesseract {} {} stdout".format(tesseract_args,
                                          shlex.quote(str(image_path)))
    proc = process.run(cmd)
    lines = []
    for line in proc.stdout_text.split('\n'):
        sline = line.strip()
        if not sline:
            # Drop blank / whitespace-only lines.
            continue
        console_logger.debug(sline)
        lines.append(sline)
    return lines