From cbb397939db355fb53da6bcf6bd8a2627f7a9584 Mon Sep 17 00:00:00 2001
From: asciimoo
Date: Sun, 10 Nov 2013 21:41:01 +0100
Subject: [PATCH] [enh] incasesensitive query highlighting

---
 searx/engines/__init__.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 2abf1610..c17a53f4 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -25,6 +25,7 @@ from urlparse import urlparse
 from searx import settings
 import ConfigParser
 import sys
+import re
 from datetime import datetime
 
 engine_dir = dirname(realpath(__file__))
@@ -106,8 +107,17 @@ def highlight_content(content, query):
     # TODO better html content detection
     if content.find('<') != -1:
         return content
-    for chunk in query.split():
-        content = content.replace(chunk, '<b>{0}</b>'.format(chunk))
+
+    if content.lower().find(query.lower()) > -1:
+        query_regex = '({0})'.format(re.escape(query))
+        content = re.sub(query_regex, '<b>\\1</b>', content, flags=re.I)
+    else:
+        for chunk in query.split():
+            if len(chunk) == 1:
+                query_regex = '(\W+{0}\W+)'.format(re.escape(chunk))
+            else:
+                query_regex = '({0})'.format(re.escape(chunk))
+            content = re.sub(query_regex, '<b>\\1</b>', content, flags=re.I)
     return content
 
 