From 59eeeaab87951fd6fa3302ec240db98902a20b2c Mon Sep 17 00:00:00 2001 From: asciimoo Date: Thu, 23 Jan 2014 11:08:08 +0100 Subject: [PATCH] [fix] html tag removal --- searx/engines/xpath.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index a7d24e2a..8960b5f2 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -2,6 +2,7 @@ from lxml import html from urllib import urlencode, unquote from urlparse import urlparse, urljoin from lxml.etree import _ElementStringResult +from searx.utils import html_to_text search_url = None url_xpath = None @@ -33,7 +34,7 @@ def extract_text(xpath_results): return ''.join(xpath_results) else: # it's a element - return xpath_results.text_content() + return html_to_text(xpath_results.text_content()) def extract_url(xpath_results):