1
0
mirror of https://github.com/searx/searx synced 2024-11-18 07:57:37 +01:00

[fix] html tag removal

This commit is contained in:
asciimoo 2014-01-23 11:08:08 +01:00
parent ba0f818e89
commit 59eeeaab87

View File

@@ -2,6 +2,7 @@ from lxml import html
from urllib import urlencode, unquote
from urlparse import urlparse, urljoin
from lxml.etree import _ElementStringResult
+from searx.utils import html_to_text
search_url = None
url_xpath = None
@@ -33,7 +34,7 @@ def extract_text(xpath_results):
return ''.join(xpath_results)
else:
# it's a element
-return xpath_results.text_content()
+return html_to_text(xpath_results.text_content())
def extract_url(xpath_results):