mirror of
https://github.com/searx/searx
synced 2024-11-18 07:57:37 +01:00
[fix] html tag removal
This commit is contained in:
parent
ba0f818e89
commit
59eeeaab87
@@ -2,6 +2,7 @@ from lxml import html
|
||||
from urllib import urlencode, unquote
|
||||
from urlparse import urlparse, urljoin
|
||||
from lxml.etree import _ElementStringResult
|
||||
+ from searx.utils import html_to_text
|
||||
|
||||
search_url = None
|
||||
url_xpath = None
|
||||
@@ -33,7 +34,7 @@ def extract_text(xpath_results):
|
||||
return ''.join(xpath_results)
|
||||
else:
|
||||
# it's a element
|
||||
- return xpath_results.text_content()
|
||||
+ return html_to_text(xpath_results.text_content())
|
||||
|
||||
|
||||
def extract_url(xpath_results):
|
||||
|
Loading…
Reference in New Issue
Block a user