[fix] update yahoo engine to match the website changes

This commit is contained in:
dalf 2015-02-20 12:34:13 +01:00
parent dc036ece85
commit 57996b12fc
2 changed files with 65 additions and 67 deletions

View File

@@ -24,11 +24,11 @@ base_url = 'https://search.yahoo.com/'
search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
# specific xpath variables
results_xpath = '//div[@class="res"]'
results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
url_xpath = './/h3/a/@href'
title_xpath = './/h3/a'
content_xpath = './/div[@class="abstr"]'
suggestion_xpath = '//div[@id="satat"]//a'
content_xpath = './/div[@class="compText aAbs"]'
suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a"
# remove yahoo-specific tracking-url
@@ -91,11 +91,12 @@ def response(resp):
'content': content})
# if no suggestion found, return results
if not dom.xpath(suggestion_xpath):
suggestions = dom.xpath(suggestion_xpath)
if not suggestions:
return results
# parse suggestion
for suggestion in dom.xpath(suggestion_xpath):
for suggestion in suggestions:
# append suggestion
results.append({'suggestion': extract_text(suggestion)})

View File

@@ -55,86 +55,83 @@ class TestYahooEngine(SearxTestCase):
self.assertEqual(yahoo.response(response), [])
html = """
<div class="res">
<div>
<h3>
<a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA;
_ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
/RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1">
<b>This</b> is the title
</a>
<ol class="reg mb-15 searchCenterMiddle">
<li class="first">
<div class="dd algo fst Sr">
<div class="compTitle">
<h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA;
_ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
/RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"
target="_blank" data-bid="54e712e13671c">
<b><b>This is the title</b></b></a>
</h3>
</div>
<span class="url" dir="ltr">www.<b>test</b>.com</span>
<div class="abstr">
<b>This</b> is the content
<div class="compText aAbs">
<p class="lh-18"><b><b>This is the </b>content</b>
</p>
</div>
</div>
<div id="satat" data-bns="Yahoo" data-bk="124.1">
<h2>Also Try</h2>
<table>
<tbody>
<tr>
<td>
<a id="srpnat0" class="" href="https://search.yahoo.com/search=rs-bottom" >
<span>
<b></b>This is <b>the suggestion</b>
</span>
</a>
</td>
</tr>
</tbody>
</table>
</li>
<li>
<div class="dd algo lst Sr">
<div class="compTitle">
<h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=AwrBT7zgEudUW.wAe2ZXNyoA;
_ylu=X3oDMTBybGY3bmpvBGNvbG8DYmYxBHBvcwMyBHZ0aWQDBHNlYwNzcg--/RV=2\/RE=1424458593/RO=10
/RU=https%3a%2f%2fthis.is.the.second.url%2f/RK=0/RS=jIctjj_cBH1Efj88GCgHKp3__Qk-"
target="_blank" data-bid="54e712e136926">
This is the second <b><b>title</b></b></a>
</h3>
</div>
<div class="compText aAbs">
<p class="lh-18">This is the second content</p>
</div>
</div>
</li>
</ol>
<div class="dd assist fst lst AlsoTry" data-bid="54e712e138d04">
<div class="compTitle mb-4 h-17">
<h3 class="title">Also Try</h3> </div>
<table class="compTable m-0 ac-1st td-u fz-ms">
<tbody>
<tr>
<td class="w-50p pr-28"><a href="https://search.yahoo.com/"><B>This is the </B>suggestion<B></B></a>
</td>
</tr>
</table>
</div>
"""
response = mock.Mock(text=html)
results = yahoo.response(response)
print results
self.assertEqual(type(results), list)
self.assertEqual(len(results), 2)
self.assertEqual(len(results), 3)
self.assertEqual(results[0]['title'], 'This is the title')
self.assertEqual(results[0]['url'], 'https://this.is.the.url/')
self.assertEqual(results[0]['content'], 'This is the content')
self.assertEqual(results[1]['suggestion'], 'This is the suggestion')
self.assertEqual(results[1]['title'], 'This is the second title')
self.assertEqual(results[1]['url'], 'https://this.is.the.second.url/')
self.assertEqual(results[1]['content'], 'This is the second content')
self.assertEqual(results[2]['suggestion'], 'This is the suggestion')
html = """
<div class="res">
<div>
<h3>
<a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA;
_ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
/RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1">
<b>This</b> is the title
</a>
<ol class="reg mb-15 searchCenterMiddle">
<li class="first">
<div class="dd algo fst Sr">
<div class="compTitle">
<h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA;
_ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10
/RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"
target="_blank" data-bid="54e712e13671c">
<b><b>This is the title</b></b></a>
</h3>
</div>
<span class="url" dir="ltr">www.<b>test</b>.com</span>
<div class="abstr">
<b>This</b> is the content
</div>
</div>
<div class="res">
<div>
<h3>
<a id="link-1" class="yschttl spt">
<b>This</b> is the title
</a>
</h3>
</div>
<span class="url" dir="ltr">www.<b>test</b>.com</span>
<div class="abstr">
<b>This</b> is the content
</div>
</div>
<div class="res">
<div>
<h3>
</h3>
</div>
<span class="url" dir="ltr">www.<b>test</b>.com</span>
<div class="abstr">
<b>This</b> is the content
<div class="compText aAbs">
<p class="lh-18"><b><b>This is the </b>content</b>
</p>
</div>
</div>
</li>
</ol>
"""
response = mock.Mock(text=html)
results = yahoo.response(response)