This commit is contained in:
SepehrRS 2022-09-24 17:24:17 -04:00 committed by GitHub
commit 9f638a5842
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 269 additions and 1 deletions

View File

@ -11,3 +11,4 @@ python-dateutil==2.8.2
pyyaml==6.0
requests[socks]==2.28.1
setproctitle==1.3.2
ipwhois==1.2.0

View File

@ -389,6 +389,16 @@ class Preferences:
'2': 2
}
),
'tag_privacy_violators': MapSetting(
settings['search'].get('tag_privacy_violators',False),
is_locked('tag_privacy_violators'),
map={
'0': False,
'1': True,
'False': False,
'True': True
}
),
'theme': EnumStringSetting(
settings['ui'].get('default_theme', 'oscar'),
is_locked('theme'),
@ -413,6 +423,7 @@ class Preferences:
settings['ui'].get('theme_args', {}).get('oscar_style', 'logicodev'),
is_locked('oscar-style'),
choices=['', 'logicodev', 'logicodev-dark', 'pointhi']),
''
'advanced_search': MapSetting(
settings['ui'].get('advanced_search', False),
is_locked('advanced_search'),

View File

@ -20,6 +20,7 @@ search:
ban_time_on_fail : 5 # ban time in seconds after engine errors
max_ban_time_on_fail : 120 # max ban time in seconds after engine errors
prefer_configured_language: False # increase weight of results in configured language in ranking
tag_privacy_violators: False # tag privacy violators, disabled by default.
server:
port : 8888

View File

@ -0,0 +1,227 @@
import socket
import ipwhois
from searx import logger
# Maps an autonomous system number (ASN), written as a decimal string, to the
# short display name of the CDN / cloud provider it is attributed to.
# TagPrivacyViolators looks up the 'asn' field of an RDAP answer in this table;
# any result whose ASN is present gets tagged with the mapped name.
# Keys are strings (not ints) because they are compared against that field
# as-is.
ASN_PRIVACY = {
    # Akamai
    "55770": "Akamai",
    "55409": "Akamai",
    "49846": "Akamai",
    "49249": "Akamai",
    "48163": "Akamai",
    "45700": "Akamai",
    "43639": "Akamai",
    "39836": "Akamai",
    "393560": "Akamai",
    "393234": "Akamai",
    "36183": "Akamai",
    "36029": "Akamai",
    "35994": "Akamai",
    "35993": "Akamai",
    "35204": "Akamai",
    "34850": "Akamai",
    "34164": "Akamai",
    "33905": "Akamai",
    "32787": "Akamai",
    "31377": "Akamai",
    "31110": "Akamai",
    "31109": "Akamai",
    "31108": "Akamai",
    "31107": "Akamai",
    "30675": "Akamai",
    "26008": "Akamai",
    "24319": "Akamai",
    "23903": "Akamai",
    "23455": "Akamai",
    "23454": "Akamai",
    "22452": "Akamai",
    "22207": "Akamai",
    "21399": "Akamai",
    "21357": "Akamai",
    "21342": "Akamai",
    "20940": "Akamai",
    "20189": "Akamai",
    "18717": "Akamai",
    "18680": "Akamai",
    "17334": "Akamai",
    "16702": "Akamai",
    "16625": "Akamai",
    "12222": "Akamai",
    # Alibaba
    "45104": "Alibaba",
    "45103": "Alibaba",
    "45102": "Alibaba",
    "45096": "Alibaba",
    "37963": "Alibaba",
    "34947": "Alibaba",
    "134963": "Alibaba",
    # Amazon
    "9059": "Amazon",
    "8987": "Amazon",
    "7224": "Amazon",
    "62785": "Amazon",
    "58588": "Amazon",
    "395343": "Amazon",
    "39111": "Amazon",
    "38895": "Amazon",
    "264167": "Amazon",
    "19047": "Amazon",
    "17493": "Amazon",
    "16509": "Amazon",
    "14618": "Amazon",
    "135630": "Amazon",
    "10124": "Amazon",
    # Aryaka Networks, Inc
    "11179": "Aryaka Networks",
    # Azure
    "53587": "Azure",
    "24221": "Azure",
    "134235": "Azure",
    # Cloudflare
    "395747": "Cloudflare",
    "394536": "Cloudflare",
    "209242": "Cloudflare",
    "203898": "Cloudflare",
    "202623": "Cloudflare",
    "14789": "Cloudflare",
    "139242": "Cloudflare",
    "133877": "Cloudflare",
    "13335": "Cloudflare",
    # CDNetworks Inc
    "43303": "CDNetworks",
    "40366": "CDNetworks",
    "38670": "CDNetworks",
    "38107": "CDNetworks",
    "36408": "CDNetworks",
    "204720": "CDNetworks",
    # EdgeCast Networks, Inc. d/b/a Verizon Digital Media Services
    "15133": "EdgeCast Networks",
    # Highwinds Network Group, Inc.
    "33438": "Highwinds Network",
    "29798": "Highwinds Network",
    "20446": "Highwinds Network",
    "18607": "Highwinds Network",
    "11588": "Highwinds Network",
    # Incapsula Inc
    "19551": "Incapsula",
    # Instart Logic, Inc
    # (label previously misspelled "Instant Logics"; it is shown to users)
    "33047": "Instart Logic",
    "133103": "Instart Logic",
    "6993": "Instart Logic",
    "55755": "Instart Logic",
    "48910": "Instart Logic",
    "4513": "Instart Logic",
    "30637": "Instart Logic",
    "30636": "Instart Logic",
    "30282": "Instart Logic",
    "29791": "Instart Logic",
    "24295": "Instart Logic",
    "24247": "Instart Logic",
    "24246": "Instart Logic",
    "24245": "Instart Logic",
    "22212": "Instart Logic",
    "22211": "Instart Logic",
    "22132": "Instart Logic",
    "19024": "Instart Logic",
    "17675": "Instart Logic",
    "15570": "Instart Logic",
    "15421": "Instart Logic",
    "14745": "Instart Logic",
    "14744": "Instart Logic",
    "14743": "Instart Logic",
    "14742": "Instart Logic",
    "14636": "Instart Logic",
    "13890": "Instart Logic",
    "13792": "Instart Logic",
    "13791": "Instart Logic",
    "13790": "Instart Logic",
    "13789": "Instart Logic",
    "12182": "Instart Logic",
    "12181": "Instart Logic",
    "12180": "Instart Logic",
    "12179": "Instart Logic",
    "12178": "Instart Logic",
    "11855": "Instart Logic",
    "11854": "Instart Logic",
    "11853": "Instart Logic",
    "10913": "Instart Logic",
    "10912": "Instart Logic",
    "10911": "Instart Logic",
    "10910": "Instart Logic",
    # Fastly
    "54113": "Fastly",
    "394192": "Fastly",
    # Google
    "45566": "Google",
    "43515": "Google",
    "41264": "Google",
    "40873": "Google",
    "396982": "Google",
    "395973": "Google",
    "394699": "Google",
    "394639": "Google",
    "394507": "Google",
    "36987": "Google",
    "36492": "Google",
    "36385": "Google",
    "36384": "Google",
    "36040": "Google",
    "36039": "Google",
    "26910": "Google",
    "26684": "Google",
    "22859": "Google",
    "22577": "Google",
    "19527": "Google",
    "16550": "Google",
    "15169": "Google",
    "13949": "Google",
    "139190": "Google",
    "139070": "Google",
    # Limelight
    "60261": "Limelight",
    "55429": "Limelight",
    "45396": "Limelight",
    "38622": "Limelight",
    "38621": "Limelight",
    "37277": "Limelight",
    "27191": "Limelight",
    "26506": "Limelight",
    "25804": "Limelight",
    "23164": "Limelight",
    "23135": "Limelight",
    "23059": "Limelight",
    "22822": "Limelight",
    "12411": "Limelight",
    # Yottaa, Inc
    "393259": "Yottaa",
}
class TagPrivacyViolators:
    """Tag search results whose host resolves into a network listed in ASN_PRIVACY.

    Each host is resolved to an IP address, an RDAP lookup is made for that
    address, and the reported ASN is looked up in ASN_PRIVACY. Positive
    matches are remembered in ``self.cache`` so later calls skip the lookup.
    """

    def __init__(self):
        # host -> provider name; only hosts that matched ASN_PRIVACY are stored.
        self.cache = {}

    def find_privacy_violators(self, results):
        """Return a ``{result url: provider name}`` dict for the tagged results.

        :param results: iterable of result dicts; each must provide ``'url'``
            and ``'parsed_url'`` (an object exposing ``netloc``).
        :returns: dict containing only the results whose host matched an
            entry of ASN_PRIVACY. Results whose lookup failed or did not
            match are left out.
        """
        tagged_websites = {}
        if not results:
            return tagged_websites

        # Dump the cache once per call instead of re-joining it per result.
        logger.debug('cache: %s', ', '.join(self.cache))

        for result in results:
            parsed_url = result['parsed_url']
            # netloc may carry "user@" or ":port", which gethostbyname cannot
            # resolve; prefer the bare hostname when the object offers one
            # (assumes a urllib.parse result - netloc is kept as fallback).
            host = getattr(parsed_url, 'hostname', None) or parsed_url.netloc

            if host in self.cache:
                logger.info("%s is in cache", result['url'])
                tagged_websites[result['url']] = self.cache[host]
                continue

            # Computed fresh for every result: a failed lookup must neither
            # leave the name unbound nor reuse the previous result's value.
            asn_value = self._lookup_provider(host)
            if asn_value:
                tagged_websites[result['url']] = asn_value
                self.cache[host] = asn_value
        return tagged_websites

    def _lookup_provider(self, host):
        """Return the ASN_PRIVACY name for *host*, or None if unknown or on failure."""
        try:
            ipwhois_obj = ipwhois.IPWhois(socket.gethostbyname(host), timeout=2)
            answer = ipwhois_obj.lookup_rdap()
            return ASN_PRIVACY.get(answer['asn'])
        except Exception as e:  # best effort: tagging must never break a search
            logger.error("privacy lookup failed for %s: %s", host, e)
            return None

View File

@ -150,6 +150,17 @@
{{ preferences_item_footer(info, label, rtl) }}
{% endif %}
{% if 'tag_privacy_violators' not in locked_preferences %}
{% set label = _('Tag privacy violators') %}
{% set info = _('Tag websites that are behind Cloudflare and other privacy violators') %}
{{ preferences_item_header(info, label, rtl, 'tag_privacy_violators') }}
<select class="form-control {{ custom_select_class(rtl) }}" name="tag_privacy_violators" id="tag_privacy_violators">
<option value="1" {% if tag_privacy_violators %}selected="selected"{% endif %}>{{ _('On') }}</option>
<option value="0" {% if not tag_privacy_violators %}selected="selected"{% endif %}>{{ _('Off')}}</option>
</select>
{{ preferences_item_footer(info, label, rtl) }}
{% endif %}
{% if 'autofocus' not in locked_preferences %}
{% set label = _('Autofocus search field') %}
{% set info = _('Turn off if you use your keyboard to scroll') %}

View File

@ -128,6 +128,11 @@
{% for result in results -%}
<div class="result {% if result['template'] %}result-{{ result.template|replace('.html', '') }}{% else %}result-default{% endif %}{% for e in result.engines %} {{ e }}{% endfor %}">
{% if result.privacy_violator %}
<div class="pull-right">
{{ result.asn_name }}
</div>
{% endif %}
{%- set index = loop.index -%}
{%- if result.template -%}
{% include get_result_template('oscar', result['template']) %}

View File

@ -124,6 +124,11 @@
<div id="urls">
{% for result in results %}
{% set index = loop.index %}
{% if result['privacy_violator'] %}
<div align="right">
{{ result['asn_name'] }}
</div>
{% endif %}
{% if result['template'] %}
{% include get_result_template('simple', result['template']) %}
{% else %}

View File

@ -84,7 +84,8 @@ from searx.poolrequests import get_global_proxies
from searx.answerers import ask
from searx.metrology.error_recorder import errors_per_engines
from searx.settings_loader import get_default_settings_path
from searx.tag_privacy_violators import TagPrivacyViolators
tag_websites = TagPrivacyViolators()
# serve pages with HTTP/1.1
from werkzeug.serving import WSGIRequestHandler
WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))
@ -623,6 +624,8 @@ def search():
# results
results = result_container.get_ordered_results()
if request.preferences.get_value('tag_privacy_violators'):
privacy_violators = tag_websites.find_privacy_violators(results)
number_of_results = result_container.results_number()
if number_of_results < result_container.results_length():
number_of_results = 0
@ -636,6 +639,9 @@ def search():
# output
for result in results:
if result['url'] in privacy_violators:
result['privacy_violator'] = True
result['asn_name'] = privacy_violators.get(result['url'])
if output_format == 'html':
if 'content' in result and result['content']:
result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
@ -1100,6 +1106,7 @@ def config():
'default_locale': settings['ui']['default_locale'],
'autocomplete': settings['search']['autocomplete'],
'safe_search': settings['search']['safe_search'],
'tag_privacy_violators': settings['search']['tag_privacy_violators'],
'default_theme': settings['ui']['default_theme'],
'version': VERSION_STRING,
'brand': {