Add tagprivacyviolators, Fixing Issue #1987

This commit is contained in:
SepehrRasouli 2022-08-19 11:22:45 +04:30
parent d004439646
commit 57b5b7d3dd
No known key found for this signature in database
GPG Key ID: 24DAE84A8F63790E
1 changed files with 225 additions and 0 deletions

View File

@ -0,0 +1,225 @@
import socket
import ipwhois
from searx import logger
# Maps an autonomous system number (as a string) to the display name of the
# CDN / cloud provider that operates it. Looked up with the 'asn' field of an
# RDAP answer to decide how a website should be tagged.
ASN_PRIVACY = {
    # Akamai
    "55770": "Akamai",
    "55409": "Akamai",
    "49846": "Akamai",
    "49249": "Akamai",
    "48163": "Akamai",
    "45700": "Akamai",
    "43639": "Akamai",
    "39836": "Akamai",
    "393560": "Akamai",
    "393234": "Akamai",
    "36183": "Akamai",
    "36029": "Akamai",
    "35994": "Akamai",
    "35993": "Akamai",
    "35204": "Akamai",
    "34850": "Akamai",
    "34164": "Akamai",
    "33905": "Akamai",
    "32787": "Akamai",
    "31377": "Akamai",
    "31110": "Akamai",
    "31109": "Akamai",
    "31108": "Akamai",
    "31107": "Akamai",
    "30675": "Akamai",
    "26008": "Akamai",
    "24319": "Akamai",
    "23903": "Akamai",
    "23455": "Akamai",
    "23454": "Akamai",
    "22452": "Akamai",
    "22207": "Akamai",
    "21399": "Akamai",
    "21357": "Akamai",
    "21342": "Akamai",
    "20940": "Akamai",
    "20189": "Akamai",
    "18717": "Akamai",
    "18680": "Akamai",
    "17334": "Akamai",
    "16702": "Akamai",
    "16625": "Akamai",
    "12222": "Akamai",
    # Alibaba
    "45104": "Alibaba",
    "45103": "Alibaba",
    "45102": "Alibaba",
    "45096": "Alibaba",
    "37963": "Alibaba",
    "34947": "Alibaba",
    "134963": "Alibaba",
    # Amazon
    "9059": "Amazon",
    "8987": "Amazon",
    "7224": "Amazon",
    "62785": "Amazon",
    "58588": "Amazon",
    "395343": "Amazon",
    "39111": "Amazon",
    "38895": "Amazon",
    "264167": "Amazon",
    "19047": "Amazon",
    "17493": "Amazon",
    "16509": "Amazon",
    "14618": "Amazon",
    "135630": "Amazon",
    "10124": "Amazon",
    # Aryaka Networks, Inc
    "11179": "Aryaka Networks",
    # Azure
    "53587": "Azure",
    "24221": "Azure",
    "134235": "Azure",
    # Cloudflare
    "395747": "Cloudflare",
    "394536": "Cloudflare",
    "209242": "Cloudflare",
    "203898": "Cloudflare",
    "202623": "Cloudflare",
    "14789": "Cloudflare",
    "139242": "Cloudflare",
    "133877": "Cloudflare",
    "13335": "Cloudflare",
    # CDNetworks Inc
    "43303": "CDNetworks",
    "40366": "CDNetworks",
    "38670": "CDNetworks",
    "38107": "CDNetworks",
    "36408": "CDNetworks",
    "204720": "CDNetworks",
    # EdgeCast Networks, Inc. d/b/a Verizon Digital Media Services
    "15133": "EdgeCast Networks",
    # Highwinds Network Group, Inc.
    "33438": "Highwinds Network",
    "29798": "Highwinds Network",
    "20446": "Highwinds Network",
    "18607": "Highwinds Network",
    "11588": "Highwinds Network",
    # Incapsula Inc
    "19551": "Incapsula",
    # Instart Logic, Inc
    "33047": "Instart Logic",
    "133103": "Instart Logic",
    "6993": "Instart Logic",
    "55755": "Instart Logic",
    "48910": "Instart Logic",
    "4513": "Instart Logic",
    "30637": "Instart Logic",
    "30636": "Instart Logic",
    "30282": "Instart Logic",
    "29791": "Instart Logic",
    "24295": "Instart Logic",
    "24247": "Instart Logic",
    "24246": "Instart Logic",
    "24245": "Instart Logic",
    "22212": "Instart Logic",
    "22211": "Instart Logic",
    "22132": "Instart Logic",
    "19024": "Instart Logic",
    "17675": "Instart Logic",
    "15570": "Instart Logic",
    "15421": "Instart Logic",
    "14745": "Instart Logic",
    "14744": "Instart Logic",
    "14743": "Instart Logic",
    "14742": "Instart Logic",
    "14636": "Instart Logic",
    "13890": "Instart Logic",
    "13792": "Instart Logic",
    "13791": "Instart Logic",
    "13790": "Instart Logic",
    "13789": "Instart Logic",
    "12182": "Instart Logic",
    "12181": "Instart Logic",
    "12180": "Instart Logic",
    "12179": "Instart Logic",
    "12178": "Instart Logic",
    "11855": "Instart Logic",
    "11854": "Instart Logic",
    "11853": "Instart Logic",
    "10913": "Instart Logic",
    "10912": "Instart Logic",
    "10911": "Instart Logic",
    "10910": "Instart Logic",
    # Fastly
    "54113": "Fastly",
    "394192": "Fastly",
    # Google
    "45566": "Google",
    "43515": "Google",
    "41264": "Google",
    "40873": "Google",
    "396982": "Google",
    "395973": "Google",
    "394699": "Google",
    "394639": "Google",
    "394507": "Google",
    "36987": "Google",
    "36492": "Google",
    "36385": "Google",
    "36384": "Google",
    "36040": "Google",
    "36039": "Google",
    "26910": "Google",
    "26684": "Google",
    "22859": "Google",
    "22577": "Google",
    "19527": "Google",
    "16550": "Google",
    "15169": "Google",
    "13949": "Google",
    "139190": "Google",
    "139070": "Google",
    # Limelight
    "60261": "Limelight",
    "55429": "Limelight",
    "45396": "Limelight",
    "38622": "Limelight",
    "38621": "Limelight",
    "37277": "Limelight",
    "27191": "Limelight",
    "26506": "Limelight",
    "25804": "Limelight",
    "23164": "Limelight",
    "23135": "Limelight",
    "23059": "Limelight",
    "22822": "Limelight",
    "12411": "Limelight",
    # Yottaa, Inc
    "393259": "Yottaa",
}
class TagPrivacyViolators:
    """Tag websites that violate the user's privacy.

    A website is tagged when the autonomous system number (ASN) reported by
    an RDAP lookup of its resolved IP address is listed in ``ASN_PRIVACY``.
    """

    def __init__(self):
        # Maps a result's ``parsed_url.netloc`` to the provider name found in
        # ASN_PRIVACY. Only hosts that matched a provider are stored.
        self.cache = {}

    def find_privacy_violators(self, results):
        """Find results whose host belongs to a provider in ``ASN_PRIVACY``.

        For every result the host is resolved with ``socket.gethostbyname``,
        looked up via ``ipwhois`` RDAP, and the returned ASN is matched
        against ``ASN_PRIVACY``. Matches are remembered in ``self.cache``.

        Args:
            results: Iterable of mappings, each providing a ``'url'`` entry
                and a ``'parsed_url'`` entry exposing a ``netloc`` attribute.

        Returns:
            A dict mapping each matching result's ``'url'`` to the provider
            name. Results whose lookup fails or whose ASN is not listed are
            left out.
        """
        tagged_websites = {}
        for result in results:
            netloc = result['parsed_url'].netloc
            logger.debug('cache: %s', ', '.join(self.cache))
            if netloc in self.cache:
                logger.info("%s is in cache", result['url'])
                tagged_websites[result['url']] = self.cache[netloc]
                continue
            try:
                ip_address = socket.gethostbyname(netloc)
                answer = ipwhois.IPWhois(ip_address, timeout=2).lookup_rdap()
                provider = ASN_PRIVACY.get(answer['asn'])
            except Exception as e:
                # Best-effort tagging: a failed DNS/RDAP lookup must not
                # break the caller. Skip this result so it is neither tagged
                # with a value left over from a previous iteration nor hit
                # by an unbound local when the very first lookup fails.
                logger.error(e)
                continue
            # NOTE(review): hosts without a match are not cached, so they
            # are looked up again on every call.
            if provider:
                tagged_websites[result['url']] = provider
                self.cache[netloc] = provider
        return tagged_websites