Load the Ahmia blacklist through searx.data.

searx/data/__init__.py gains ahmia_blacklist_loader(), which opens
ahmia_blacklist.txt under data_dir as UTF-8 and returns its
whitespace-separated tokens as a list.  searx/plugins/ahmia_filter.py calls
that loader instead of opening the file itself, and not_blacklisted() hashes
result['parsed_url'].hostname rather than re-parsing result['url'];
not_blacklisted() now also returns True when 'parsed_url' is missing or falsy.

NOTE(review): the list assigned to __init__ looks like it was meant to be
__all__, and 'ENGINES_LANGUGAGES' does not match the ENGINES_LANGUAGES name
defined further down -- confirm before relying on it as an export list.
NOTE(review): blank context lines were restored from the hunk line counts;
verify against the target tree before applying.

diff --git a/searx/data/__init__.py b/searx/data/__init__.py
index 391947bf..1116e5d4 100644
--- a/searx/data/__init__.py
+++ b/searx/data/__init__.py
@@ -2,7 +2,7 @@ import json
 from pathlib import Path
 
 
-__init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader']
+__init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader', 'ahmia_blacklist_loader']
 
 data_dir = Path(__file__).parent
 
@@ -16,6 +16,11 @@ def bangs_loader():
     return load('bangs.json')
 
 
+def ahmia_blacklist_loader():
+    with open(str(data_dir / 'ahmia_blacklist.txt'), encoding='utf-8') as fd:
+        return fd.read().split()
+
+
 ENGINES_LANGUAGES = load('engines_languages.json')
 CURRENCIES = load('currencies.json')
 USER_AGENTS = load('useragents.json')
diff --git a/searx/plugins/ahmia_filter.py b/searx/plugins/ahmia_filter.py
index 8eb7f941..83b05e4d 100644
--- a/searx/plugins/ahmia_filter.py
+++ b/searx/plugins/ahmia_filter.py
@@ -3,9 +3,7 @@
 '''
 
 from hashlib import md5
-from os.path import join
-from urllib.parse import urlparse
-from searx import searx_dir
+from searx.data import ahmia_blacklist_loader
 
 name = "Ahmia blacklist"
 description = "Filter out onion results that appear in Ahmia's blacklist. (See https://ahmia.fi/blacklist)"
@@ -18,15 +16,14 @@ ahmia_blacklist = None
 def get_ahmia_blacklist():
     global ahmia_blacklist
     if not ahmia_blacklist:
-        with open(join(join(searx_dir, "data"), "ahmia_blacklist.txt"), 'r') as f:
-            ahmia_blacklist = f.read().split()
+        ahmia_blacklist = ahmia_blacklist_loader()
     return ahmia_blacklist
 
 
 def not_blacklisted(result):
-    if not result.get('is_onion'):
+    if not result.get('is_onion') or not result.get('parsed_url'):
         return True
-    result_hash = md5(urlparse(result.get('url')).hostname.encode()).hexdigest()
+    result_hash = md5(result['parsed_url'].hostname.encode()).hexdigest()
     return result_hash not in get_ahmia_blacklist()
 
 