forked from tanner/qotnews
feat: Add domain exclusion to smallweb list loading
This commit is contained in:
@@ -31,13 +31,17 @@ from flask_cors import CORS
|
|||||||
|
|
||||||
# Hostnames (leading 'www.' removed) of sites on the Kagi smallweb list.
# Rebound by load_smallweb_list() after each successful download.
smallweb_set = set()

# Default location of the smallweb list: plain text, one URL per line.
SMALLWEB_LIST_URL = 'https://raw.githubusercontent.com/kagisearch/smallweb/refs/heads/main/smallweb.txt'


def _smallweb_host(line):
    """Return the normalized hostname for one line of the list, or None.

    Blank lines, lines without a parseable hostname, and lines that
    ``urlparse`` rejects all yield None so the caller can skip them.
    A single leading ``'www.'`` is removed from the hostname.
    """
    line = line.strip()
    if not line:
        return None
    try:
        host = urlparse(line).hostname
    except ValueError:
        # One malformed entry should not abort loading the whole list.
        return None
    if not host:
        return None
    # Strip only a leading 'www.'; str.replace() would also rewrite hosts
    # such as 'awww.example.org' into 'aexample.org'.
    if host.startswith('www.'):
        host = host[len('www.'):]
    return host or None


def load_smallweb_list(url=SMALLWEB_LIST_URL):
    """Download the smallweb list and rebuild the global ``smallweb_set``.

    Args:
        url: Location of the list (one URL per line, UTF-8). Defaults to
            the Kagi smallweb list on GitHub.

    Returns:
        None. On success ``smallweb_set`` is replaced with the set of
        normalized hostnames, minus the excluded domains. On any failure
        the error is logged and ``smallweb_set`` is left unchanged.
    """
    # Domains that must never be treated as smallweb hosts. Compared against
    # the normalized hostname so that 'www.github.com' is excluded as well.
    EXCLUDED = {
        'github.com',
    }

    global smallweb_set
    try:
        with urllib.request.urlopen(url, timeout=10) as response:
            lines = response.read().decode('utf-8').splitlines()
        hosts = {_smallweb_host(line) for line in lines}
        hosts.discard(None)
        smallweb_set = hosts - EXCLUDED
        logging.info('Loaded {} smallweb domains.'.format(len(smallweb_set)))
    except Exception as e:
        # Best-effort refresh: keep the previously loaded set on failure.
        logging.error('Failed to load smallweb list: {}'.format(e))
|
||||||
|
|||||||
Reference in New Issue
Block a user