From ed8ad1b6f6d1fe49aba27bf5c129fd364dcce7d8 Mon Sep 17 00:00:00 2001 From: Tanner Collin Date: Thu, 4 Dec 2025 22:18:19 +0000 Subject: [PATCH] feat: Add domain exclusion to smallweb list loading --- apiserver/server.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/apiserver/server.py b/apiserver/server.py index e0d7193..e6318bb 100644 --- a/apiserver/server.py +++ b/apiserver/server.py @@ -31,13 +31,17 @@ from flask_cors import CORS smallweb_set = set() def load_smallweb_list(): + EXCLUDED = [ + 'github.com', + ] + global smallweb_set try: url = 'https://raw.githubusercontent.com/kagisearch/smallweb/refs/heads/main/smallweb.txt' with urllib.request.urlopen(url, timeout=10) as response: urls = response.read().decode('utf-8').splitlines() hosts = {urlparse(u).hostname for u in urls if u and urlparse(u).hostname} - smallweb_set = {h.replace('www.', '') for h in hosts} + smallweb_set = {h.replace('www.', '') for h in hosts if h not in EXCLUDED} logging.info('Loaded {} smallweb domains.'.format(len(smallweb_set))) except Exception as e: logging.error('Failed to load smallweb list: {}'.format(e))