feat: Add smallweb filter checkbox and server-side filtering
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
This commit is contained in:
@@ -16,6 +16,7 @@ import traceback
|
||||
import time
|
||||
import datetime
|
||||
import humanize
|
||||
import urllib.request
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
|
||||
import settings
|
||||
@@ -28,6 +29,21 @@ from flask import abort, Flask, request, render_template, stream_with_context, R
|
||||
from werkzeug.exceptions import NotFound
|
||||
from flask_cors import CORS
|
||||
|
||||
# Hostnames (lower-case, leading "www." removed) taken from the smallweb list.
# Stays empty until load_smallweb_list() succeeds.
smallweb_set = set()


def _smallweb_host(url):
    """Return the normalised hostname of *url*, or None if it has none."""
    try:
        hostname = urlparse(url.strip()).hostname
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. an unbalanced "[");
        # one bad line should not discard the whole list.
        return None
    if not hostname:
        return None
    # Strip only a *leading* "www."; str.replace would also mangle hosts
    # such as "awww.example.org" or "blog.www.example.org".
    if hostname.startswith('www.'):
        hostname = hostname[len('www.'):]
    return hostname or None


def load_smallweb_list(
    url='https://raw.githubusercontent.com/kagisearch/smallweb/refs/heads/main/smallweb.txt',
):
    """Download the smallweb URL list and rebuild the global ``smallweb_set``.

    The resource at *url* is read as UTF-8 text with one URL per line. Each
    line is reduced to its hostname with a leading ``www.`` removed; blank
    lines and lines without a parseable hostname are skipped.

    The global set is rebound only after the whole list has been processed,
    so on any failure the previous contents are kept and the error is logged
    instead of being raised.

    Args:
        url: Location of the newline-separated URL list. Defaults to the
            kagisearch/smallweb list on GitHub.
    """
    global smallweb_set
    try:
        with urllib.request.urlopen(url, timeout=10) as response:
            lines = response.read().decode('utf-8').splitlines()
        hosts = {_smallweb_host(line) for line in lines}
        hosts.discard(None)
        smallweb_set = hosts
        logging.info('Loaded {} smallweb domains.'.format(len(smallweb_set)))
    except Exception as e:
        # Best effort: a missing list must not prevent the module from loading.
        logging.error('Failed to load smallweb list: {}'.format(e))
|
||||
|
||||
# Fetch the smallweb host list once, at import time. The loader logs and
# swallows its own failures, so a failed download does not stop startup.
load_smallweb_list()

# NOTE(review): presumably these prepare the storage and search backends;
# confirm in the `database` and `search` modules.
database.init()
search.init()
|
||||
|
||||
@@ -57,6 +73,21 @@ def api():
|
||||
skip = request.args.get('skip', 0)
|
||||
limit = request.args.get('limit', settings.FEED_LENGTH)
|
||||
stories = database.get_stories(limit, skip)
|
||||
|
||||
if request.args.get('smallweb') == 'true' and smallweb_set:
|
||||
filtered_stories = []
|
||||
for story_str in stories:
|
||||
story = json.loads(story_str)
|
||||
story_url = story.get('url') or story.get('link') or ''
|
||||
if not story_url:
|
||||
continue
|
||||
hostname = urlparse(story_url).hostname
|
||||
if hostname:
|
||||
hostname = hostname.replace('www.', '')
|
||||
if hostname in smallweb_set:
|
||||
filtered_stories.append(story_str)
|
||||
stories = filtered_stories
|
||||
|
||||
# hacky nested json
|
||||
res = Response('{"stories":[' + ','.join(stories) + ']}')
|
||||
res.headers['content-type'] = 'application/json'
|
||||
|
||||
Reference in New Issue
Block a user