forked from tanner/qotnews
Compare commits
55 Commits
55e7f6bb14...master

SHA1
9ec61ea5bc
bdc7a6c10d
4858516b01
f10e6063fc
249a616531
ab92bd5441
6b16a768a7
57de076fec
074b898508
f049d194ab
c2b9a1cb7a
4435f49e17
494d89ac30
e79fca6ecc
c65fb69092
632d028e4c
ea8e9e5a23
2838ea9b41
f15d108971
f777348af8
486404a413
7c9c07a4cf
08d02f6013
1b54342702
9e9571a3c0
dc83a70887
2e2c9ae837
61021d8f91
e65047fead
8e775c189f
3d9274309a
7bdbbf10b2
6aa0f78536
bf3663bbec
e6589dc61c
307e8349f3
04cd56daa8
c80769def6
ebd1ad2140
2cc7dd0d6d
6e7cb86d2e
a25457254f
a693ea5342
7386e1d8b0
f8e8597e3a
55c282ee69
3f774a9e38
dcedd4caa1
7a131ebd03
6f64401785
3ff917e806
c9fb9bd5df
fd9c9c888d
42dcf15374
d8a0b77765
3 .gitmodules vendored
@@ -1,3 +0,0 @@
[submodule "readerserver/scraper/browser/scripts/bypass-paywalls-chrome"]
path = readerserver/scraper/browser/scripts/bypass-paywalls-chrome
url = https://github.com/iamadamdev/bypass-paywalls-chrome.git
1 apiserver/.gitignore vendored
@@ -109,4 +109,5 @@ settings.py
data.db
data.db.bak
data/archive/*
data/backup/*
qotnews.sqlite
@@ -1,11 +1,11 @@
from datetime import datetime, timedelta
import json

from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import IntegrityError
from sqlalchemy.types import JSON

engine = create_engine('sqlite:///data/qotnews.sqlite')
engine = create_engine('sqlite:///data/qotnews.sqlite', connect_args={'timeout': 360})
Session = sessionmaker(bind=engine)

Base = declarative_base()
@@ -15,8 +15,8 @@ class Story(Base):

    sid = Column(String(16), primary_key=True)
    ref = Column(String(16), unique=True)
    meta = Column(JSON)
    data = Column(JSON)
    meta_json = Column(String)
    full_json = Column(String)
    title = Column(String)

class Reflist(Base):
@@ -24,7 +24,6 @@ class Reflist(Base):

    rid = Column(Integer, primary_key=True)
    ref = Column(String(16), unique=True)
    urlref = Column(String)
    sid = Column(String, ForeignKey('stories.sid'), unique=True)
    source = Column(String(16))

@@ -37,21 +36,19 @@ def get_story(sid):

def put_story(story):
    story = story.copy()
    data = {}
    data.update(story)
    full_json = json.dumps(story)

    meta = {}
    meta.update(story)
    meta.pop('text', None)
    meta.pop('comments', None)
    story.pop('text', None)
    story.pop('comments', None)
    meta_json = json.dumps(story)

    try:
        session = Session()
        s = Story(
            sid=story['id'],
            ref=story['ref'],
            data=data,
            meta=meta,
            full_json=full_json,
            meta_json=meta_json,
            title=story.get('title', None),
        )
        session.merge(s)
@@ -66,32 +63,25 @@ def get_story_by_ref(ref):
    session = Session()
    return session.query(Story).filter(Story.ref==ref).first()

def get_stories_by_url(url):
def get_reflist(amount):
    session = Session()
    return session.query(Story).\
        filter(Story.title != None).\
        filter(Story.meta['url'].as_string() == url).\
        order_by(Story.meta['date'].desc())
    q = session.query(Reflist).order_by(Reflist.rid.desc()).limit(amount)
    return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]

def get_reflist():
def get_stories(amount, skip=0):
    session = Session()
    q = session.query(Reflist).order_by(Reflist.rid.desc())
    return [dict(ref=x.ref, sid=x.sid, source=x.source, urlref=x.urlref) for x in q.all()]

def get_stories(maxage=60*60*24*2):
    time = datetime.now().timestamp() - maxage
    session = Session()
    q = session.query(Reflist, Story.meta).\
    q = session.query(Reflist, Story.meta_json).\
        order_by(Reflist.rid.desc()).\
        join(Story).\
        filter(Story.title != None).\
        filter(Story.meta['date'].as_integer() > time).\
        order_by(Story.meta['date'].desc())
        offset(skip).\
        limit(amount)
    return [x[1] for x in q]

def put_ref(ref, sid, source, urlref):
def put_ref(ref, sid, source):
    try:
        session = Session()
        r = Reflist(ref=ref, sid=sid, source=source, urlref=urlref)
        r = Reflist(ref=ref, sid=sid, source=source)
        session.add(r)
        session.commit()
    except:
@@ -111,7 +101,22 @@ def del_ref(ref):
    finally:
        session.close()

def count_stories():
    try:
        session = Session()
        return session.query(Story).count()
    finally:
        session.close()

def get_story_list():
    try:
        session = Session()
        return session.query(Story.sid).all()
    finally:
        session.close()

if __name__ == '__main__':
    init()

    print(get_story_by_ref('hgi3sy'))
    #print(get_story_by_ref('hgi3sy'))
    print(len(get_reflist(99999)))
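Note on the schema change above: the JSON-typed meta and data columns are replaced by plain meta_json and full_json string columns. put_story() now serializes the dict itself with json.dumps, and readers parse the stored strings back with json.loads. A minimal round-trip sketch, not part of the diff and with a made-up story id and fields, assuming the functions shown above:

import json
import database

database.init()

story = {'id': 'abc123', 'ref': '99999999', 'title': 'Example', 'text': '<p>body</p>', 'comments': []}
database.put_story(story)            # writes full_json plus a slimmed meta_json (no text/comments)

row = database.get_story('abc123')
meta = json.loads(row.meta_json)     # light copy for list views and search indexing
full = json.loads(row.full_json)     # complete story for rendering a single item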
@@ -6,120 +6,84 @@ logging.basicConfig(
|
||||
import requests
|
||||
import time
|
||||
from bs4 import BeautifulSoup
|
||||
import itertools
|
||||
|
||||
import settings
|
||||
from feeds import hackernews, reddit, tildes, substack, manual
|
||||
from feeds.sitemap import Sitemap
|
||||
from feeds.category import Category
|
||||
from scrapers import outline, declutter, browser, local
|
||||
from feeds import hackernews, reddit, tildes, manual, lobsters
|
||||
import utils
|
||||
|
||||
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
|
||||
|
||||
substacks = {}
|
||||
for key, value in settings.SUBSTACK.items():
|
||||
substacks[key] = substack.Publication(value['url'])
|
||||
categories = {}
|
||||
for key, value in settings.CATEGORY.items():
|
||||
categories[key] = Category(value)
|
||||
sitemaps = {}
|
||||
for key, value in settings.SITEMAP.items():
|
||||
sitemaps[key] = Sitemap(value)
|
||||
|
||||
def get_list():
|
||||
feeds = {}
|
||||
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com', 'sec.gov']
|
||||
TWO_DAYS = 60*60*24*2
|
||||
|
||||
def list():
|
||||
feed = []
|
||||
if settings.NUM_HACKERNEWS:
|
||||
feeds['hackernews'] = [(x, 'hackernews', x) for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
|
||||
feed += [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
|
||||
|
||||
if settings.NUM_LOBSTERS:
|
||||
feed += [(x, 'lobsters') for x in lobsters.feed()[:settings.NUM_LOBSTERS]]
|
||||
|
||||
if settings.NUM_REDDIT:
|
||||
feeds['reddit'] = [(x, 'reddit', x) for x in reddit.feed()[:settings.NUM_REDDIT]]
|
||||
feed += [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
|
||||
|
||||
if settings.NUM_TILDES:
|
||||
feeds['tildes'] = [(x, 'tildes', x) for x in tildes.feed()[:settings.NUM_TILDES]]
|
||||
feed += [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
|
||||
|
||||
if settings.NUM_SUBSTACK:
|
||||
feeds['substack'] = [(x, 'substack', x) for x in substack.top.feed()[:settings.NUM_SUBSTACK]]
|
||||
|
||||
for key, publication in substacks.items():
|
||||
count = settings.SUBSTACK[key]['count']
|
||||
feeds[key] = [(x, key, x) for x in publication.feed()[:count]]
|
||||
|
||||
for key, sites in categories.items():
|
||||
count = settings.CATEGORY[key].get('count') or 0
|
||||
excludes = settings.CATEGORY[key].get('excludes')
|
||||
tz = settings.CATEGORY[key].get('tz')
|
||||
feeds[key] = [(x, key, u) for x, u in sites.feed(excludes)[:count]]
|
||||
|
||||
for key, sites in sitemaps.items():
|
||||
count = settings.SITEMAP[key].get('count') or 0
|
||||
excludes = settings.SITEMAP[key].get('excludes')
|
||||
feeds[key] = [(x, key, u) for x, u in sites.feed(excludes)[:count]]
|
||||
|
||||
values = feeds.values()
|
||||
feed = itertools.chain.from_iterable(itertools.zip_longest(*values, fillvalue=None))
|
||||
feed = list(filter(None, feed))
|
||||
return feed
|
||||
|
||||
def get_article(url):
|
||||
scrapers = {
|
||||
'declutter': declutter,
|
||||
'outline': outline,
|
||||
'browser': browser,
|
||||
'local': local,
|
||||
}
|
||||
available = settings.SCRAPERS or ['local']
|
||||
if 'local' not in available:
|
||||
available += ['local']
|
||||
if not settings.READER_URL:
|
||||
logging.info('Readerserver not configured, aborting.')
|
||||
return ''
|
||||
|
||||
for scraper in available:
|
||||
if scraper not in scrapers.keys():
|
||||
continue
|
||||
try:
|
||||
html = scrapers[scraper].get_html(url)
|
||||
if html:
|
||||
return html
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except:
|
||||
pass
|
||||
return ''
|
||||
if url.startswith('https://twitter.com'):
|
||||
logging.info('Replacing twitter.com url with nitter.net')
|
||||
url = url.replace('twitter.com', 'nitter.net')
|
||||
|
||||
try:
|
||||
r = requests.post(settings.READER_URL, data=dict(url=url), timeout=20)
|
||||
if r.status_code != 200:
|
||||
raise Exception('Bad response code ' + str(r.status_code))
|
||||
return r.text
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except BaseException as e:
|
||||
logging.error('Problem getting article: {}'.format(str(e)))
|
||||
return ''
|
||||
|
||||
def get_content_type(url):
|
||||
try:
|
||||
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
|
||||
return requests.get(url, headers=headers, timeout=5).headers['content-type']
|
||||
except:
|
||||
return ''
|
||||
|
||||
try:
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
|
||||
'X-Forwarded-For': '66.249.66.1',
|
||||
}
|
||||
return requests.get(url, headers=headers, timeout=5).headers['content-type']
|
||||
return requests.get(url, headers=headers, timeout=10).headers['content-type']
|
||||
except:
|
||||
pass
|
||||
|
||||
try:
|
||||
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
|
||||
return requests.get(url, headers=headers, timeout=10).headers['content-type']
|
||||
except:
|
||||
return ''
|
||||
|
||||
def update_story(story, is_manual=False, urlref=None):
|
||||
def update_story(story, is_manual=False):
|
||||
res = {}
|
||||
|
||||
if story['source'] == 'hackernews':
|
||||
res = hackernews.story(story['ref'])
|
||||
elif story['source'] == 'reddit':
|
||||
res = reddit.story(story['ref'])
|
||||
elif story['source'] == 'tildes':
|
||||
res = tildes.story(story['ref'])
|
||||
elif story['source'] == 'substack':
|
||||
res = substack.top.story(story['ref'])
|
||||
elif story['source'] in categories.keys():
|
||||
res = categories[story['source']].story(story['ref'], urlref)
|
||||
elif story['source'] in sitemaps.keys():
|
||||
res = sitemaps[story['source']].story(story['ref'], urlref)
|
||||
elif story['source'] in substacks.keys():
|
||||
res = substacks[story['source']].story(story['ref'])
|
||||
elif story['source'] == 'manual':
|
||||
res = manual.story(story['ref'])
|
||||
try:
|
||||
if story['source'] == 'hackernews':
|
||||
res = hackernews.story(story['ref'])
|
||||
elif story['source'] == 'lobsters':
|
||||
res = lobsters.story(story['ref'])
|
||||
elif story['source'] == 'reddit':
|
||||
res = reddit.story(story['ref'])
|
||||
elif story['source'] == 'tildes':
|
||||
res = tildes.story(story['ref'])
|
||||
elif story['source'] == 'manual':
|
||||
res = manual.story(story['ref'])
|
||||
except BaseException as e:
|
||||
utils.alert_tanner('Problem updating {} story, ref {}: {}'.format(story['source'], story['ref'], str(e)))
|
||||
logging.exception(e)
|
||||
return False
|
||||
|
||||
if res:
|
||||
story.update(res) # join dicts
|
||||
@@ -127,8 +91,8 @@ def update_story(story, is_manual=False, urlref=None):
|
||||
logging.info('Story not ready yet')
|
||||
return False
|
||||
|
||||
if story['date'] and not is_manual and story['date'] + settings.MAX_STORY_AGE < time.time():
|
||||
logging.info('Story too old, removing')
|
||||
if story['date'] and not is_manual and story['date'] + TWO_DAYS < time.time():
|
||||
logging.info('Story too old, removing. Date: {}'.format(story['date']))
|
||||
return False
|
||||
|
||||
if story.get('url', '') and not story.get('text', ''):
|
||||
@@ -142,6 +106,12 @@ def update_story(story, is_manual=False, urlref=None):
|
||||
logging.info(story['url'])
|
||||
return False
|
||||
|
||||
if 'trump' in story['title'].lower() or 'musk' in story['title'].lower():
|
||||
logging.info('Trump / Musk story, skipping')
|
||||
logging.info(story['url'])
|
||||
return False
|
||||
|
||||
|
||||
logging.info('Getting article ' + story['url'])
|
||||
story['text'] = get_article(story['url'])
|
||||
if not story['text']: return False
|
||||
@@ -159,7 +129,7 @@ if __name__ == '__main__':
|
||||
|
||||
#print(get_article('https://www.bloomberg.com/news/articles/2019-09-23/xi-s-communists-under-pressure-as-high-prices-hit-china-workers'))
|
||||
|
||||
a = get_article('https://blog.joinmastodon.org/2019/10/mastodon-3.0/')
|
||||
a = get_content_type('https://tefkos.comminfo.rutgers.edu/Courses/e530/Readings/Beal%202008%20full%20text%20searching.pdf')
|
||||
print(a)
|
||||
|
||||
print('done')
|
||||
|
@@ -1,72 +0,0 @@
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
level=logging.DEBUG)
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
sys.path.insert(0,'.')
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
import settings
|
||||
from utils import clean
|
||||
from misc.api import xml
|
||||
from misc.news import Base
|
||||
|
||||
def _filter_links(links, category_url, excludes=None):
|
||||
links = list(filter(None, [link if link.startswith(category_url) else None for link in links]))
|
||||
links = list(filter(None, [link if link != category_url else None for link in links]))
|
||||
links = list(set(links))
|
||||
if excludes:
|
||||
links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
|
||||
return links
|
||||
|
||||
def _get_category(category_url, excludes=None):
|
||||
base_url = '/'.join(category_url.split('/')[:3])
|
||||
markup = xml(lambda x: category_url)
|
||||
if not markup: return []
|
||||
soup = BeautifulSoup(markup, features='html.parser')
|
||||
links = soup.find_all('a', href=True)
|
||||
links = [link.get('href') for link in links]
|
||||
links = [f"{base_url}{link}" if link.startswith('/') else link for link in links]
|
||||
links = _filter_links(links, category_url, excludes)
|
||||
return links
|
||||
|
||||
class Category(Base):
|
||||
def __init__(self, config):
|
||||
self.config = config
|
||||
self.category_url = config.get('url')
|
||||
self.tz = config.get('tz')
|
||||
|
||||
def feed(self, excludes=None):
|
||||
links = []
|
||||
if isinstance(self.category_url, str):
|
||||
links += _get_category(self.category_url, excludes)
|
||||
elif isinstance(self.category_url, list):
|
||||
for url in self.category_url:
|
||||
links += _get_category(url, excludes)
|
||||
links = list(set(links))
|
||||
return [(self.get_id(link), link) for link in links]
|
||||
|
||||
|
||||
# scratchpad so I can quickly develop the parser
|
||||
if __name__ == '__main__':
|
||||
print("Category: RadioNZ")
|
||||
site = Category("https://www.rnz.co.nz/news/")
|
||||
excludes = [
|
||||
'rnz.co.nz/news/sport',
|
||||
'rnz.co.nz/weather',
|
||||
'rnz.co.nz/news/weather',
|
||||
]
|
||||
posts = site.feed(excludes)
|
||||
print(posts[:5])
|
||||
print(site.story(posts[0]))
|
||||
|
||||
print("Category: Newsroom")
|
||||
site = Category("https://www.newsroom.co.nz/news/", tz='Pacific/Auckland')
|
||||
posts = site.feed()
|
||||
print(posts[:5])
|
||||
print(site.story(posts[0]))
|
||||
|
||||
|
@@ -12,7 +12,8 @@ import requests
from utils import clean

API_TOPSTORIES = lambda x: 'https://hacker-news.firebaseio.com/v0/topstories.json'
API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
ALG_API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
BHN_API_ITEM = lambda x : 'https://api.hnpwa.com/v0/item/{}.json'.format(x)

SITE_LINK = lambda x : 'https://news.ycombinator.com/item?id={}'.format(x)
SITE_AUTHOR_LINK = lambda x : 'https://news.ycombinator.com/user?id={}'.format(x)
@@ -42,7 +43,7 @@ def api(route, ref=None):
def feed():
    return [str(x) for x in api(API_TOPSTORIES) or []]

def comment(i):
def alg_comment(i):
    if 'author' not in i:
        return False

@@ -51,21 +52,25 @@ def comment(i):
    c['score'] = i.get('points', 0)
    c['date'] = i.get('created_at_i', 0)
    c['text'] = clean(i.get('text', '') or '')
    c['comments'] = [comment(j) for j in i['children']]
    c['comments'] = [alg_comment(j) for j in i['children']]
    c['comments'] = list(filter(bool, c['comments']))
    return c

def comment_count(i):
def alg_comment_count(i):
    alive = 1 if i['author'] else 0
    return sum([comment_count(c) for c in i['comments']]) + alive
    return sum([alg_comment_count(c) for c in i['comments']]) + alive

def story(ref):
    r = api(API_ITEM, ref)
    if not r: return False
def alg_story(ref):
    r = api(ALG_API_ITEM, ref)
    if not r:
        logging.info('Bad Algolia Hackernews API response.')
        return None

    if 'deleted' in r:
        logging.info('Story was deleted.')
        return False
    elif r.get('type', '') != 'story':
        logging.info('Type "{}" is not "story".'.format(r.get('type', '')))
        return False

    s = {}
@@ -76,17 +81,85 @@ def story(ref):
    s['title'] = r.get('title', '')
    s['link'] = SITE_LINK(ref)
    s['url'] = r.get('url', '')
    s['comments'] = [comment(i) for i in r['children']]
    s['comments'] = [alg_comment(i) for i in r['children']]
    s['comments'] = list(filter(bool, s['comments']))
    s['num_comments'] = comment_count(s) - 1
    s['num_comments'] = alg_comment_count(s) - 1

    if 'text' in r and r['text']:
        s['text'] = clean(r['text'] or '')

    return s

def bhn_comment(i):
    if 'user' not in i:
        return False

    c = {}
    c['author'] = i.get('user', '')
    c['score'] = 0 # Not present?
    c['date'] = i.get('time', 0)
    c['text'] = clean(i.get('content', '') or '')
    c['comments'] = [bhn_comment(j) for j in i['comments']]
    c['comments'] = list(filter(bool, c['comments']))
    return c

def bhn_story(ref):
    r = api(BHN_API_ITEM, ref)
    if not r:
        logging.info('Bad BetterHN Hackernews API response.')
        return None

    if 'deleted' in r: # TODO: verify
        logging.info('Story was deleted.')
        return False
    elif r.get('dead', False):
        logging.info('Story was deleted.')
        return False
    elif r.get('type', '') != 'link':
        logging.info('Type "{}" is not "link".'.format(r.get('type', '')))
        return False

    s = {}
    s['author'] = r.get('user', '')
    s['author_link'] = SITE_AUTHOR_LINK(r.get('user', ''))
    s['score'] = r.get('points', 0)
    s['date'] = r.get('time', 0)
    s['title'] = r.get('title', '')
    s['link'] = SITE_LINK(ref)
    s['url'] = r.get('url', '')
    if s['url'].startswith('item'):
        s['url'] = SITE_LINK(ref)
    s['comments'] = [bhn_comment(i) for i in r['comments']]
    s['comments'] = list(filter(bool, s['comments']))
    s['num_comments'] = r.get('comments_count', 0)

    if 'content' in r and r['content']:
        s['text'] = clean(r['content'] or '')

    return s

def story(ref):
    s = alg_story(ref)
    if s is None:
        s = bhn_story(ref)
    if not s:
        return False

    if s['score'] < 25 and s['num_comments'] < 10:
        logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
        return False

    return s

# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
    print(feed())
    #print(story(20763961))
    #print(story(20802050))

    #print(story(42899834)) # type "job"
    #print(story(42900076)) # Ask HN
    #print(story(42898201)) # Show HN
    #print(story(42899703)) # normal
    print(story(42902678)) # bad title?
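In the reworked story() above, alg_story() is tried first and returns None only when the Algolia API gave no usable response, in which case the api.hnpwa.com mirror (bhn_story) is tried; anything still falsy is dropped, and surviving stories must clear the score/comment threshold. A rough usage sketch, not part of the diff, with an item id taken from the scratchpad above:

from feeds import hackernews

s = hackernews.story('42899703')     # Algolia first, BetterHN/hnpwa fallback on API failure
if s:
    print(s['title'], s['num_comments'])
else:
    print('skipped: deleted, non-story, or below threshold')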
120 apiserver/feeds/lobsters.py Normal file
@@ -0,0 +1,120 @@
import logging
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.DEBUG)

if __name__ == '__main__':
    import sys
    sys.path.insert(0,'.')

import requests
from datetime import datetime

from utils import clean

API_HOTTEST = lambda x: 'https://lobste.rs/hottest.json'
API_ITEM = lambda x : 'https://lobste.rs/s/{}.json'.format(x)

SITE_LINK = lambda x : 'https://lobste.rs/s/{}'.format(x)
SITE_AUTHOR_LINK = lambda x : 'https://lobste.rs/u/{}'.format(x)

def api(route, ref=None):
    try:
        r = requests.get(route(ref), timeout=5)
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem hitting lobsters API: {}, trying again'.format(str(e)))

    try:
        r = requests.get(route(ref), timeout=15)
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem hitting lobsters API: {}'.format(str(e)))
        return False

def feed():
    return [x['short_id'] for x in api(API_HOTTEST) or []]

def unix(date_str):
    date_str = date_str.replace(':', '')
    return int(datetime.strptime(date_str, '%Y-%m-%dT%H%M%S.%f%z').timestamp())

def make_comment(i):
    c = {}
    try:
        c['author'] = i['commenting_user']
    except KeyError:
        c['author'] = ''
    c['score'] = i.get('score', 0)
    try:
        c['date'] = unix(i['created_at'])
    except KeyError:
        c['date'] = 0
    c['text'] = clean(i.get('comment', '') or '')
    c['comments'] = []
    return c

def iter_comments(flat_comments):
    nested_comments = []
    parent_stack = []
    for comment in flat_comments:
        c = make_comment(comment)
        indent = comment['depth']

        if indent == 0:
            nested_comments.append(c)
            parent_stack = [c]
        else:
            parent_stack = parent_stack[:indent]
            p = parent_stack[-1]
            p['comments'].append(c)
            parent_stack.append(c)
    return nested_comments

def story(ref):
    r = api(API_ITEM, ref)
    if not r:
        logging.info('Bad Lobsters API response.')
        return False

    s = {}
    try:
        s['author'] = r['submitter_user']
        s['author_link'] = SITE_AUTHOR_LINK(s['author'])
    except KeyError:
        s['author'] = ''
        s['author_link'] = ''
    s['score'] = r.get('score', 0)
    try:
        s['date'] = unix(r['created_at'])
    except KeyError:
        s['date'] = 0
    s['title'] = r.get('title', '')
    s['link'] = SITE_LINK(ref)
    s['url'] = r.get('url', '')
    s['comments'] = iter_comments(r['comments'])
    s['num_comments'] = r['comment_count']

    if s['score'] < 15 and s['num_comments'] < 10:
        logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
        return False

    if 'description' in r and r['description']:
        s['text'] = clean(r['description'] or '')

    return s

# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
    #print(feed())
    import json
    print(json.dumps(story('fzvd1v'), indent=4))
    #print(json.dumps(story('ixyv5u'), indent=4))
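The lobste.rs API returns comments as a flat, depth-first list where each entry carries a depth field; iter_comments() above rebuilds the tree by trimming a parent stack back to that depth. A small self-contained sketch of the same idea, not part of the diff, using made-up, simplified comment dicts:

def nest(flat):
    roots, stack = [], []
    for item in flat:
        node = {'text': item['text'], 'comments': []}
        depth = item['depth']
        if depth == 0:
            roots.append(node)
            stack = [node]
        else:
            stack = stack[:depth]        # drop anything deeper than the new node's parent
            stack[-1]['comments'].append(node)
            stack.append(node)
    return roots

# 'b' is a reply to 'a'; 'c' is another top-level comment.
print(nest([{'text': 'a', 'depth': 0}, {'text': 'b', 'depth': 1}, {'text': 'c', 'depth': 0}]))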
@@ -7,8 +7,6 @@ import requests
import time
from bs4 import BeautifulSoup

import settings

USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'

def api(route):
@@ -29,13 +27,15 @@ def api(route):

def story(ref):
    html = api(ref)
    if not html: return False
    if not html:
        logging.info('Bad http GET response.')
        return False

    soup = BeautifulSoup(html, features='html.parser')

    s = {}
    s['author'] = 'manual submission'
    s['author_link'] = 'https://{}'.format(settings.HOSTNAME)
    s['author_link'] = 'https://news.t0.vc'
    s['score'] = 0
    s['date'] = int(time.time())
    s['title'] = str(soup.title.string) if soup.title else ref
@@ -32,11 +32,8 @@ def feed():
        return [x.id for x in reddit.subreddit(subs).hot()]
    except KeyboardInterrupt:
        raise
    except PRAWException as e:
        logging.error('Problem hitting reddit API: {}'.format(str(e)))
        return []
    except PrawcoreException as e:
        logging.error('Problem hitting reddit API: {}'.format(str(e)))
    except BaseException as e:
        logging.critical('Problem hitting reddit API: {}'.format(str(e)))
        return []

def comment(i):
@@ -59,7 +56,9 @@ def comment(i):
def story(ref):
    try:
        r = reddit.submission(ref)
        if not r: return False
        if not r:
            logging.info('Bad Reddit API response.')
            return False

        s = {}
        s['author'] = r.author.name if r.author else '[Deleted]'
@@ -73,7 +72,8 @@ def story(ref):
        s['comments'] = list(filter(bool, s['comments']))
        s['num_comments'] = r.num_comments

        if s['score'] < settings.REDDIT_SCORE_THRESHOLD and s['num_comments'] < settings.REDDIT_COMMENT_THRESHOLD:
        if s['score'] < 25 and s['num_comments'] < 10:
            logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
            return False

        if r.selftext:
@@ -84,10 +84,10 @@ def story(ref):
    except KeyboardInterrupt:
        raise
    except PRAWException as e:
        logging.error('Problem hitting reddit API: {}'.format(str(e)))
        logging.critical('Problem hitting reddit API: {}'.format(str(e)))
        return False
    except PrawcoreException as e:
        logging.error('Problem hitting reddit API: {}'.format(str(e)))
        logging.critical('Problem hitting reddit API: {}'.format(str(e)))
        return False

# scratchpad so I can quickly develop the parser
@@ -1,99 +0,0 @@
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
level=logging.DEBUG)
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
sys.path.insert(0,'.')
|
||||
|
||||
from datetime import datetime
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
import settings
|
||||
from utils import clean
|
||||
from misc.time import unix
|
||||
from misc.api import xml
|
||||
from misc.news import Base
|
||||
|
||||
def _get_sitemap_date(a):
|
||||
if a.find('lastmod'):
|
||||
return a.find('lastmod').text
|
||||
if a.find('news:publication_date'):
|
||||
return a.find('news:publication_date').text
|
||||
if a.find('ns2:publication_date'):
|
||||
return a.find('ns2:publication_date').text
|
||||
return ''
|
||||
|
||||
def _filter_links(links, excludes=None):
|
||||
too_old = datetime.now().timestamp() - settings.MAX_STORY_AGE
|
||||
links = list(filter(None, [a if _get_sitemap_date(a) else None for a in links]))
|
||||
links = list(filter(None, [a if unix(_get_sitemap_date(a)) > too_old else None for a in links]))
|
||||
links.sort(key=lambda a: unix(_get_sitemap_date(a)), reverse=True)
|
||||
|
||||
links = [x.find('loc').text for x in links] or []
|
||||
links = list(set(links))
|
||||
if excludes:
|
||||
links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
|
||||
return links
|
||||
|
||||
def _get_sitemap(feed_url, excludes=None):
|
||||
markup = xml(lambda x: feed_url)
|
||||
if not markup: return []
|
||||
soup = BeautifulSoup(markup, features='lxml')
|
||||
links = []
|
||||
feed_urls = []
|
||||
if soup.find('sitemapindex'):
|
||||
sitemap = soup.find('sitemapindex').findAll('sitemap')
|
||||
feed_urls = list(filter(None, [a if a.find('loc') else None for a in sitemap]))
|
||||
if soup.find('urlset'):
|
||||
sitemap = soup.find('urlset').findAll('url')
|
||||
links = list(filter(None, [a if a.find('loc') else None for a in sitemap]))
|
||||
|
||||
feed_urls = _filter_links(feed_urls, excludes)
|
||||
links = _filter_links(links, excludes)
|
||||
|
||||
for url in feed_urls:
|
||||
links += _get_sitemap(url, excludes)
|
||||
return list(set(links))
|
||||
|
||||
class Sitemap(Base):
|
||||
def __init__(self, config):
|
||||
self.config = config
|
||||
self.sitemap_url = config.get('url')
|
||||
self.tz = config.get('tz')
|
||||
|
||||
def feed(self, excludes=None):
|
||||
links = []
|
||||
if isinstance(self.sitemap_url, str):
|
||||
links += _get_sitemap(self.sitemap_url, excludes)
|
||||
elif isinstance(self.sitemap_url, list):
|
||||
for url in self.sitemap_url:
|
||||
links += _get_sitemap(url, excludes)
|
||||
links = list(set(links))
|
||||
return [(self.get_id(link), link) for link in links]
|
||||
|
||||
# scratchpad so I can quickly develop the parser
|
||||
if __name__ == '__main__':
|
||||
print("Sitemap: The Spinoff")
|
||||
site = Sitemap("https://thespinoff.co.nz/sitemap.xml")
|
||||
excludes = [
|
||||
'thespinoff.co.nz/sitemap-misc.xml',
|
||||
'thespinoff.co.nz/sitemap-authors.xml',
|
||||
'thespinoff.co.nz/sitemap-tax-category.xml',
|
||||
]
|
||||
posts = site.feed(excludes)
|
||||
print(posts[:5])
|
||||
print(site.story(posts[0]))
|
||||
|
||||
print("Sitemap: Newshub")
|
||||
site = Sitemap([
|
||||
'https://www.newshub.co.nz/home/politics.gnewssitemap.xml',
|
||||
'https://www.newshub.co.nz/home/new-zealand.gnewssitemap.xml',
|
||||
'https://www.newshub.co.nz/home/world.gnewssitemap.xml',
|
||||
'https://www.newshub.co.nz/home/money.gnewssitemap.xml',
|
||||
])
|
||||
posts = site.feed()
|
||||
print(posts[:5])
|
||||
print(site.story(posts[0]))
|
||||
print(site.story(posts[:-1]))
|
@@ -1,165 +0,0 @@
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
level=logging.DEBUG)
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
sys.path.insert(0,'.')
|
||||
|
||||
import requests
|
||||
from datetime import datetime
|
||||
|
||||
from utils import clean
|
||||
|
||||
SUBSTACK_REFERER = 'https://substack.com'
|
||||
SUBSTACK_API_TOP_POSTS = lambda x: "https://substack.com/api/v1/reader/top-posts"
|
||||
|
||||
def author_link(author_id, base_url):
|
||||
return f"{base_url}/people/{author_id}"
|
||||
def api_comments(post_id, base_url):
|
||||
return f"{base_url}/api/v1/post/{post_id}/comments?all_comments=true&sort=best_first"
|
||||
def api_stories(x, base_url):
|
||||
return f"{base_url}/api/v1/archive?sort=new&search=&offset=0&limit=100"
|
||||
|
||||
def unix(date_str):
|
||||
return int(datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%fZ').timestamp())
|
||||
|
||||
def api(route, ref=None, referer=None):
|
||||
headers = {'Referer': referer} if referer else None
|
||||
try:
|
||||
r = requests.get(route(ref), headers=headers, timeout=10)
|
||||
if r.status_code != 200:
|
||||
raise Exception('Bad response code ' + str(r.status_code))
|
||||
return r.json()
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except BaseException as e:
|
||||
logging.error('Problem hitting Substack API: {}, trying again'.format(str(e)))
|
||||
|
||||
try:
|
||||
r = requests.get(route(ref), headers=headers, timeout=20)
|
||||
if r.status_code != 200:
|
||||
raise Exception('Bad response code ' + str(r.status_code))
|
||||
return r.json()
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except BaseException as e:
|
||||
logging.error('Problem hitting Substack API: {}'.format(str(e)))
|
||||
return False
|
||||
|
||||
def comment(i):
|
||||
if 'body' not in i:
|
||||
return False
|
||||
|
||||
c = {}
|
||||
c['date'] = unix(i.get('date'))
|
||||
c['author'] = i.get('name', '')
|
||||
c['score'] = i.get('reactions').get('❤')
|
||||
c['text'] = clean(i.get('body', '') or '')
|
||||
c['comments'] = [comment(j) for j in i['children']]
|
||||
c['comments'] = list(filter(bool, c['comments']))
|
||||
|
||||
return c
|
||||
|
||||
class Publication:
|
||||
def __init__(self, domain):
|
||||
self.BASE_DOMAIN = domain
|
||||
|
||||
def feed(self):
|
||||
stories = api(lambda x: api_stories(x, self.BASE_DOMAIN), referer=self.BASE_DOMAIN)
|
||||
if not stories: return []
|
||||
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
|
||||
return [str(i.get("id")) for i in stories or []]
|
||||
|
||||
def story(self, ref):
|
||||
stories = api(lambda x: api_stories(x, self.BASE_DOMAIN), referer=self.BASE_DOMAIN)
|
||||
if not stories: return False
|
||||
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
|
||||
stories = list(filter(None, [i if str(i.get('id')) == ref else None for i in stories]))
|
||||
|
||||
if len(stories) == 0:
|
||||
return False
|
||||
|
||||
r = stories[0]
|
||||
if not r:
|
||||
return False
|
||||
|
||||
s = {}
|
||||
s['author'] = ''
|
||||
s['author_link'] = ''
|
||||
|
||||
s['date'] = unix(r.get('post_date'))
|
||||
s['score'] = r.get('reactions').get('❤')
|
||||
s['title'] = r.get('title', '')
|
||||
s['link'] = r.get('canonical_url', '')
|
||||
s['url'] = r.get('canonical_url', '')
|
||||
comments = api(lambda x: api_comments(x, self.BASE_DOMAIN), r.get('id'), referer=self.BASE_DOMAIN)
|
||||
s['comments'] = [comment(i) for i in comments.get('comments')]
|
||||
s['comments'] = list(filter(bool, s['comments']))
|
||||
s['num_comments'] = r.get('comment_count', 0)
|
||||
|
||||
authors = list(filter(None, [self._bylines(byline) for byline in r.get('publishedBylines')]))
|
||||
if len(authors):
|
||||
s['author'] = authors[0].get('name')
|
||||
s['author_link'] = authors[0].get('link')
|
||||
|
||||
return s
|
||||
|
||||
def _bylines(self, b):
|
||||
if 'id' not in b:
|
||||
return None
|
||||
a = {}
|
||||
a['name'] = b.get('name')
|
||||
a['link'] = author_link(b.get('id'), self.BASE_DOMAIN)
|
||||
return a
|
||||
|
||||
|
||||
class Top:
|
||||
def feed(self):
|
||||
stories = api(SUBSTACK_API_TOP_POSTS, referer=SUBSTACK_REFERER)
|
||||
if not stories: return []
|
||||
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
|
||||
return [str(i.get("id")) for i in stories or []]
|
||||
|
||||
def story(self, ref):
|
||||
stories = api(SUBSTACK_API_TOP_POSTS, referer=SUBSTACK_REFERER)
|
||||
if not stories: return False
|
||||
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
|
||||
stories = list(filter(None, [i if str(i.get('id')) == ref else None for i in stories]))
|
||||
|
||||
if len(stories) == 0:
|
||||
return False
|
||||
|
||||
r = stories[0]
|
||||
if not r:
|
||||
return False
|
||||
|
||||
s = {}
|
||||
pub = r.get('pub')
|
||||
base_url = pub.get('base_url')
|
||||
s['author'] = pub.get('author_name')
|
||||
s['author_link'] = author_link(pub.get('author_id'), base_url)
|
||||
|
||||
s['date'] = unix(r.get('post_date'))
|
||||
s['score'] = r.get('score')
|
||||
s['title'] = r.get('title', '')
|
||||
s['link'] = r.get('canonical_url', '')
|
||||
s['url'] = r.get('canonical_url', '')
|
||||
comments = api(lambda x: api_comments(x, base_url), r.get('id'), referer=SUBSTACK_REFERER)
|
||||
s['comments'] = [comment(i) for i in comments.get('comments')]
|
||||
s['comments'] = list(filter(bool, s['comments']))
|
||||
s['num_comments'] = r.get('comment_count', 0)
|
||||
|
||||
return s
|
||||
|
||||
top = Top()
|
||||
|
||||
# scratchpad so I can quickly develop the parser
|
||||
if __name__ == '__main__':
|
||||
top_posts = top.feed()
|
||||
print(top.story(top_posts[0]))
|
||||
|
||||
webworm = Publication("https://www.webworm.co/")
|
||||
posts = webworm.feed()
|
||||
print(webworm.story(posts[0]))
|
@@ -34,7 +34,7 @@ def api(route):
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except BaseException as e:
|
||||
logging.error('Problem hitting tildes website: {}'.format(str(e)))
|
||||
logging.critical('Problem hitting tildes website: {}'.format(str(e)))
|
||||
return False
|
||||
|
||||
def feed():
|
||||
@@ -71,11 +71,15 @@ def story(ref):
|
||||
html = api(SITE_LINK(group_lookup[ref], ref))
|
||||
else:
|
||||
html = api(API_ITEM(ref))
|
||||
if not html: return False
|
||||
if not html:
|
||||
logging.info('Bad Tildes API response.')
|
||||
return False
|
||||
|
||||
soup = BeautifulSoup(html, features='html.parser')
|
||||
a = soup.find('article', class_='topic-full')
|
||||
if a is None: return False
|
||||
if a is None:
|
||||
logging.info('Tildes <article> element not found.')
|
||||
return False
|
||||
|
||||
h = a.find('header')
|
||||
lu = h.find('a', class_='link-user')
|
||||
@@ -83,6 +87,7 @@ def story(ref):
|
||||
error = a.find('div', class_='text-error')
|
||||
if error:
|
||||
if 'deleted' in error.string or 'removed' in error.string:
|
||||
logging.info('Article was deleted or removed.')
|
||||
return False
|
||||
|
||||
s = {}
|
||||
@@ -102,7 +107,21 @@ def story(ref):
|
||||
ch = a.find('header', class_='topic-comments-header')
|
||||
s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
|
||||
|
||||
if s['score'] < 8 and s['num_comments'] < 6:
|
||||
if s['group'].split('.')[0] not in [
|
||||
'~arts',
|
||||
'~comp',
|
||||
'~creative',
|
||||
'~design',
|
||||
'~engineering',
|
||||
'~finance',
|
||||
'~science',
|
||||
'~tech',
|
||||
]:
|
||||
logging.info('Group ({}) not in whitelist.'.format(s['group']))
|
||||
return False
|
||||
|
||||
if s['score'] < 15 and s['num_comments'] < 10:
|
||||
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
|
||||
return False
|
||||
|
||||
td = a.find('div', class_='topic-full-text')
|
||||
@@ -113,7 +132,7 @@ def story(ref):
|
||||
|
||||
# scratchpad so I can quickly develop the parser
|
||||
if __name__ == '__main__':
|
||||
#print(feed())
|
||||
print(feed())
|
||||
#normal = story('gxt')
|
||||
#print(normal)
|
||||
#no_comments = story('gxr')
|
||||
@@ -122,8 +141,8 @@ if __name__ == '__main__':
|
||||
#print(self_post)
|
||||
#li_comment = story('gqx')
|
||||
#print(li_comment)
|
||||
broken = story('q4y')
|
||||
print(broken)
|
||||
#broken = story('q4y')
|
||||
#print(broken)
|
||||
|
||||
# make sure there's no self-reference
|
||||
#import copy
|
||||
|
@@ -1,35 +0,0 @@
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
level=logging.DEBUG)
|
||||
|
||||
import requests
|
||||
|
||||
USER_AGENT = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
|
||||
FORWARD_IP = '66.249.66.1'
|
||||
|
||||
def xml(route, ref=None):
|
||||
try:
|
||||
headers = {'User-Agent': USER_AGENT, 'X-Forwarded-For': FORWARD_IP}
|
||||
r = requests.get(route(ref), headers=headers, timeout=5)
|
||||
if r.status_code != 200:
|
||||
raise Exception('Bad response code ' + str(r.status_code))
|
||||
return r.text
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except BaseException as e:
|
||||
logging.error('Problem hitting URL: {}'.format(str(e)))
|
||||
return False
|
||||
|
||||
def json(route, ref=None):
|
||||
try:
|
||||
headers = {'User-Agent': USER_AGENT, 'X-Forwarded-For': FORWARD_IP}
|
||||
r = requests.get(route(ref), headers=headers, timeout=5)
|
||||
if r.status_code != 200:
|
||||
raise Exception('Bad response code ' + str(r.status_code))
|
||||
return r.json()
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except BaseException as e:
|
||||
logging.error('Problem hitting URL: {}'.format(str(e)))
|
||||
return False
|
@@ -1,69 +0,0 @@
|
||||
|
||||
def parse_extruct(s, data):
|
||||
rdfa_keys = {
|
||||
'title': [
|
||||
'http://ogp.me/ns#title',
|
||||
'https://ogp.me/ns#title',
|
||||
],
|
||||
'date': [
|
||||
'http://ogp.me/ns/article#modified_time',
|
||||
'https://ogp.me/ns/article#modified_time',
|
||||
'http://ogp.me/ns/article#published_time',
|
||||
'https://ogp.me/ns/article#published_time',
|
||||
]
|
||||
}
|
||||
for rdfa in data['rdfa']:
|
||||
for key, props in rdfa.items():
|
||||
for attribute, properties in rdfa_keys.items():
|
||||
for prop in properties:
|
||||
if prop in props:
|
||||
for values in props[prop]:
|
||||
s[attribute] = values['@value']
|
||||
|
||||
for og in data['opengraph']:
|
||||
titles = list(filter(None, [value if 'og:title' in key else None for key, value in og['properties']]))
|
||||
modified = list(filter(None, [value if 'article:modified_time' in key else None for key, value in og['properties']]))
|
||||
published = list(filter(None, [value if 'article:published_time' in key else None for key, value in og['properties']]))
|
||||
if len(modified):
|
||||
s['date'] = modified[0]
|
||||
if len(published):
|
||||
s['date'] = published[0]
|
||||
if len(titles):
|
||||
s['title'] = titles[0]
|
||||
|
||||
for md in data['microdata']:
|
||||
if md['type'] in ['https://schema.org/NewsArticle', 'http://schema.org/NewsArticle']:
|
||||
props = md['properties']
|
||||
s['title'] = props['headline']
|
||||
if props['dateModified']:
|
||||
s['date'] = props['dateModified']
|
||||
if props['datePublished']:
|
||||
s['date'] = props['datePublished']
|
||||
if 'author' in props and props['author']:
|
||||
if 'properties' in props['author']:
|
||||
s['author'] = props['author']['properties']['name']
|
||||
elif isinstance(props['author'], list):
|
||||
s['author'] = props['author'][0]['properties']['name']
|
||||
|
||||
for ld in data['json-ld']:
|
||||
if '@type' in ld and ld['@type'] in ['Article', 'NewsArticle']:
|
||||
s['title'] = ld['headline']
|
||||
if ld['dateModified']:
|
||||
s['date'] = ld['dateModified']
|
||||
if ld['datePublished']:
|
||||
s['date'] = ld['datePublished']
|
||||
if 'author' in ld and ld['author']:
|
||||
if 'name' in ld['author']:
|
||||
s['author'] = ld['author']['name']
|
||||
elif isinstance(ld['author'], list):
|
||||
s['author'] = ld['author'][0]['name']
|
||||
if '@graph' in ld:
|
||||
for gld in ld['@graph']:
|
||||
if '@type' in gld and gld['@type'] in ['Article', 'NewsArticle']:
|
||||
s['title'] = gld['headline']
|
||||
if gld['dateModified']:
|
||||
s['date'] = gld['dateModified']
|
||||
if gld['datePublished']:
|
||||
s['date'] = gld['datePublished']
|
||||
|
||||
return s
|
@@ -1,101 +0,0 @@
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
level=logging.DEBUG)
|
||||
|
||||
import re
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from scrapers import declutter
|
||||
import extruct
|
||||
|
||||
import settings
|
||||
from utils import clean
|
||||
from misc.metadata import parse_extruct
|
||||
from misc.time import unix
|
||||
from misc.api import xml
|
||||
|
||||
def comment(i):
|
||||
if 'author' not in i:
|
||||
return False
|
||||
|
||||
c = {}
|
||||
c['author'] = i.get('author', '')
|
||||
c['score'] = i.get('points', 0)
|
||||
c['date'] = unix(i.get('date', 0))
|
||||
c['text'] = clean(i.get('text', '') or '')
|
||||
c['comments'] = [comment(j) for j in i['children']]
|
||||
c['comments'] = list(filter(bool, c['comments']))
|
||||
return c
|
||||
|
||||
def comment_count(i):
|
||||
alive = 1 if i['author'] else 0
|
||||
return sum([comment_count(c) for c in i['comments']]) + alive
|
||||
|
||||
class Base:
|
||||
def __init__(config):
|
||||
self.config = config
|
||||
self.url = config.get('url')
|
||||
self.tz = config.get('tz')
|
||||
|
||||
def get_id(self, link):
|
||||
patterns = self.config.get('patterns')
|
||||
if not patterns:
|
||||
return link
|
||||
patterns = [re.compile(p) for p in patterns]
|
||||
patterns = list(filter(None, [p.match(link) for p in patterns]))
|
||||
patterns = list(set([':'.join(p.groups()) for p in patterns]))
|
||||
if not patterns:
|
||||
return link
|
||||
return patterns[0]
|
||||
|
||||
def feed(self, excludes=None):
|
||||
return []
|
||||
|
||||
def story(self, ref, urlref):
|
||||
if urlref is None:
|
||||
return False
|
||||
markup = xml(lambda x: urlref)
|
||||
if not markup:
|
||||
return False
|
||||
|
||||
s = {}
|
||||
s['author_link'] = ''
|
||||
s['score'] = 0
|
||||
s['comments'] = []
|
||||
s['num_comments'] = 0
|
||||
s['link'] = urlref
|
||||
s['url'] = urlref
|
||||
s['date'] = 0
|
||||
|
||||
soup = BeautifulSoup(markup, features='html.parser')
|
||||
icon32 = soup.find_all('link', rel="icon", href=True, sizes="32x32")
|
||||
icon16 = soup.find_all('link', rel="icon", href=True, sizes="16x16")
|
||||
favicon = soup.find_all('link', rel="shortcut icon", href=True)
|
||||
others = soup.find_all('link', rel="icon", href=True)
|
||||
icons = icon32 + icon16 + favicon + others
|
||||
base_url = '/'.join(urlref.split('/')[:3])
|
||||
icons = list(set([i.get('href') for i in icons]))
|
||||
icons = [i if i.startswith('http') else base_url + i for i in icons]
|
||||
|
||||
if icons:
|
||||
s['icon'] = icons[0]
|
||||
|
||||
data = extruct.extract(markup)
|
||||
s = parse_extruct(s, data)
|
||||
if s['date']:
|
||||
s['date'] = unix(s['date'], tz=self.tz)
|
||||
|
||||
if 'disqus' in markup:
|
||||
try:
|
||||
s['comments'] = declutter.get_comments(urlref)
|
||||
c['comments'] = list(filter(bool, c['comments']))
|
||||
s['num_comments'] = comment_count(s['comments'])
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except:
|
||||
pass
|
||||
|
||||
if not s['date']:
|
||||
return False
|
||||
return s
|
@@ -1,18 +0,0 @@
|
||||
import pytz
|
||||
import dateutil.parser
|
||||
|
||||
|
||||
TZINFOS = {
|
||||
'NZDT': pytz.timezone('Pacific/Auckland'),
|
||||
'NZST': pytz.timezone('Pacific/Auckland')
|
||||
}
|
||||
|
||||
def unix(date_str, tz=None, tzinfos=TZINFOS):
|
||||
try:
|
||||
dt = dateutil.parser.parse(date_str, tzinfos=tzinfos)
|
||||
if tz:
|
||||
dt = pytz.timezone(tz).localize(dt)
|
||||
return int(dt.timestamp())
|
||||
except:
|
||||
pass
|
||||
return 0
|
@@ -4,7 +4,6 @@ certifi==2020.6.20
chardet==3.0.4
click==7.1.2
commonmark==0.9.1
extruct==0.10.0
Flask==1.1.2
Flask-Cors==3.0.8
gevent==20.6.2
@@ -12,13 +11,11 @@ greenlet==0.4.16
idna==2.10
itsdangerous==1.1.0
Jinja2==2.11.2
lxml==4.6.1
MarkupSafe==1.1.1
packaging==20.4
praw==6.4.0
prawcore==1.4.0
pyparsing==2.4.7
pytz==2020.4
requests==2.24.0
six==1.15.0
soupsieve==2.0.1
@@ -30,4 +27,3 @@ websocket-client==0.57.0
Werkzeug==1.0.1
zope.event==4.4
zope.interface==5.1.0
python-dateutil==2.8.1
@@ -1,41 +0,0 @@
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
level=logging.DEBUG)
|
||||
import requests
|
||||
|
||||
READ_API = 'http://127.0.0.1:33843/browser/details'
|
||||
READ_COMMENT__API = 'http://127.0.0.1:33843/browser/commentd'
|
||||
TIMEOUT = 60
|
||||
|
||||
|
||||
def get_html(url):
|
||||
logging.info(f"Reader Scraper: {url}")
|
||||
details = get_details(url)
|
||||
if not details:
|
||||
return ''
|
||||
return details['content']
|
||||
|
||||
def get_details(url):
|
||||
try:
|
||||
r = requests.post(READ_API, data=dict(url=url), timeout=TIMEOUT)
|
||||
if r.status_code != 200:
|
||||
raise Exception('Bad response code ' + str(r.status_code))
|
||||
return r.json()
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except BaseException as e:
|
||||
logging.error('Problem Scraping article: {}'.format(str(e)))
|
||||
return None
|
||||
|
||||
def get_comments(url):
|
||||
try:
|
||||
r = requests.post(READ_COMMENT_API, data=dict(url=url), timeout=TIMEOUT)
|
||||
if r.status_code != 200:
|
||||
raise Exception('Bad response code ' + str(r.status_code))
|
||||
return r.json()
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except BaseException as e:
|
||||
logging.error('Problem getting comments for article: {}'.format(str(e)))
|
||||
return None
|
@@ -1,41 +0,0 @@
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
level=logging.DEBUG)
|
||||
import requests
|
||||
|
||||
DECLUTTER_API = 'https://declutter.1j.nz/details'
|
||||
DECLUTTER_COMMENT_API = 'https://declutter.1j.nz/comments'
|
||||
TIMEOUT = 30
|
||||
|
||||
|
||||
def get_html(url):
|
||||
logging.info(f"Declutter Scraper: {url}")
|
||||
details = get_details(url)
|
||||
if not details:
|
||||
return ''
|
||||
return details['content']
|
||||
|
||||
def get_details(url):
|
||||
try:
|
||||
r = requests.post(DECLUTTER_API, data=dict(url=url), timeout=TIMEOUT)
|
||||
if r.status_code != 200:
|
||||
raise Exception('Bad response code ' + str(r.status_code))
|
||||
return r.json()
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except BaseException as e:
|
||||
logging.error('Problem decluttering article: {}'.format(str(e)))
|
||||
return None
|
||||
|
||||
def get_comments(url):
|
||||
try:
|
||||
r = requests.post(DECLUTTER_COMMENT_API, data=dict(url=url), timeout=TIMEOUT)
|
||||
if r.status_code != 200:
|
||||
raise Exception('Bad response code ' + str(r.status_code))
|
||||
return r.json()
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except BaseException as e:
|
||||
logging.error('Problem getting comments for article: {}'.format(str(e)))
|
||||
return None
|
@@ -1,27 +0,0 @@
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
level=logging.DEBUG)
|
||||
import requests
|
||||
|
||||
READ_API = 'http://127.0.0.1:33843/details'
|
||||
TIMEOUT = 20
|
||||
|
||||
def get_html(url):
|
||||
logging.info(f"Local Scraper: {url}")
|
||||
details = get_details(url)
|
||||
if not details:
|
||||
return ''
|
||||
return details['content']
|
||||
|
||||
def get_details(url):
|
||||
try:
|
||||
r = requests.post(READ_API, data=dict(url=url), timeout=TIMEOUT)
|
||||
if r.status_code != 200:
|
||||
raise Exception('Bad response code ' + str(r.status_code))
|
||||
return r.json()
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except BaseException as e:
|
||||
logging.error('Problem getting article: {}'.format(str(e)))
|
||||
return None
|
@@ -1,37 +0,0 @@
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
level=logging.DEBUG)
|
||||
import requests
|
||||
|
||||
OUTLINE_REFERER = 'https://outline.com/'
|
||||
OUTLINE_API = 'https://api.outline.com/v3/parse_article'
|
||||
TIMEOUT = 20
|
||||
|
||||
def get_html(url):
|
||||
details = get_details(url)
|
||||
if not details:
|
||||
return ''
|
||||
return details['html']
|
||||
|
||||
def get_details(url):
|
||||
try:
|
||||
logging.info(f"Outline Scraper: {url}")
|
||||
params = {'source_url': url}
|
||||
headers = {'Referer': OUTLINE_REFERER}
|
||||
r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=TIMEOUT)
|
||||
if r.status_code == 429:
|
||||
logging.info('Rate limited by outline, sleeping 30s and skipping...')
|
||||
time.sleep(30)
|
||||
return None
|
||||
if r.status_code != 200:
|
||||
raise Exception('Bad response code ' + str(r.status_code))
|
||||
data = r.json()['data']
|
||||
if 'URL is not supported by Outline' in data['html']:
|
||||
raise Exception('URL not supported by Outline')
|
||||
return data
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except BaseException as e:
|
||||
logging.error('Problem outlining article: {}'.format(str(e)))
|
||||
return None
|
@@ -1,6 +1,8 @@
import database
import search
import sys
import settings
import logging

import json
import requests
@@ -21,7 +23,7 @@ def database_del_story(sid):

def search_del_story(sid):
    try:
        r = requests.delete(search.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
        r = requests.delete(settings.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
        if r.status_code != 202:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
58 apiserver/scripts/fix-stories.py Normal file
@@ -0,0 +1,58 @@
import time
import json
import logging

import feed
import database
import search

database.init()

def fix_gzip_bug(story_list):
    FIX_THRESHOLD = 150

    count = 1
    for sid in story_list:
        try:
            sid = sid[0]
            story = database.get_story(sid)
            full_json = json.loads(story.full_json)
            meta_json = json.loads(story.meta_json)
            text = full_json.get('text', '')

            count = text.count('�')
            if not count: continue

            ratio = count / len(text) * 1000
            print('Bad story:', sid, 'Num ?:', count, 'Ratio:', ratio)
            if ratio < FIX_THRESHOLD: continue

            print('Attempting to fix...')

            valid = feed.update_story(meta_json, is_manual=True)
            if valid:
                database.put_story(meta_json)
                search.put_story(meta_json)
                print('Success')
            else:
                print('Story was not valid')

            time.sleep(3)

        except KeyboardInterrupt:
            raise
        except BaseException as e:
            logging.exception(e)
            breakpoint()

if __name__ == '__main__':
    num_stories = database.count_stories()

    print('Fix {} stories?'.format(num_stories))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()

    story_list = database.get_story_list()

    fix_gzip_bug(story_list)
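The FIX_THRESHOLD above is expressed as replacement characters per thousand characters of text: count / len(text) * 1000 has to reach 150 before the story is re-fetched, otherwise it is only reported. A quick sanity check of that arithmetic, not part of the diff, on made-up text:

text = 'x' * 850 + '�' * 150           # hypothetical article body, 15% mojibake
ratio = text.count('�') / len(text) * 1000
print(ratio)                            # 150.0 — exactly at the threshold, so it would be refetched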
62
apiserver/scripts/reindex.py
Normal file
62
apiserver/scripts/reindex.py
Normal file
@@ -0,0 +1,62 @@
import logging
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO)

import database
from sqlalchemy import select
import search
import sys

import time
import json
import requests

database.init()
search.init()

BATCH_SIZE = 5000

def put_stories(stories):
    return search.meili_api(requests.post, 'indexes/qotnews/documents', stories)

def get_update(update_id):
    return search.meili_api(requests.get, 'tasks/{}'.format(update_id))

if __name__ == '__main__':
    num_stories = database.count_stories()

    print('Reindex {} stories?'.format(num_stories))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()

    story_list = database.get_story_list()

    count = 1
    while len(story_list):
        stories = []

        for _ in range(BATCH_SIZE):
            try:
                sid = story_list.pop()
            except IndexError:
                break

            story = database.get_story(sid)
            print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
            story_obj = json.loads(story.meta_json)
            stories.append(story_obj)
            count += 1

        res = put_stories(stories)
        update_id = res['uid']

        print('Waiting for processing', end='')
        while get_update(update_id)['status'] != 'succeeded':
            time.sleep(0.5)
            print('.', end='', flush=True)

        print()

    print('Done.')
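As a usage note, the two helpers above can also be driven non-interactively. A minimal sketch (the `sids` list is hypothetical, and it assumes the same running MeiliSearch instance and initialized database the script itself expects):

```
# Re-index a handful of specific stories by id, reusing put_stories()/get_update() above.
sids = ['ABCD', 'EFGH']  # hypothetical story ids
docs = [json.loads(database.get_story(sid).meta_json) for sid in sids]

task = put_stories(docs)
while get_update(task['uid'])['status'] != 'succeeded':
    time.sleep(0.5)
```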
apiserver/scripts/tests.py (new file, 23 lines)
@@ -0,0 +1,23 @@
import time
import requests

def test_search_api():
    num_tests = 100
    total_time = 0

    for i in range(num_tests):
        start = time.time()

        res = requests.get('http://127.0.0.1:33842/api/search?q=iphone')
        res.raise_for_status()

        duration = time.time() - start
        total_time += duration

    avg_time = total_time / num_tests

    print('Average search time:', avg_time)

if __name__ == '__main__':
    test_search_api()
@@ -4,83 +4,62 @@ logging.basicConfig(
    level=logging.DEBUG)

import requests
import settings

MEILI_URL = 'http://127.0.0.1:7700/'
SEARCH_ENABLED = bool(settings.MEILI_URL)

def meili_api(method, route, json=None, params=None, parse_json=True):
    try:
        r = method(settings.MEILI_URL + route, json=json, params=params, timeout=4)
        if r.status_code > 299:
            raise Exception('Bad response code ' + str(r.status_code))
        if parse_json:
            return r.json()
        else:
            r.encoding = 'utf-8'
            return r.text
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e))
        return False

def create_index():
    try:
        json = dict(name='qotnews', uid='qotnews')
        r = requests.post(MEILI_URL + 'indexes', json=json, timeout=2)
        if r.status_code != 201:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem creating MeiliSearch index: {}'.format(str(e)))
        return False
    json = dict(uid='qotnews', primaryKey='id')
    return meili_api(requests.post, 'indexes', json=json)

def update_rankings():
    try:
        json = ['typo', 'words', 'proximity', 'attribute', 'desc(date)', 'wordsPosition', 'exactness']
        r = requests.post(MEILI_URL + 'indexes/qotnews/settings/ranking-rules', json=json, timeout=2)
        if r.status_code != 202:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem setting MeiliSearch ranking rules: {}'.format(str(e)))
        return False
    json = ['typo', 'words', 'proximity', 'date:desc', 'exactness']
    return meili_api(requests.post, 'indexes/qotnews/settings/ranking-rules', json=json)

def update_attributes():
    try:
        json = ['title', 'url', 'author', 'link', 'id', 'source']
        r = requests.post(MEILI_URL + 'indexes/qotnews/settings/searchable-attributes', json=json, timeout=2)
        if r.status_code != 202:
            raise Exception('Bad response code ' + str(r.status_code))
        requests.delete(MEILI_URL + 'indexes/qotnews/settings/displayed-attributes', timeout=2)
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem setting MeiliSearch searchable attributes: {}'.format(str(e)))
        return False
    json = ['title', 'url', 'author']
    r = meili_api(requests.post, 'indexes/qotnews/settings/searchable-attributes', json=json)
    json = ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments']
    r = meili_api(requests.post, 'indexes/qotnews/settings/displayed-attributes', json=json)
    return r

def init():
    create_index()
    if not SEARCH_ENABLED:
        logging.info('Search is not enabled, skipping init.')
        return
    print(create_index())
    update_rankings()
    update_attributes()

def put_story(story):
    story = story.copy()
    story.pop('text', None)
    story.pop('comments', None)
    try:
        r = requests.post(MEILI_URL + 'indexes/qotnews/documents', json=[story], timeout=2)
        if r.status_code != 202:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem putting MeiliSearch story: {}'.format(str(e)))
        return False
    if not SEARCH_ENABLED: return
    return meili_api(requests.post, 'indexes/qotnews/documents', [story])

def search(q):
    try:
        params = dict(q=q, limit=250)
        r = requests.get(MEILI_URL + 'indexes/qotnews/search', params=params, timeout=2)
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()['hits']
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem searching MeiliSearch: {}'.format(str(e)))
        return False
    if not SEARCH_ENABLED: return []
    params = dict(q=q, limit=settings.FEED_LENGTH)
    r = meili_api(requests.get, 'indexes/qotnews/search', params=params, parse_json=False)
    return r

if __name__ == '__main__':
    create_index()
    init()

    print(search('the'))
    print(update_rankings())

    print(search('facebook'))
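For reference, `meili_api()` above is a thin wrapper over an arbitrary `requests` method and MeiliSearch route. A minimal sketch of exercising it from a Python shell, assuming `MEILI_URL` is set in `settings.py` and a MeiliSearch instance is running there:

```
import requests
import search

search.init()

# 'indexes/qotnews/stats' is a standard MeiliSearch route; the wrapper returns
# the parsed JSON body on success, or False if the request failed.
print(search.meili_api(requests.get, 'indexes/qotnews/stats'))

# search.search() returns the raw JSON text of the hits (parse_json=False).
print(search.search('firefox'))
```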
@@ -28,6 +28,8 @@ from flask_cors import CORS
|
||||
database.init()
|
||||
search.init()
|
||||
|
||||
news_index = 0
|
||||
|
||||
def new_id():
|
||||
nid = gen_rand_id()
|
||||
while database.get_story(nid):
|
||||
@@ -40,8 +42,11 @@ cors = CORS(flask_app)
|
||||
|
||||
@flask_app.route('/api')
|
||||
def api():
|
||||
stories = database.get_stories(settings.MAX_STORY_AGE)
|
||||
res = Response(json.dumps({"stories": stories}))
|
||||
skip = request.args.get('skip', 0)
|
||||
limit = request.args.get('limit', settings.FEED_LENGTH)
|
||||
stories = database.get_stories(limit, skip)
|
||||
# hacky nested json
|
||||
res = Response('{"stories":[' + ','.join(stories) + ']}')
|
||||
res.headers['content-type'] = 'application/json'
|
||||
return res
|
||||
|
||||
@@ -51,8 +56,10 @@ def apisearch():
|
||||
if len(q) >= 3:
|
||||
results = search.search(q)
|
||||
else:
|
||||
results = []
|
||||
return dict(results=results)
|
||||
results = '[]'
|
||||
res = Response(results)
|
||||
res.headers['content-type'] = 'application/json'
|
||||
return res
|
||||
|
||||
@flask_app.route('/api/submit', methods=['POST'], strict_slashes=False)
|
||||
def submit():
|
||||
@@ -60,6 +67,8 @@ def submit():
|
||||
url = request.form['url']
|
||||
nid = new_id()
|
||||
|
||||
logging.info('Manual submission: ' + url)
|
||||
|
||||
parse = urlparse(url)
|
||||
if 'news.ycombinator.com' in parse.hostname:
|
||||
source = 'hackernews'
|
||||
@@ -67,10 +76,13 @@ def submit():
|
||||
elif 'tildes.net' in parse.hostname and '~' in url:
|
||||
source = 'tildes'
|
||||
ref = parse.path.split('/')[2]
|
||||
elif 'lobste.rs' in parse.hostname and '/s/' in url:
|
||||
source = 'lobsters'
|
||||
ref = parse.path.split('/')[2]
|
||||
elif 'reddit.com' in parse.hostname and 'comments' in url:
|
||||
source = 'reddit'
|
||||
ref = parse.path.split('/')[4]
|
||||
elif settings.HOSTNAME in parse.hostname:
|
||||
elif 'news.t0.vc' in parse.hostname:
|
||||
raise Exception('Invalid article')
|
||||
else:
|
||||
source = 'manual'
|
||||
@@ -99,9 +111,8 @@ def submit():
|
||||
def story(sid):
|
||||
story = database.get_story(sid)
|
||||
if story:
|
||||
related = database.get_stories_by_url(story.meta['url'])
|
||||
related = [r.meta for r in related]
|
||||
res = Response(json.dumps({"story": story.data, "related": related}))
|
||||
# hacky nested json
|
||||
res = Response('{"story":' + story.full_json + '}')
|
||||
res.headers['content-type'] = 'application/json'
|
||||
return res
|
||||
else:
|
||||
@@ -111,9 +122,11 @@ def story(sid):
|
||||
@flask_app.route('/search')
|
||||
def index():
|
||||
return render_template('index.html',
|
||||
title='Feed',
|
||||
url=settings.HOSTNAME,
|
||||
description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')
|
||||
title='QotNews',
|
||||
url='news.t0.vc',
|
||||
description='Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode',
|
||||
robots='index',
|
||||
)
|
||||
|
||||
@flask_app.route('/<sid>', strict_slashes=False)
|
||||
@flask_app.route('/<sid>/c', strict_slashes=False)
|
||||
@@ -125,7 +138,7 @@ def static_story(sid):
|
||||
|
||||
story = database.get_story(sid)
|
||||
if not story: return abort(404)
|
||||
story = story.data
|
||||
story = json.loads(story.full_json)
|
||||
|
||||
score = story['score']
|
||||
num_comments = story['num_comments']
|
||||
@@ -138,65 +151,71 @@ def static_story(sid):
|
||||
url = url.replace('www.', '')
|
||||
|
||||
return render_template('index.html',
|
||||
title=story['title'],
|
||||
url=url,
|
||||
description=description)
|
||||
title=story['title'] + ' | QotNews',
|
||||
url=url,
|
||||
description=description,
|
||||
robots='noindex',
|
||||
)
|
||||
|
||||
http_server = WSGIServer(('', 33842), flask_app)
|
||||
|
||||
def _add_new_refs():
|
||||
for ref, source, urlref in feed.get_list():
|
||||
if database.get_story_by_ref(ref):
|
||||
continue
|
||||
try:
|
||||
nid = new_id()
|
||||
database.put_ref(ref, nid, source, urlref)
|
||||
logging.info('Added ref ' + ref)
|
||||
except database.IntegrityError:
|
||||
continue
|
||||
|
||||
def _update_current_story(item):
|
||||
try:
|
||||
story = database.get_story(item['sid']).data
|
||||
except AttributeError:
|
||||
story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
|
||||
|
||||
logging.info('Updating story: {}'.format(str(story['ref'])))
|
||||
|
||||
valid = feed.update_story(story, urlref=item['urlref'])
|
||||
if valid:
|
||||
database.put_story(story)
|
||||
search.put_story(story)
|
||||
else:
|
||||
database.del_ref(item['ref'])
|
||||
logging.info('Removed ref {}'.format(item['ref']))
|
||||
|
||||
def feed_thread():
|
||||
ref_list = []
|
||||
global news_index
|
||||
|
||||
try:
|
||||
while True:
|
||||
# onboard new stories
|
||||
if not len(ref_list):
|
||||
_add_new_refs()
|
||||
ref_list = database.get_reflist()
|
||||
if news_index == 0:
|
||||
for ref, source in feed.list():
|
||||
if database.get_story_by_ref(ref):
|
||||
continue
|
||||
try:
|
||||
nid = new_id()
|
||||
logging.info('Adding ref: {}, id: {}, source: {}'.format(ref, nid, source))
|
||||
database.put_ref(ref, nid, source)
|
||||
except database.IntegrityError:
|
||||
logging.info('Already have ID / ref, skipping.')
|
||||
continue
|
||||
|
||||
ref_list = database.get_reflist(settings.FEED_LENGTH)
|
||||
|
||||
# update current stories
|
||||
if len(ref_list):
|
||||
item = ref_list.pop(0)
|
||||
_update_current_story(item)
|
||||
if news_index < len(ref_list):
|
||||
item = ref_list[news_index]
|
||||
|
||||
try:
|
||||
story_json = database.get_story(item['sid']).full_json
|
||||
story = json.loads(story_json)
|
||||
except AttributeError:
|
||||
story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
|
||||
|
||||
logging.info('Updating {} story: {}, index: {}'.format(story['source'], story['ref'], news_index))
|
||||
|
||||
valid = feed.update_story(story)
|
||||
if valid:
|
||||
database.put_story(story)
|
||||
search.put_story(story)
|
||||
else:
|
||||
database.del_ref(item['ref'])
|
||||
logging.info('Removed ref {}'.format(item['ref']))
|
||||
else:
|
||||
logging.info('Skipping index: ' + str(news_index))
|
||||
|
||||
gevent.sleep(6)
|
||||
|
||||
news_index += 1
|
||||
if news_index == settings.FEED_LENGTH: news_index = 0
|
||||
|
||||
except KeyboardInterrupt:
|
||||
logging.info('Ending feed thread...')
|
||||
except ValueError as e:
|
||||
logging.error('feed_thread error: {} {}'.format(e.__class__.__name__, e))
|
||||
logging.critical('feed_thread error: {} {}'.format(e.__class__.__name__, e))
|
||||
http_server.stop()
|
||||
|
||||
print('Starting Feed thread...')
|
||||
logging.info('Starting Feed thread...')
|
||||
gevent.spawn(feed_thread)
|
||||
|
||||
print('Starting HTTP thread...')
|
||||
logging.info('Starting HTTP thread...')
|
||||
try:
|
||||
http_server.serve_forever()
|
||||
except KeyboardInterrupt:
|
||||
|
@@ -1,57 +1,23 @@
|
||||
# QotNews settings
|
||||
# edit this file and save it as settings.py
|
||||
|
||||
HOSTNAME = 'news.t0.vc'
|
||||
MAX_STORY_AGE = 3*24*60*60
|
||||
|
||||
# Feed Lengths
|
||||
# Number of top items from each site to pull
|
||||
# set to 0 to disable that site
|
||||
FEED_LENGTH = 75
|
||||
NUM_HACKERNEWS = 15
|
||||
NUM_REDDIT = 10
|
||||
NUM_LOBSTERS = 10
|
||||
NUM_REDDIT = 15
|
||||
NUM_TILDES = 5
|
||||
NUM_SUBSTACK = 10
|
||||
|
||||
SITEMAP = {}
|
||||
# SITEMAP['nzherald'] = {
|
||||
# 'url': "https://www.nzherald.co.nz/arcio/news-sitemap/",
|
||||
# 'count': 20,
|
||||
# 'patterns': [
|
||||
# r'^https:\/\/www\.(nzherald\.co\.nz)\/.*\/([^/]+)\/?$',
|
||||
# ],
|
||||
# 'excludes': [
|
||||
# 'driven.co.nz',
|
||||
# 'oneroof.co.nz',
|
||||
# 'nzherald.co.nz/sponsored-stories',
|
||||
# 'nzherald.co.nz/entertainment/',
|
||||
# 'nzherald.co.nz/lifestyle/',
|
||||
# 'nzherald.co.nz/travel/',
|
||||
# 'nzherald.co.nz/sport/',
|
||||
# 'nzherald.co.nz/promotions/',
|
||||
# 'nzherald.co.nzhttp',
|
||||
# 'herald-afternoon-quiz',
|
||||
# 'herald-morning-quiz'
|
||||
# ],
|
||||
# }
|
||||
# Meilisearch server URL
|
||||
# Leave blank if not using search
|
||||
#MEILI_URL = 'http://127.0.0.1:7700/'
|
||||
MEILI_URL = ''
|
||||
|
||||
SUBSTACK = {}
|
||||
# SUBSTACK['webworm'] = { 'url': "https://www.webworm.co", 'count': 10},
|
||||
# SUBSTACK['the bulletin'] = { 'url': "https://thespinoff.substack.com", 'count': 10},
|
||||
|
||||
CATEGORY = {}
|
||||
# CATEGORY['radionz'] = {
|
||||
# 'url': "https://www.rnz.co.nz/news/",
|
||||
# 'count': 20,
|
||||
# 'patterns': [
|
||||
# r'https:\/\/www\.(rnz\.co\.nz)\/news\/[^\/]+\/(\d+)\/[^\/]+\/?'
|
||||
# ],
|
||||
# 'excludes': [
|
||||
# 'rnz.co.nz/news/sport',
|
||||
# 'rnz.co.nz/weather',
|
||||
# ],
|
||||
# }
|
||||
|
||||
SCRAPERS = ['browser', 'declutter', 'outline', 'local']
|
||||
# Readerserver URL
|
||||
# Leave blank if not using, but that defeats the whole point
|
||||
READER_URL = 'http://127.0.0.1:33843/'
|
||||
|
||||
# Reddit account info
|
||||
# leave blank if not using Reddit
|
||||
@@ -59,10 +25,6 @@ REDDIT_CLIENT_ID = ''
|
||||
REDDIT_CLIENT_SECRET = ''
|
||||
REDDIT_USER_AGENT = ''
|
||||
|
||||
# Minimum points or number of comments before including a thread:
|
||||
REDDIT_COMMENT_THRESHOLD = 10
|
||||
REDDIT_SCORE_THRESHOLD = 25
|
||||
|
||||
SUBREDDITS = [
|
||||
'Economics',
|
||||
'AcademicPhilosophy',
|
||||
@@ -71,13 +33,9 @@ SUBREDDITS = [
|
||||
'HistoryofIdeas',
|
||||
'LaymanJournals',
|
||||
'PhilosophyofScience',
|
||||
'PoliticsPDFs',
|
||||
'Scholar',
|
||||
'StateOfTheUnion',
|
||||
'TheAgora',
|
||||
'TrueFilm',
|
||||
'TrueReddit',
|
||||
'UniversityofReddit',
|
||||
'culturalstudies',
|
||||
'hardscience',
|
||||
'indepthsports',
|
||||
@@ -86,4 +44,7 @@ SUBREDDITS = [
|
||||
'neurophilosophy',
|
||||
'resilientcommunities',
|
||||
'worldevents',
|
||||
'StallmanWasRight',
|
||||
'EverythingScience',
|
||||
'longevity',
|
||||
]
|
||||
|
@@ -8,6 +8,14 @@ import string
|
||||
|
||||
from bleach.sanitizer import Cleaner
|
||||
|
||||
def alert_tanner(message):
|
||||
try:
|
||||
logging.info('Alerting Tanner: ' + message)
|
||||
params = dict(qotnews=message)
|
||||
requests.get('https://tbot.tannercollin.com/message', params=params, timeout=4)
|
||||
except BaseException as e:
|
||||
logging.error('Problem alerting Tanner: ' + str(e))
|
||||
|
||||
def gen_rand_id():
|
||||
return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))
|
||||
|
||||
|
@@ -1,29 +1,53 @@
|
||||
const port = 33843;
|
||||
const express = require('express');
|
||||
const app = express();
|
||||
const simple = require('./scraper/simple');
|
||||
const browser = require('./scraper/browser');
|
||||
const port = 33843;
|
||||
|
||||
const request = require('request');
|
||||
const JSDOM = require('jsdom').JSDOM;
|
||||
const { Readability } = require('readability');
|
||||
|
||||
app.use(express.urlencoded({ extended: true }));
|
||||
|
||||
app.get('/', (req, res) => {
|
||||
const routes = ['/', '/details', '/browser', '/browser/details', '/browser/comments'];
|
||||
|
||||
const html = routes.map(route => `
|
||||
<form method="POST" action="${route}" accept-charset="UTF-8">
|
||||
<fieldset>
|
||||
<legend>route: POST ${route}</legend>
|
||||
<input name="url">
|
||||
<button type="submit">SUBMIT</button>
|
||||
</fieldset>
|
||||
</form>`).join('<hr />');
|
||||
res.send(html);
|
||||
res.send('<form method="POST" accept-charset="UTF-8"><input name="url"><button type="submit">SUBMIT</button></form>');
|
||||
});
|
||||
|
||||
const requestCallback = (url, res) => (error, response, body) => {
|
||||
if (!error && response.statusCode == 200) {
|
||||
console.log('Response OK.');
|
||||
|
||||
const doc = new JSDOM(body, {url: url});
|
||||
const reader = new Readability(doc.window.document);
|
||||
const article = reader.parse();
|
||||
|
||||
if (article && article.content) {
|
||||
res.send(article.content);
|
||||
} else {
|
||||
res.sendStatus(404);
|
||||
}
|
||||
} else {
|
||||
console.log('Response error:', error ? error.toString() : response.statusCode);
|
||||
res.sendStatus(response ? response.statusCode : 404);
|
||||
}
|
||||
};
|
||||
|
||||
app.post('/', (req, res) => {
|
||||
const url = req.body.url;
|
||||
const requestOptions = {
|
||||
url: url,
|
||||
gzip: true,
|
||||
//headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'},
|
||||
//headers: {'User-Agent': 'Twitterbot/1.0'},
|
||||
headers: {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0',
|
||||
'X-Forwarded-For': '66.249.66.1',
|
||||
},
|
||||
};
|
||||
|
||||
console.log('Parse request for:', url);
|
||||
|
||||
request(requestOptions, requestCallback(url, res));
|
||||
});
|
||||
app.post('/', simple.scrape);
|
||||
app.post('/details', simple.details);
|
||||
app.post('/browser', browser.scrape);
|
||||
app.post('/browser/details', browser.details);
|
||||
app.post('/browser/comments', browser.comments);
|
||||
|
||||
app.listen(port, () => {
|
||||
console.log(`Example app listening on port ${port}!`);
|
||||
|
@@ -4,12 +4,10 @@
|
||||
"main": "main.js",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@mozilla/readability": "^0.3.0",
|
||||
"dompurify": "^1.0.11",
|
||||
"express": "^4.17.1",
|
||||
"jsdom": "^15.1.1",
|
||||
"node-fetch": "^2.6.1",
|
||||
"playwright": "^1.5.2",
|
||||
"readability": "https://github.com/mozilla/readability",
|
||||
"request": "^2.88.0"
|
||||
}
|
||||
}
|
||||
|
@@ -1,45 +0,0 @@
|
||||
const { firefox } = require("playwright");
|
||||
const { JSDOM } = require("jsdom");
|
||||
const { Readability } = require("@mozilla/readability");
|
||||
|
||||
const { getUserAgent } = require('../../utils/user-agent');
|
||||
const { blockedRegexes, matchUrlDomain } = require("../../utils/sites");
|
||||
|
||||
module.exports.getDetails = async (url) => {
|
||||
const { userAgent, headers } = getUserAgent(url);
|
||||
|
||||
const browser = await firefox.launch({ args: [], headless: true });
|
||||
const tab = await browser.newPage({
|
||||
extraHTTPHeaders: headers,
|
||||
userAgent,
|
||||
viewport: { width: 2000, height: 10000 },
|
||||
});
|
||||
|
||||
try {
|
||||
await tab.route(/.*/, (route) => {
|
||||
const routeUrl = route.request().url();
|
||||
const blockedDomains = Object.keys(blockedRegexes);
|
||||
const domain = matchUrlDomain(blockedDomains, routeUrl);
|
||||
if (domain && routeUrl.match(blockedRegexes[domain])) {
|
||||
return route.abort();
|
||||
}
|
||||
return route.continue();
|
||||
});
|
||||
await tab.addInitScript({ path: "scraper/browser/scripts/bypass-paywalls-chrome/src/js/contentScript.js" });
|
||||
await tab.addInitScript({ path: "scraper/browser/scripts/cosmetic-filters.js" });
|
||||
await tab.addInitScript({ path: "scraper/browser/scripts/fix-relative-links.js" });
|
||||
await tab.goto(url, { timeout: 60000, waitUntil: "domcontentloaded" });
|
||||
await tab.waitForTimeout(2000);
|
||||
|
||||
const body = await tab.content();
|
||||
const doc = new JSDOM(body, { url });
|
||||
const reader = new Readability(doc.window.document);
|
||||
const article = reader.parse();
|
||||
return article;
|
||||
} catch (e) {
|
||||
throw e;
|
||||
} finally {
|
||||
await tab.close();
|
||||
await browser.close();
|
||||
}
|
||||
};
|
@@ -1,34 +0,0 @@
|
||||
const { JSDOM } = require("jsdom");
|
||||
const { firefox } = require("playwright");
|
||||
const { getUserAgent } = require('../../utils/user-agent');
|
||||
const { disqusThread } = require('../../utils/disqus-thread');
|
||||
|
||||
const DISQUS_EMBED = 'https://disqus.com/embed/comments/';
|
||||
|
||||
module.exports.getComments = async (url) => {
|
||||
const { userAgent, headers } = getUserAgent(url);
|
||||
|
||||
const browser = await firefox.launch({ args: [], headless: true });
|
||||
const tab = await browser.newPage({
|
||||
extraHTTPHeaders: headers,
|
||||
userAgent,
|
||||
viewport: { width: 2000, height: 10000 },
|
||||
});
|
||||
|
||||
try {
|
||||
await tab.goto(url, { timeout: 60000, waitUntil: "domcontentloaded" });
|
||||
|
||||
const response = await tab.waitForResponse(response => response.url().includes(DISQUS_EMBED));
|
||||
const text = await response.text();
|
||||
const dom = new JSDOM(text, response.url());
|
||||
const script = dom.window.document.querySelector('#disqus-threadData')
|
||||
const data = JSON.parse(script.innerHTML);
|
||||
|
||||
return disqusThread(data);
|
||||
} catch (e) {
|
||||
throw e;
|
||||
} finally {
|
||||
await tab.close();
|
||||
await browser.close();
|
||||
}
|
||||
};
|
@@ -1,40 +0,0 @@
|
||||
const { getDetails } = require('./_browser');
|
||||
const { getComments } = require('./_comments');
|
||||
|
||||
module.exports.scrape = async (req, res) => {
|
||||
try {
|
||||
const article = await getDetails(req.body.url);
|
||||
if (!article || !article.content) {
|
||||
throw new Error('failed to get details.');
|
||||
}
|
||||
return res.send(article.content);
|
||||
} catch (e) {
|
||||
return res.sendStatus(500);
|
||||
}
|
||||
};
|
||||
|
||||
module.exports.details = async (req, res) => {
|
||||
try {
|
||||
const article = await getDetails(req.body.url);
|
||||
if (!article) {
|
||||
throw new Error('failed to get details.');
|
||||
}
|
||||
return res.send(article);
|
||||
} catch (e) {
|
||||
console.log(e);
|
||||
return res.sendStatus(500);
|
||||
}
|
||||
};
|
||||
|
||||
module.exports.comments = async (req, res) => {
|
||||
try {
|
||||
const comments = await getComments(req.body.url);
|
||||
if (!comments) {
|
||||
throw new Error('failed to get comments.');
|
||||
}
|
||||
return res.send(comments);
|
||||
} catch (e) {
|
||||
console.log(e);
|
||||
return res.sendStatus(500);
|
||||
}
|
||||
};
|
Submodule readerserver/scraper/browser/scripts/bypass-paywalls-chrome deleted from 44f3d1b114
@@ -1,104 +0,0 @@
|
||||
(function () {
|
||||
removeHiddenElements();
|
||||
|
||||
if (matchDomain("stuff.co.nz")) {
|
||||
removeSelectors([
|
||||
".support-brief-container",
|
||||
'[class*="donation-in-"]',
|
||||
".sics-component__sharebar",
|
||||
".breaking-news-pointer",
|
||||
".bigbyline-container",
|
||||
[
|
||||
".sics-component__html-injector.sics-component__story__paragraph",
|
||||
"READ MORE:",
|
||||
],
|
||||
]);
|
||||
}
|
||||
if (matchDomain("nzherald.co.nz")) {
|
||||
removeSelectors([
|
||||
"[href$='#commenting-widget']",
|
||||
".related-articles",
|
||||
".article__print-button",
|
||||
".share-bar",
|
||||
".c-suggest-links.read-more-links",
|
||||
".website-of-year",
|
||||
".meta-data",
|
||||
".article__kicker",
|
||||
".author__image",
|
||||
]);
|
||||
}
|
||||
if (matchDomain(["rnz.co.nz", "radionz.co.nz"])) {
|
||||
removeSelectors([".c-advert-app", ".c-sub-nav"]);
|
||||
}
|
||||
if (matchDomain(["newsroom.co.nz"])) {
|
||||
removeSelectors([".article_content__section", ".bio"]);
|
||||
}
|
||||
if (matchDomain(["newshub.co.nz"])) {
|
||||
removeSelectors([
|
||||
".c-ArticleHeading-authorPicture",
|
||||
".relatedarticles",
|
||||
".ArticleAttribution",
|
||||
'.GlobalFooter'
|
||||
]);
|
||||
}
|
||||
if (matchDomain(["tvnz.co.nz"])) {
|
||||
removeSelectors([".signup-container container"]);
|
||||
}
|
||||
if (matchDomain(["thespinoff.co.nz"])) {
|
||||
removeSelectors([".the-spinoff-club-interruptive", ".bulletin-signup"]);
|
||||
}
|
||||
|
||||
function matchDomain(domains) {
|
||||
const hostname = window.location.hostname;
|
||||
if (typeof domains === "string") {
|
||||
domains = [domains];
|
||||
}
|
||||
return domains.some(
|
||||
(domain) => hostname === domain || hostname.endsWith("." + domain)
|
||||
);
|
||||
}
|
||||
|
||||
function removeDOMElement(...elements) {
|
||||
for (const element of elements) {
|
||||
if (element) {
|
||||
element.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function pageContains(selector, text) {
|
||||
const elements = document.querySelectorAll(selector);
|
||||
return Array.prototype.filter.call(elements, function (element) {
|
||||
return RegExp(text).test(element.textContent);
|
||||
});
|
||||
}
|
||||
|
||||
function removeHiddenElements() {
|
||||
window.setTimeout(function () {
|
||||
const selector = "*:not(script):not(head):not(meta):not(link):not(style)";
|
||||
Array.from(document.querySelectorAll(selector))
|
||||
.filter((element) => {
|
||||
const computed = getComputedStyle(element);
|
||||
const displayNone = computed["display"] === "none";
|
||||
const visibilityHidden = computed["visibility"] === "hidden";
|
||||
return displayNone || visibilityHidden;
|
||||
})
|
||||
.forEach((element) => element && element.remove());
|
||||
}, 1000);
|
||||
}
|
||||
|
||||
function removeSelectors(selectors) {
|
||||
window.setTimeout(function () {
|
||||
const elements = selectors.flatMap((s) => {
|
||||
if (typeof s === "string") {
|
||||
return Array.from(document.querySelectorAll(s));
|
||||
}
|
||||
if (s && s.constructor.name === "Array") {
|
||||
return pageContains(...s);
|
||||
}
|
||||
return undefined;
|
||||
});
|
||||
removeDOMElement(...elements);
|
||||
}, 1000);
|
||||
}
|
||||
})();
|
@@ -1,14 +0,0 @@
|
||||
(function () {
|
||||
const { host, protocol } = window.location;
|
||||
const url = `${protocol}//${host}`;
|
||||
[
|
||||
['[src^="/"]', 'src'],
|
||||
['[href^="/"]', 'href']
|
||||
].forEach(([selector, attribute]) => {
|
||||
Array.from(document.querySelectorAll(selector))
|
||||
.filter(e => e.attributes[attribute] && /^\/[^\/]/.test(e.attributes[attribute].value))
|
||||
.forEach((e) => {
|
||||
e.attributes[attribute].value = `${url}${e.attributes[attribute].value}`;
|
||||
});
|
||||
});
|
||||
})();
|
@@ -1,59 +0,0 @@
|
||||
const fetch = require('node-fetch');
|
||||
const { JSDOM } = require('jsdom');
|
||||
const { Readability } = require('@mozilla/readability');
|
||||
|
||||
const { getUserAgent } = require('../utils/user-agent');
|
||||
|
||||
const extract = (url, body) => {
|
||||
const doc = new JSDOM(body, { url: url });
|
||||
const reader = new Readability(doc.window.document);
|
||||
return reader.parse();
|
||||
};
|
||||
|
||||
module.exports.scrape = async (req, res) => {
|
||||
try {
|
||||
const { userAgent, headers } = getUserAgent(req.body.url);
|
||||
const response = await fetch(req.body.url, {
|
||||
headers: {
|
||||
...headers,
|
||||
'User-Agent': userAgent
|
||||
}
|
||||
});
|
||||
if (!response.ok) {
|
||||
return res.sendStatus(response.statusCode);
|
||||
}
|
||||
const html = await response.text();
|
||||
const article = await extract(req.body.url, html);
|
||||
if (article && article.content) {
|
||||
return res.send(article.content);
|
||||
}
|
||||
return res.sendStatus(404);
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
return res.sendStatus(500);
|
||||
}
|
||||
};
|
||||
|
||||
module.exports.details = async (req, res) => {
|
||||
try {
|
||||
const { userAgent, headers } = getUserAgent(req.body.url);
|
||||
const response = await fetch(req.body.url, {
|
||||
headers: {
|
||||
...headers,
|
||||
'User-Agent': userAgent
|
||||
}
|
||||
});
|
||||
if (!response.ok) {
|
||||
return res.sendStatus(response.statusCode);
|
||||
}
|
||||
const html = await response.text();
|
||||
const article = await extract(req.body.url, html);
|
||||
if (article) {
|
||||
return res.send(article);
|
||||
}
|
||||
return res.sendStatus(404);
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
return res.sendStatus(500);
|
||||
}
|
||||
};
|
@@ -1,11 +0,0 @@
|
||||
const googleBotUserAgent = 'Googlebot/2.1 (+http://www.google.com/bot.html)';
|
||||
const googleBotIp = '66.249.66.1';
|
||||
|
||||
module.exports.googleBot = {
|
||||
userAgent: googleBotUserAgent,
|
||||
ip: googleBotIp,
|
||||
headers: {
|
||||
'User-Agent': googleBotUserAgent,
|
||||
'X-Forwarded-For': googleBotIp,
|
||||
}
|
||||
}
|
@@ -1,21 +0,0 @@
|
||||
module.exports.disqusThread = data => {
|
||||
const comments = data.response.posts.reduce((c, post) => ({
|
||||
...c,
|
||||
[post.id.toString()]: {
|
||||
author: post.author.name,
|
||||
authorLink: post.author.profileUrl,
|
||||
date: post.createdAt,
|
||||
text: post.raw_message,
|
||||
score: post.points,
|
||||
children: [],
|
||||
id: post.id.toString(),
|
||||
parent: (post.parent || '').toString(),
|
||||
}
|
||||
}), {});
|
||||
Object.keys(comments).filter(id => !!comments[id].parent).forEach(id => {
|
||||
const comment = comments[id];
|
||||
comments[comment.parent].children.push(comment);
|
||||
});
|
||||
const parents = Object.keys(comments).filter(id => comments[id].parent).map(id => comments[id]);
|
||||
return parents;
|
||||
};
|
@@ -1,98 +0,0 @@
|
||||
module.exports.blockedRegexes = {
|
||||
"adweek.com": /.+\.lightboxcdn\.com\/.+/,
|
||||
"afr.com": /afr\.com\/assets\/vendorsReactRedux_client.+\.js/,
|
||||
"businessinsider.com": /(.+\.tinypass\.com\/.+|cdn\.onesignal\.com\/sdks\/.+\.js)/,
|
||||
"chicagotribune.com": /.+:\/\/.+\.tribdss\.com\//,
|
||||
"economist.com": /(.+\.tinypass\.com\/.+|economist\.com\/engassets\/_next\/static\/chunks\/framework.+\.js)/,
|
||||
"editorialedomani.it": /(js\.pelcro\.com\/.+|editorialedomani.it\/pelcro\.js)/,
|
||||
"foreignpolicy.com": /.+\.tinypass\.com\/.+/,
|
||||
"fortune.com": /.+\.tinypass\.com\/.+/,
|
||||
"haaretz.co.il": /haaretz\.co\.il\/htz\/js\/inter\.js/,
|
||||
"haaretz.com": /haaretz\.com\/hdc\/web\/js\/minified\/header-scripts-int.js.+/,
|
||||
"inquirer.com": /.+\.tinypass\.com\/.+/,
|
||||
"lastampa.it": /.+\.repstatic\.it\/minify\/sites\/lastampa\/.+\/config\.cache\.php\?name=social_js/,
|
||||
"lrb.co.uk": /.+\.tinypass\.com\/.+/,
|
||||
"nzherald.co.nz": /(.+nzherald\.co\.nz\/.+\/subs\/p\.js|.+nzherald\.co\.nz\/.+\/react\.js|.+nzherald\.co\.nz\/.+\/appear\.js|.+nzherald\.co\.nz\/.+\/tracking\/.+|.+nzherald\.co\.nz\/.+\/default\.js|.+\/newsbarscript\.js)/,
|
||||
"medscape.com": /.+\.medscapestatic\.com\/.*medscape-library\.js/,
|
||||
"interest.co.nz": /(.+\.presspatron\.com.+|.+interest\.co\.nz.+pp-ablock-banner\.js)/,
|
||||
"repubblica.it": /scripts\.repubblica\.it\/pw\/pw\.js.+/,
|
||||
"spectator.co.uk": /.+\.tinypass\.com\/.+/,
|
||||
"spectator.com.au": /.+\.tinypass\.com\/.+/,
|
||||
"telegraph.co.uk": /.+telegraph\.co\.uk.+martech.+/,
|
||||
"thecourier.com.au": /.+cdn-au\.piano\.io\/api\/tinypass.+\.js/,
|
||||
"thenation.com": /thenation\.com\/.+\/paywall-script\.php/,
|
||||
"thenational.scot": /(.+\.tinypass\.com\/.+|.+thenational\.scot.+omniture\.js|.+thenational\.scot.+responsive-sync.+)/,
|
||||
"thewrap.com": /thewrap\.com\/.+\/wallkit\.js/,
|
||||
"wsj.com": /cdn\.ampproject\.org\/v\d\/amp-access-.+\.js/,
|
||||
"historyextra.com": /.+\.evolok\.net\/.+\/authorize\/.+/,
|
||||
"barrons.com": /cdn\.ampproject\.org\/v\d\/amp-access-.+\.js/,
|
||||
"irishtimes.com": /cdn\.ampproject\.org\/v\d\/amp-access-.+\.js/,
|
||||
"elmercurio.com": /(merreader\.emol\.cl\/assets\/js\/merPramV2.js|staticmer\.emol\.cl\/js\/inversiones\/PramModal.+\.js)/,
|
||||
"sloanreview.mit.edu": /(.+\.tinypass\.com\/.+|.+\.netdna-ssl\.com\/wp-content\/themes\/smr\/assets\/js\/libs\/welcome-ad\.js)/,
|
||||
"latercera.com": /.+\.cxense\.com\/+/,
|
||||
"lesechos.fr": /.+\.tinypass\.com\/.+/,
|
||||
"washingtonpost.com": /.+\.washingtonpost\.com\/.+\/pwapi-proxy\.min\.js/,
|
||||
"thehindu.com": /ajax\.cloudflare\.com\/cdn-cgi\/scripts\/.+\/cloudflare-static\/rocket-loader\.min\.js/,
|
||||
"technologyreview.com": /.+\.blueconic\.net\/.+/,
|
||||
};
|
||||
|
||||
module.exports.useGoogleBotSites = [
|
||||
"adelaidenow.com.au",
|
||||
"barrons.com",
|
||||
"couriermail.com.au",
|
||||
"dailytelegraph.com.au",
|
||||
"fd.nl",
|
||||
"genomeweb.com",
|
||||
"haaretz.co.il",
|
||||
"haaretz.com",
|
||||
"heraldsun.com.au",
|
||||
"mexiconewsdaily.com",
|
||||
"ntnews.com.au",
|
||||
"quora.com",
|
||||
"seekingalpha.com",
|
||||
"telegraph.co.uk",
|
||||
"theaustralian.com.au",
|
||||
"themarker.com",
|
||||
"themercury.com.au",
|
||||
"thenational.scot",
|
||||
"thetimes.co.uk",
|
||||
"wsj.com",
|
||||
"kansascity.com",
|
||||
"republic.ru",
|
||||
"nzz.ch",
|
||||
"handelsblatt.com",
|
||||
"washingtonpost.com",
|
||||
"df.cl",
|
||||
];
|
||||
|
||||
function matchDomain(domains, hostname) {
|
||||
let matchedDomain = false;
|
||||
if (typeof domains === "string") {
|
||||
domains = [domains];
|
||||
}
|
||||
domains.some(
|
||||
(domain) =>
|
||||
(hostname === domain || hostname.endsWith("." + domain)) &&
|
||||
(matchedDomain = domain)
|
||||
);
|
||||
return matchedDomain;
|
||||
}
|
||||
|
||||
function matchUrlDomain(domains, url) {
|
||||
return matchDomain(domains, urlHost(url));
|
||||
}
|
||||
|
||||
function urlHost(url) {
|
||||
if (url && url.startsWith("http")) {
|
||||
try {
|
||||
return new URL(url).hostname;
|
||||
} catch (e) {
|
||||
console.log(`url not valid: ${url} error: ${e}`);
|
||||
}
|
||||
}
|
||||
return url;
|
||||
}
|
||||
|
||||
module.exports.matchDomain = matchDomain;
|
||||
module.exports.matchUrlDomain = matchUrlDomain;
|
||||
module.exports.urlHost = urlHost;
|
@@ -1,18 +0,0 @@
|
||||
const { googleBot } = require('./constants');
|
||||
const { matchUrlDomain, useGoogleBotSites } = require("./sites");
|
||||
|
||||
module.exports.getUserAgent = (url) => {
|
||||
const useGoogleBot = useGoogleBotSites.some(function (item) {
|
||||
return typeof item === "string" && matchUrlDomain(item, url);
|
||||
});
|
||||
|
||||
if (!useGoogleBot) {
|
||||
return {};
|
||||
}
|
||||
return {
|
||||
userAgent: googleBot.userAgent,
|
||||
headers: {
|
||||
"X-Forwarded-For": googleBot.ip
|
||||
}
|
||||
}
|
||||
};
|
File diff suppressed because it is too large
@@ -3,7 +3,7 @@

Download MeiliSearch with:

```
wget https://github.com/meilisearch/MeiliSearch/releases/download/v0.11.1/meilisearch-linux-amd64
wget https://github.com/meilisearch/meilisearch/releases/download/v0.27.0/meilisearch-linux-amd64
chmod +x meilisearch-linux-amd64
```
@@ -8,6 +8,8 @@
|
||||
content="{{ description }}"
|
||||
/>
|
||||
<meta content="{{ url }}" name="og:site_name">
|
||||
<meta name="robots" content="{{ robots }}">
|
||||
|
||||
|
||||
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
|
||||
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
|
||||
@@ -26,7 +28,7 @@
|
||||
work correctly both with client-side routing and a non-root public URL.
|
||||
Learn how to configure a non-root public URL by running `npm run build`.
|
||||
-->
|
||||
<title>{{ title }} - QotNews</title>
|
||||
<title>{{ title }}</title>
|
||||
|
||||
<style>
|
||||
html {
|
||||
@@ -37,13 +39,23 @@
|
||||
}
|
||||
.nojs {
|
||||
color: white;
|
||||
max-width: 32rem;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div class="nojs">
|
||||
<noscript>You need to enable JavaScript to run this app.</noscript>
|
||||
<noscript>
|
||||
You need to enable JavaScript to run this app because it's written in React.
|
||||
I was planning on writing a server-side version, but I've become distracted
|
||||
by other projects -- sorry!
|
||||
<br/>
|
||||
I originally wrote this for myself, and of course I whitelist JavaScript on
|
||||
all my own domains.
|
||||
<br/><br/>
|
||||
Alternatively, try activex.news.t0.vc for an ActiveX™ version.
|
||||
</noscript>
|
||||
</div>
|
||||
<div id="root"></div>
|
||||
<!--
|
||||
|
@@ -3,16 +3,17 @@ import { BrowserRouter as Router, Route, Link, Switch } from 'react-router-dom';
|
||||
import localForage from 'localforage';
|
||||
import './Style-light.css';
|
||||
import './Style-dark.css';
|
||||
import './Style-black.css';
|
||||
import './Style-red.css';
|
||||
import './fonts/Fonts.css';
|
||||
import { ForwardDot } from './utils.js';
|
||||
import { BackwardDot, ForwardDot } from './utils.js';
|
||||
import Feed from './Feed.js';
|
||||
import Article from './Article.js';
|
||||
import Comments from './Comments.js';
|
||||
import Search from './Search.js';
|
||||
import Submit from './Submit.js';
|
||||
import Results from './Results.js';
|
||||
import ScrollToTop from './ScrollToTop.js';
|
||||
import Feed from './pages/Feed.js';
|
||||
import Article from './pages/Article.js';
|
||||
import Comments from './pages/Comments.js';
|
||||
import Results from './pages/Results.js';
|
||||
|
||||
|
||||
class App extends React.Component {
|
||||
constructor(props) {
|
||||
@@ -39,6 +40,16 @@ class App extends React.Component {
|
||||
localStorage.setItem('theme', 'dark');
|
||||
}
|
||||
|
||||
black() {
|
||||
this.setState({ theme: 'black' });
|
||||
localStorage.setItem('theme', 'black');
|
||||
}
|
||||
|
||||
red() {
|
||||
this.setState({ theme: 'red' });
|
||||
localStorage.setItem('theme', 'red');
|
||||
}
|
||||
|
||||
componentDidMount() {
|
||||
if (!this.cache.length) {
|
||||
localForage.iterate((value, key) => {
|
||||
@@ -48,22 +59,61 @@ class App extends React.Component {
|
||||
}
|
||||
}
|
||||
|
||||
goFullScreen() {
|
||||
if ('wakeLock' in navigator) {
|
||||
navigator.wakeLock.request('screen');
|
||||
}
|
||||
|
||||
document.body.requestFullscreen({ navigationUI: 'hide' }).then(() => {
|
||||
window.addEventListener('resize', () => this.forceUpdate());
|
||||
this.forceUpdate();
|
||||
});
|
||||
};
|
||||
|
||||
exitFullScreen() {
|
||||
document.exitFullscreen().then(() => {
|
||||
this.forceUpdate();
|
||||
});
|
||||
};
|
||||
|
||||
render() {
|
||||
const theme = this.state.theme;
|
||||
document.body.style.backgroundColor = theme === 'dark' ? '#000' : '#eeeeee';
|
||||
|
||||
if (theme === 'dark') {
|
||||
document.body.style.backgroundColor = '#1a1a1a';
|
||||
} else if (theme === 'black') {
|
||||
document.body.style.backgroundColor = '#000';
|
||||
} else if (theme === 'red') {
|
||||
document.body.style.backgroundColor = '#000';
|
||||
} else {
|
||||
document.body.style.backgroundColor = '#eeeeee';
|
||||
}
|
||||
|
||||
const fullScreenAvailable = document.fullscreenEnabled ||
|
||||
document.mozFullscreenEnabled ||
|
||||
document.webkitFullscreenEnabled ||
|
||||
document.msFullscreenEnabled;
|
||||
|
||||
return (
|
||||
<div className={theme}>
|
||||
<Router>
|
||||
<div className='container menu'>
|
||||
<p>
|
||||
<Link to='/'>QotNews - Feed</Link>
|
||||
<span className='theme'>Theme: <a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a></span>
|
||||
<Link to='/'>QotNews</Link>
|
||||
|
||||
<span className='theme'><a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a> - <a href='#' onClick={() => this.black()}>Black</a> - <a href='#' onClick={() => this.red()}>Red</a></span>
|
||||
<br />
|
||||
<span className='slogan'>Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode.</span>
|
||||
<span className='slogan'>Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.</span>
|
||||
</p>
|
||||
<Route path='/(|search)' component={Search} />
|
||||
<Route path='/(|search)' component={Submit} />
|
||||
{fullScreenAvailable &&
|
||||
<Route path='/(|search)' render={() => !document.fullscreenElement ?
|
||||
<button className='fullscreen' onClick={() => this.goFullScreen()}>Enter Fullscreen</button>
|
||||
:
|
||||
<button className='fullscreen' onClick={() => this.exitFullScreen()}>Exit Fullscreen</button>
|
||||
} />
|
||||
}
|
||||
</div>
|
||||
|
||||
<Route path='/' exact render={(props) => <Feed {...props} updateCache={this.updateCache} />} />
|
||||
@@ -73,6 +123,7 @@ class App extends React.Component {
|
||||
</Switch>
|
||||
<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} />} />
|
||||
|
||||
<BackwardDot />
|
||||
<ForwardDot />
|
||||
|
||||
<ScrollToTop />
|
||||
|
@@ -1,7 +1,7 @@
|
||||
import React from 'react';
|
||||
import { Helmet } from 'react-helmet';
|
||||
import localForage from 'localforage';
|
||||
import { sourceLink, infoLine, ToggleDot } from '../utils.js';
|
||||
import { sourceLink, infoLine, ToggleDot } from './utils.js';
|
||||
|
||||
class Article extends React.Component {
|
||||
constructor(props) {
|
||||
@@ -18,7 +18,7 @@ class Article extends React.Component {
|
||||
pConv: [],
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
componentDidMount() {
|
||||
const id = this.props.match ? this.props.match.params.id : 'CLOL';
|
||||
|
||||
@@ -45,7 +45,7 @@ class Article extends React.Component {
|
||||
}
|
||||
|
||||
pConvert = (n) => {
|
||||
this.setState({ pConv: [...this.state.pConv, n] });
|
||||
this.setState({ pConv: [...this.state.pConv, n]});
|
||||
}
|
||||
|
||||
render() {
|
||||
@@ -67,7 +67,8 @@ class Article extends React.Component {
|
||||
{story ?
|
||||
<div className='article'>
|
||||
<Helmet>
|
||||
<title>{story.title} - QotNews</title>
|
||||
<title>{story.title} | QotNews</title>
|
||||
<meta name="robots" content="noindex" />
|
||||
</Helmet>
|
||||
|
||||
<h1>{story.title}</h1>
|
||||
@@ -85,10 +86,10 @@ class Article extends React.Component {
|
||||
v.innerHTML.split('\n\n').map(x =>
|
||||
<p dangerouslySetInnerHTML={{ __html: x }} />
|
||||
)
|
||||
:
|
||||
:
|
||||
(v.nodeName === '#text' ?
|
||||
<p>{v.data}</p>
|
||||
:
|
||||
:
|
||||
<>
|
||||
<v.localName dangerouslySetInnerHTML={v.innerHTML ? { __html: v.innerHTML } : null} />
|
||||
{v.localName == 'pre' && <button onClick={() => this.pConvert(k)}>Convert Code to Paragraph</button>}
|
||||
@@ -96,11 +97,11 @@ class Article extends React.Component {
|
||||
)
|
||||
)}
|
||||
</div>
|
||||
:
|
||||
:
|
||||
<p>Problem getting article :(</p>
|
||||
}
|
||||
</div>
|
||||
:
|
||||
:
|
||||
<p>loading...</p>
|
||||
}
|
||||
<ToggleDot id={id} article={false} />
|
@@ -4,7 +4,7 @@ import { HashLink } from 'react-router-hash-link';
|
||||
import { Helmet } from 'react-helmet';
|
||||
import moment from 'moment';
|
||||
import localForage from 'localforage';
|
||||
import { infoLine, ToggleDot } from '../utils.js';
|
||||
import { infoLine, ToggleDot } from './utils.js';
|
||||
|
||||
class Article extends React.Component {
|
||||
constructor(props) {
|
||||
@@ -72,7 +72,7 @@ class Article extends React.Component {
|
||||
}
|
||||
|
||||
displayComment(story, c, level) {
|
||||
const cid = c.author + c.date;
|
||||
const cid = c.author+c.date;
|
||||
|
||||
const collapsed = this.state.collapsed.includes(cid);
|
||||
const expanded = this.state.expanded.includes(cid);
|
||||
@@ -85,22 +85,19 @@ class Article extends React.Component {
|
||||
<div className='info'>
|
||||
<p>
|
||||
{c.author === story.author ? '[OP]' : ''} {c.author || '[Deleted]'}
|
||||
{' '} | <HashLink to={'#' + cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
|
||||
{' '} | <HashLink to={'#'+cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
|
||||
|
||||
{hasChildren && (
|
||||
hidden ?
|
||||
<span className='collapser expander pointer' onClick={() => this.expandComment(cid)}>+</span>
|
||||
:
|
||||
<span className='collapser pointer' onClick={() => this.collapseComment(cid)}>–</span>
|
||||
)}
|
||||
{hidden || hasChildren &&
|
||||
<span className='collapser pointer' onClick={() => this.collapseComment(cid)}>–</span>
|
||||
}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className={collapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text }} />
|
||||
<div className={collapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text }} />
|
||||
|
||||
{hidden && hasChildren ?
|
||||
<div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c) - 1} more]</div>
|
||||
:
|
||||
<div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c)-1} more]</div>
|
||||
:
|
||||
c.comments.map(i => this.displayComment(story, i, level + 1))
|
||||
}
|
||||
</div>
|
||||
@@ -118,7 +115,8 @@ class Article extends React.Component {
|
||||
{story ?
|
||||
<div className='article'>
|
||||
<Helmet>
|
||||
<title>{story.title} - QotNews Comments</title>
|
||||
<title>{story.title} | QotNews</title>
|
||||
<meta name="robots" content="noindex" />
|
||||
</Helmet>
|
||||
|
||||
<h1>{story.title}</h1>
|
||||
@@ -133,7 +131,7 @@ class Article extends React.Component {
|
||||
{story.comments.map(c => this.displayComment(story, c, 0))}
|
||||
</div>
|
||||
</div>
|
||||
:
|
||||
:
|
||||
<p>loading...</p>
|
||||
}
|
||||
<ToggleDot id={id} article={true} />
|
@@ -1,7 +1,8 @@
|
||||
import React from 'react';
|
||||
import { Link } from 'react-router-dom';
|
||||
import { Helmet } from 'react-helmet';
|
||||
import localForage from 'localforage';
|
||||
import { StoryItem } from '../components/StoryItem.js';
|
||||
import { sourceLink, infoLine, logos } from './utils.js';
|
||||
|
||||
class Feed extends React.Component {
|
||||
constructor(props) {
|
||||
@@ -21,21 +22,20 @@ class Feed extends React.Component {
|
||||
const updated = !this.state.stories || this.state.stories[0].id !== result.stories[0].id;
|
||||
console.log('updated:', updated);
|
||||
|
||||
const { stories } = result;
|
||||
this.setState({ stories });
|
||||
localStorage.setItem('stories', JSON.stringify(stories));
|
||||
this.setState({ stories: result.stories });
|
||||
localStorage.setItem('stories', JSON.stringify(result.stories));
|
||||
|
||||
if (updated) {
|
||||
localForage.clear();
|
||||
stories.forEach((x, i) => {
|
||||
result.stories.forEach((x, i) => {
|
||||
fetch('/api/' + x.id)
|
||||
.then(res => res.json())
|
||||
.then(({ story }) => {
|
||||
localForage.setItem(x.id, story)
|
||||
.then(result => {
|
||||
localForage.setItem(x.id, result.story)
|
||||
.then(console.log('preloaded', x.id, x.title));
|
||||
this.props.updateCache(x.id, story);
|
||||
}, error => { }
|
||||
);
|
||||
this.props.updateCache(x.id, result.story);
|
||||
}, error => {}
|
||||
);
|
||||
});
|
||||
}
|
||||
},
|
||||
@@ -52,10 +52,31 @@ class Feed extends React.Component {
|
||||
return (
|
||||
<div className='container'>
|
||||
<Helmet>
|
||||
<title>Feed - QotNews</title>
|
||||
<title>QotNews</title>
|
||||
<meta name="robots" content="index" />
|
||||
</Helmet>
|
||||
{error && <p>Connection error?</p>}
|
||||
{stories ? stories.map(story => <StoryItem story={story}></StoryItem>) : <p>loading...</p>}
|
||||
{stories ?
|
||||
<div>
|
||||
{stories.map(x =>
|
||||
<div className='item' key={x.id}>
|
||||
<div className='title'>
|
||||
<Link className='link' to={'/' + x.id}>
|
||||
<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
|
||||
</Link>
|
||||
|
||||
<span className='source'>
|
||||
({sourceLink(x)})
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{infoLine(x)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
:
|
||||
<p>loading...</p>
|
||||
}
|
||||
</div>
|
||||
);
|
||||
}
|
@@ -1,7 +1,8 @@
|
||||
import React from 'react';
|
||||
import { Link } from 'react-router-dom';
|
||||
import { Helmet } from 'react-helmet';
|
||||
import { sourceLink, infoLine, logos } from './utils.js';
|
||||
import AbortController from 'abort-controller';
|
||||
import { StoryItem } from '../components/StoryItem.js';
|
||||
|
||||
class Results extends React.Component {
|
||||
constructor(props) {
|
||||
@@ -28,7 +29,7 @@ class Results extends React.Component {
|
||||
.then(res => res.json())
|
||||
.then(
|
||||
(result) => {
|
||||
this.setState({ stories: result.results });
|
||||
this.setState({ stories: result.hits });
|
||||
},
|
||||
(error) => {
|
||||
if (error.message !== 'The operation was aborted. ') {
|
||||
@@ -55,17 +56,35 @@ class Results extends React.Component {
|
||||
return (
|
||||
<div className='container'>
|
||||
<Helmet>
|
||||
<title>Feed - QotNews</title>
|
||||
<title>Search Results | QotNews</title>
|
||||
</Helmet>
|
||||
{error && <p>Connection error?</p>}
|
||||
{stories ?
|
||||
<>
|
||||
<p>Search results:</p>
|
||||
<div className='comment lined'>
|
||||
{stories ? stories.map(story => <StoryItem story={story}></StoryItem>) : <p>loading...</p>}
|
||||
{stories.length ?
|
||||
stories.map(x =>
|
||||
<div className='item' key={x.id}>
|
||||
<div className='title'>
|
||||
<Link className='link' to={'/' + x.id}>
|
||||
<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
|
||||
</Link>
|
||||
|
||||
<span className='source'>
|
||||
({sourceLink(x)})
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{infoLine(x)}
|
||||
</div>
|
||||
)
|
||||
:
|
||||
<p>none</p>
|
||||
}
|
||||
</div>
|
||||
</>
|
||||
:
|
||||
:
|
||||
<p>loading...</p>
|
||||
}
|
||||
</div>
|
@@ -15,6 +15,7 @@ class ScrollToTop extends React.Component {
|
||||
}
|
||||
|
||||
window.scrollTo(0, 0);
|
||||
document.body.scrollTop = 0;
|
||||
}
|
||||
|
||||
render() {
|
||||
|
@@ -37,7 +37,7 @@ class Search extends Component {
|
||||
<span className='search'>
|
||||
<form onSubmit={this.searchAgain}>
|
||||
<input
|
||||
placeholder='Search... (fixed)'
|
||||
placeholder='Search...'
|
||||
value={search}
|
||||
onChange={this.searchArticles}
|
||||
ref={this.inputRef}
|
||||
|
68
webclient/src/Style-black.css
Normal file
68
webclient/src/Style-black.css
Normal file
@@ -0,0 +1,68 @@
|
||||
.black {
|
||||
color: #ddd;
|
||||
}
|
||||
|
||||
.black a {
|
||||
color: #ddd;
|
||||
}
|
||||
|
||||
.black input {
|
||||
color: #ddd;
|
||||
border: 1px solid #828282;
|
||||
}
|
||||
|
||||
.black button {
|
||||
background-color: #444444;
|
||||
border-color: #bbb;
|
||||
color: #ddd;
|
||||
}
|
||||
|
||||
.black .item {
|
||||
color: #828282;
|
||||
}
|
||||
|
||||
.black .item .source-logo {
|
||||
filter: grayscale(1);
|
||||
}
|
||||
|
||||
.black .item a {
|
||||
color: #828282;
|
||||
}
|
||||
|
||||
.black .item a.link {
|
||||
color: #ddd;
|
||||
}
|
||||
.black .item a.link:visited {
|
||||
color: #828282;
|
||||
}
|
||||
|
||||
.black .item .info a.hot {
|
||||
color: #cccccc;
|
||||
}
|
||||
|
||||
.black .article a {
|
||||
border-bottom: 1px solid #aaaaaa;
|
||||
}
|
||||
|
||||
.black .article u {
|
||||
border-bottom: 1px solid #aaaaaa;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.black .story-text video,
|
||||
.black .story-text img {
|
||||
filter: brightness(50%);
|
||||
}
|
||||
|
||||
.black .article .info {
|
||||
color: #828282;
|
||||
}
|
||||
|
||||
.black .article .info a {
|
||||
border-bottom: none;
|
||||
color: #828282;
|
||||
}
|
||||
|
||||
.black .comment.lined {
|
||||
border-left: 1px solid #444444;
|
||||
}
|
@@ -11,12 +11,14 @@
|
||||
border: 1px solid #828282;
|
||||
}
|
||||
|
||||
.dark .item {
|
||||
color: #828282;
|
||||
.dark button {
|
||||
background-color: #444444;
|
||||
border-color: #bbb;
|
||||
color: #ddd;
|
||||
}
|
||||
|
||||
.dark .item .source-logo {
|
||||
filter: grayscale(1);
|
||||
.dark .item {
|
||||
color: #828282;
|
||||
}
|
||||
|
||||
.dark .item a {
|
||||
@@ -43,6 +45,7 @@
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.dark .story-text video,
|
||||
.dark .story-text img {
|
||||
filter: brightness(50%);
|
||||
}
|
||||
|
@@ -2,9 +2,30 @@ body {
|
||||
text-rendering: optimizeLegibility;
|
||||
font: 1rem/1.3 sans-serif;
|
||||
color: #000000;
|
||||
margin-bottom: 100vh;
|
||||
word-break: break-word;
|
||||
font-kerning: normal;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
::backdrop {
|
||||
background-color: rgba(0,0,0,0);
|
||||
}
|
||||
|
||||
body:fullscreen {
|
||||
overflow-y: scroll !important;
|
||||
}
|
||||
body:-ms-fullscreen {
|
||||
overflow-y: scroll !important;
|
||||
}
|
||||
body:-webkit-full-screen {
|
||||
overflow-y: scroll !important;
|
||||
}
|
||||
body:-moz-full-screen {
|
||||
overflow-y: scroll !important;
|
||||
}
|
||||
|
||||
#root {
|
||||
margin: 8px 8px 100vh 8px !important;
|
||||
}
|
||||
|
||||
a {
|
||||
@@ -22,6 +43,12 @@ input {
|
||||
border-radius: 4px;
|
||||
}
|
||||
|
||||
.fullscreen {
|
||||
margin: 0.25rem;
|
||||
padding: 0.25rem;
|
||||
}
|
||||
|
||||
|
||||
pre {
|
||||
overflow: auto;
|
||||
}
|
||||
@@ -185,16 +212,20 @@ span.source {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.toggleDot {
|
||||
.dot {
|
||||
cursor: pointer;
|
||||
position: fixed;
|
||||
bottom: 1rem;
|
||||
left: 1rem;
|
||||
height: 3rem;
|
||||
width: 3rem;
|
||||
background-color: #828282;
|
||||
border-radius: 50%;
|
||||
}
|
||||
|
||||
.toggleDot {
|
||||
bottom: 1rem;
|
||||
left: 1rem;
|
||||
}
|
||||
|
||||
.toggleDot .button {
|
||||
font: 2rem/1 'icomoon';
|
||||
position: relative;
|
||||
@@ -203,21 +234,27 @@ span.source {
|
||||
}
|
||||
|
||||
.forwardDot {
|
||||
cursor: pointer;
|
||||
position: fixed;
|
||||
bottom: 1rem;
|
||||
right: 1rem;
|
||||
height: 3rem;
|
||||
width: 3rem;
|
||||
background-color: #828282;
|
||||
border-radius: 50%;
|
||||
}
|
||||
|
||||
.forwardDot .button {
|
||||
font: 2.5rem/1 'icomoon';
|
||||
font: 2rem/1 'icomoon';
|
||||
position: relative;
|
||||
top: 0.25rem;
|
||||
left: 0.3rem;
|
||||
top: 0.5rem;
|
||||
left: 0.5rem;
|
||||
}
|
||||
|
||||
.backwardDot {
|
||||
bottom: 1rem;
|
||||
right: 5rem;
|
||||
}
|
||||
|
||||
.backwardDot .button {
|
||||
font: 2rem/1 'icomoon';
|
||||
position: relative;
|
||||
top: 0.5rem;
|
||||
left: 0.5rem;
|
||||
}
|
||||
|
||||
.search form {
|
||||
|
82
webclient/src/Style-red.css
Normal file
82
webclient/src/Style-red.css
Normal file
@@ -0,0 +1,82 @@
|
||||
.red {
|
||||
color: #b00;
|
||||
scrollbar-color: #b00 #440000;
|
||||
}
|
||||
|
||||
.red a {
|
||||
color: #b00;
|
||||
}
|
||||
|
||||
.red input {
|
||||
color: #b00;
|
||||
border: 1px solid #690000;
|
||||
}
|
||||
|
||||
.red input::placeholder {
|
||||
color: #690000;
|
||||
}
|
||||
|
||||
.red hr {
|
||||
background-color: #690000;
|
||||
}
|
||||
|
||||
.red button {
|
||||
background-color: #440000;
|
||||
border-color: #b00;
|
||||
color: #b00;
|
||||
}
|
||||
|
||||
.red .item,
|
||||
.red .slogan {
|
||||
color: #690000;
|
||||
}
|
||||
|
||||
.red .item .source-logo {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.red .item a {
|
||||
color: #690000;
|
||||
}
|
||||
|
||||
.red .item a.link {
|
||||
color: #b00;
|
||||
}
|
||||
.red .item a.link:visited {
|
||||
color: #690000;
|
||||
}
|
||||
|
||||
.red .item .info a.hot {
|
||||
color: #cc0000;
|
||||
}
|
||||
|
||||
.red .article a {
|
||||
border-bottom: 1px solid #aa0000;
|
||||
}
|
||||
|
||||
.red .article u {
|
||||
border-bottom: 1px solid #aa0000;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.red .story-text video,
|
||||
.red .story-text img {
|
||||
filter: grayscale(100%) brightness(20%) sepia(100%) hue-rotate(-50deg) saturate(600%) contrast(0.8);
|
||||
}
|
||||
|
||||
.red .article .info {
|
||||
color: #690000;
|
||||
}
|
||||
|
||||
.red .article .info a {
|
||||
border-bottom: none;
|
||||
color: #690000;
|
||||
}
|
||||
|
||||
.red .comment.lined {
|
||||
border-left: 1px solid #440000;
|
||||
}
|
||||
|
||||
.red .dot {
|
||||
background-color: #440000;
|
||||
}
|
@@ -41,7 +41,7 @@ class Submit extends Component {
|
||||
<span className='search'>
|
||||
<form onSubmit={this.submitArticle}>
|
||||
<input
|
||||
placeholder='Submit Article'
|
||||
placeholder='Submit URL'
|
||||
ref={this.inputRef}
|
||||
/>
|
||||
</form>
|
||||
|
@@ -1,34 +0,0 @@
|
||||
import React from "react";
|
||||
import { Link } from "react-router-dom";
|
||||
import { sourceLink, infoLine, getLogoUrl } from "../utils.js";
|
||||
|
||||
export class StoryItem extends React.Component {
|
||||
constructor(props) {
|
||||
super(props);
|
||||
}
|
||||
|
||||
render() {
|
||||
const story = this.props.story;
|
||||
const { id, title } = story;
|
||||
|
||||
return (
|
||||
<div className="item" key={id}>
|
||||
<div className="title">
|
||||
<Link className="link" to={"/" + id}>
|
||||
<img
|
||||
className="source-logo"
|
||||
src={getLogoUrl(story)}
|
||||
alt="source logo"
|
||||
/>
|
||||
{" "}
|
||||
{title}
|
||||
</Link>
|
||||
|
||||
<span className="source">({sourceLink(story)})</span>
|
||||
</div>
|
||||
|
||||
{infoLine(story)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
}
|
Binary file not shown.
File diff suppressed because one or more lines are too long