fix mistake.

Add regex to get a unique ref from each sitemap/category-based article URL.
cosmetic filters for the spinoff.
2020-11-17 12:54:54 +13:00 · 2020-11-17 12:38:28 +13:00 · 2020-11-16 16:49:39 +13:00 · 2020-11-16 15:41:09 +13:00 · 2020-11-16 15:30:33 +13:00 · 2020-11-16 13:17:58 +13:00
79 changed files with 6047 additions and 7764 deletions
@@ -1 +0,0 @@
 .aider*
@@ -0,0 +1,3 @@
 [submodule "readerserver/scraper/browser/scripts/bypass-paywalls-chrome"]
 	path = readerserver/scraper/browser/scripts/bypass-paywalls-chrome
 	url = https://github.com/iamadamdev/bypass-paywalls-chrome.git
@@ -1,6 +1,6 @@
 The MIT License (MIT)
-Copyright (c) 2019 Tanner (tanner.vc)
+Copyright (c) 2019 Tanner Collin
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -20,7 +20,7 @@ $ sudo apt install yarn
 Clone this repo:
 ```text
-$ git clone https://git.tanner.vc/tanner/qotnews.git
+$ git clone https://gogs.tannercollin.com/tanner/qotnews.git
 $ cd qotnews
 ```
@@ -109,6 +109,4 @@ settings.py
 data.db
 data.db.bak
 data/archive/*
 data/backup/*
 qotnews.sqlite
 praw.ini
@@ -1,11 +1,11 @@
-import json
+from datetime import datetime, timedelta
 from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import sessionmaker
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.types import JSON
-engine = create_engine('sqlite:///data/qotnews.sqlite', connect_args={'timeout': 360})
+engine = create_engine('sqlite:///data/qotnews.sqlite')
 Session = sessionmaker(bind=engine)
 Base = declarative_base()
@@ -15,8 +15,8 @@ class Story(Base):
    sid = Column(String(16), primary_key=True)
    ref = Column(String(16), unique=True)
-    meta_json = Column(String)
+    meta = Column(JSON)
-    full_json = Column(String)
+    data = Column(JSON)
    title = Column(String)
 class Reflist(Base):
@@ -24,6 +24,7 @@ class Reflist(Base):
    rid = Column(Integer, primary_key=True)
    ref = Column(String(16), unique=True)
    urlref = Column(String)
    sid = Column(String, ForeignKey('stories.sid'), unique=True)
    source = Column(String(16))
@@ -36,19 +37,21 @@ def get_story(sid):
 def put_story(story):
    story = story.copy()
-    full_json = json.dumps(story)
+    data = {}
    data.update(story)
-    story.pop('text', None)
+    meta = {}
-    story.pop('comments', None)
+    meta.update(story)
-    meta_json = json.dumps(story)
+    meta.pop('text', None)
    meta.pop('comments', None)
    try:
        session = Session()
        s = Story(
            sid=story['id'],
            ref=story['ref'],
-            full_json=full_json,
+            data=data,
-            meta_json=meta_json,
+            meta=meta,
            title=story.get('title', None),
        )
        session.merge(s)
@@ -63,25 +66,32 @@ def get_story_by_ref(ref):
    session = Session()
    return session.query(Story).filter(Story.ref==ref).first()
-def get_reflist(amount):
+def get_stories_by_url(url):
    session = Session()
-    q = session.query(Reflist).order_by(Reflist.rid.desc()).limit(amount)
+    return session.query(Story).\
-    return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]
+            filter(Story.title != None).\
            filter(Story.meta['url'].as_string() == url).\
            order_by(Story.meta['date'].desc())
-def get_stories(amount, skip=0):
+def get_reflist():
    session = Session()
-    q = session.query(Reflist, Story.meta_json).\
+    q = session.query(Reflist).order_by(Reflist.rid.desc())
-            order_by(Reflist.rid.desc()).\
+    return [dict(ref=x.ref, sid=x.sid, source=x.source, urlref=x.urlref) for x in q.all()]
 def get_stories(maxage=60*60*24*2):
    time = datetime.now().timestamp() - maxage
    session = Session()
    q = session.query(Reflist, Story.meta).\
            join(Story).\
            filter(Story.title != None).\
-            offset(skip).\
+            filter(Story.meta['date'].as_integer() > time).\
-            limit(amount)
+            order_by(Story.meta['date'].desc())
    return [x[1] for x in q]
-def put_ref(ref, sid, source):
+def put_ref(ref, sid, source, urlref):
    try:
        session = Session()
-        r = Reflist(ref=ref, sid=sid, source=source)
+        r = Reflist(ref=ref, sid=sid, source=source, urlref=urlref)
        session.add(r)
        session.commit()
    except:
@@ -101,22 +111,7 @@ def del_ref(ref):
    finally:
        session.close()
 def count_stories():
    """Return the number of Story rows in the database."""
    # Open the session before the try block: if Session() itself raised,
    # the finally clause would reference an unbound name and replace the
    # real error with an UnboundLocalError.
    session = Session()
    try:
        return session.query(Story).count()
    finally:
        # Always release the session, even if the query fails.
        session.close()
 def get_story_list():
    """Return the result rows of a query selecting Story.sid for every story."""
    # Open the session before the try block: if Session() itself raised,
    # the finally clause would reference an unbound name and replace the
    # real error with an UnboundLocalError.
    session = Session()
    try:
        return session.query(Story.sid).all()
    finally:
        # Always release the session, even if the query fails.
        session.close()
 if __name__ == '__main__':
    init()
-    #print(get_story_by_ref('hgi3sy'))
+    print(get_story_by_ref('hgi3sy'))
    print(len(get_reflist(99999)))
@@ -1,8 +1,6 @@
 import database
 import search
 import sys
 import settings
 import logging
 import json
 import requests
@@ -23,7 +21,7 @@ def database_del_story(sid):
 def search_del_story(sid):
    try:
-        r = requests.delete(settings.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
+        r = requests.delete(search.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
        if r.status_code != 202:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
@@ -6,84 +6,120 @@ logging.basicConfig(
 import requests
 import time
 from bs4 import BeautifulSoup
 import itertools
 import settings
-from feeds import hackernews, reddit, tildes, manual, lobsters
+from feeds import hackernews, reddit, tildes, substack, manual
-import utils
+from feeds.sitemap import Sitemap
 from feeds.category import Category
 from scrapers import outline, declutter, browser, local
-INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com', 'sec.gov']
+INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
-TWO_DAYS = 60*60*24*2
+
 substacks = {}
 for key, value in settings.SUBSTACK.items():
    substacks[key] = substack.Publication(value['url'])
 categories = {}
 for key, value in settings.CATEGORY.items():
    categories[key] = Category(value)
 sitemaps = {}
 for key, value in settings.SITEMAP.items():
    sitemaps[key] = Sitemap(value)
 def get_list():
    feeds = {}
 def list():
    feed = []
    if settings.NUM_HACKERNEWS:
-        feed += [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
+        feeds['hackernews'] = [(x, 'hackernews', x) for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
    if settings.NUM_LOBSTERS:
        feed += [(x, 'lobsters') for x in lobsters.feed()[:settings.NUM_LOBSTERS]]
    if settings.NUM_REDDIT:
-        feed += [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
+        feeds['reddit'] = [(x, 'reddit', x) for x in reddit.feed()[:settings.NUM_REDDIT]]
    if settings.NUM_TILDES:
-        feed += [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
+        feeds['tildes'] = [(x, 'tildes', x) for x in tildes.feed()[:settings.NUM_TILDES]]
    if settings.NUM_SUBSTACK:
        feeds['substack'] = [(x, 'substack', x) for x in substack.top.feed()[:settings.NUM_SUBSTACK]]
    for key, publication in substacks.items():
        count = settings.SUBSTACK[key]['count']
        feeds[key] = [(x, key, x) for x in publication.feed()[:count]]
    for key, sites in categories.items():
        count = settings.CATEGORY[key].get('count') or 0
        excludes = settings.CATEGORY[key].get('excludes')
        tz = settings.CATEGORY[key].get('tz')
        feeds[key] = [(x, key, u) for x, u in sites.feed(excludes)[:count]]
    for key, sites in sitemaps.items():
        count = settings.SITEMAP[key].get('count') or 0
        excludes = settings.SITEMAP[key].get('excludes')
        feeds[key] = [(x, key, u) for x, u in sites.feed(excludes)[:count]]
    values = feeds.values()
    feed = itertools.chain.from_iterable(itertools.zip_longest(*values, fillvalue=None))
    feed = list(filter(None, feed))
    return feed
 def get_article(url):
-    if not settings.READER_URL:
+    scrapers = {
-        logging.info('Readerserver not configured, aborting.')
+        'declutter': declutter,
-        return ''
+        'outline': outline,
        'browser': browser,
        'local': local,
    }
    available = settings.SCRAPERS or ['local']
    if 'local' not in available:
        available += ['local']
-    if url.startswith('https://twitter.com'):
+    for scraper in available:
-        logging.info('Replacing twitter.com url with nitter.net')
+        if scraper not in scrapers.keys():
-        url = url.replace('twitter.com', 'nitter.net')
+            continue
-
+        try:
-    try:
+            html = scrapers[scraper].get_html(url)
-        r = requests.post(settings.READER_URL, data=dict(url=url), timeout=20)
+            if html:
-        if r.status_code != 200:
+                return html
-            raise Exception('Bad response code ' + str(r.status_code))
+        except KeyboardInterrupt:
-        return r.text
+            raise
-    except KeyboardInterrupt:
+        except:
-        raise
+            pass
-    except BaseException as e:
+    return ''
        logging.error('Problem getting article: {}'.format(str(e)))
        return ''
 def get_content_type(url):
    try:
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
        return requests.get(url, headers=headers, timeout=5).headers['content-type']
    except:
        return ''
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
            'X-Forwarded-For': '66.249.66.1',
        }
-        return requests.get(url, headers=headers, timeout=10).headers['content-type']
+        return requests.get(url, headers=headers, timeout=5).headers['content-type']
    except:
        pass
-def update_story(story, is_manual=False):
+    try:
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
        return requests.get(url, headers=headers, timeout=10).headers['content-type']
    except:
        return ''
 def update_story(story, is_manual=False, urlref=None):
    res = {}
-    try:
+    if story['source'] == 'hackernews':
-        if story['source'] == 'hackernews':
+        res = hackernews.story(story['ref'])
-            res = hackernews.story(story['ref'])
+    elif story['source'] == 'reddit':
-        elif story['source'] == 'lobsters':
+        res = reddit.story(story['ref'])
-            res = lobsters.story(story['ref'])
+    elif story['source'] == 'tildes':
-        elif story['source'] == 'reddit':
+        res = tildes.story(story['ref'])
-            res = reddit.story(story['ref'])
+    elif story['source'] == 'substack':
-        elif story['source'] == 'tildes':
+        res = substack.top.story(story['ref'])
-            res = tildes.story(story['ref'])
+    elif story['source'] in categories.keys():
-        elif story['source'] == 'manual':
+        res = categories[story['source']].story(story['ref'], urlref)
-            res = manual.story(story['ref'])
+    elif story['source'] in sitemaps.keys():
-    except BaseException as e:
+        res = sitemaps[story['source']].story(story['ref'], urlref)
-        utils.alert_tanner('Problem updating {} story, ref {}: {}'.format(story['source'], story['ref'], str(e)))
+    elif story['source'] in substacks.keys():
-        logging.exception(e)
+        res = substacks[story['source']].story(story['ref'])
-        return False
+    elif story['source'] == 'manual':
        res = manual.story(story['ref'])
    if res:
        story.update(res) # join dicts
@@ -91,8 +127,8 @@ def update_story(story, is_manual=False):
        logging.info('Story not ready yet')
        return False
-    if story['date'] and not is_manual and story['date'] + TWO_DAYS < time.time():
+    if story['date'] and not is_manual and story['date'] + settings.MAX_STORY_AGE < time.time():
-        logging.info('Story too old, removing. Date: {}'.format(story['date']))
+        logging.info('Story too old, removing')
        return False
    if story.get('url', '') and not story.get('text', ''):
@@ -106,12 +142,6 @@ def update_story(story, is_manual=False):
            logging.info(story['url'])
            return False
        if 'trump' in story['title'].lower() or 'musk' in story['title'].lower() or 'Removed by moderator' in story['title']:
            logging.info('Trump / Musk / removed story, skipping')
            logging.info(story['url'])
            return False
        logging.info('Getting article ' + story['url'])
        story['text'] = get_article(story['url'])
        if not story['text']: return False
@@ -129,7 +159,7 @@ if __name__ == '__main__':
    #print(get_article('https://www.bloomberg.com/news/articles/2019-09-23/xi-s-communists-under-pressure-as-high-prices-hit-china-workers'))
-    a = get_content_type('https://tefkos.comminfo.rutgers.edu/Courses/e530/Readings/Beal%202008%20full%20text%20searching.pdf')
+    a = get_article('https://blog.joinmastodon.org/2019/10/mastodon-3.0/')
    print(a)
    print('done')
@@ -0,0 +1,72 @@
 import logging
 logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)
 if __name__ == '__main__':
    import sys
    sys.path.insert(0,'.')
 from bs4 import BeautifulSoup
 import settings
 from utils import clean
 from misc.api import xml
 from misc.news import Base
 def _filter_links(links, category_url, excludes=None):
    links = list(filter(None, [link if link.startswith(category_url) else None for link in links]))
    links = list(filter(None, [link if link != category_url else None for link in links]))
    links = list(set(links))
    if excludes:
        links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
    return links
 def _get_category(category_url, excludes=None):
    """Download a category page and return the filtered article links found on it.

    Hrefs starting with '/' are prefixed with the scheme and host of
    category_url before filtering. Returns [] when the download yields
    nothing.
    """
    # "scheme://host" is the first three '/'-separated pieces of the URL.
    site_root = '/'.join(category_url.split('/')[:3])

    markup = xml(lambda x: category_url)
    if not markup:
        return []

    page = BeautifulSoup(markup, features='html.parser')
    hrefs = []
    for anchor in page.find_all('a', href=True):
        href = anchor.get('href')
        if href.startswith('/'):
            href = f"{site_root}{href}"
        hrefs.append(href)

    return _filter_links(hrefs, category_url, excludes)
 class Category(Base):
    """News source whose articles are discovered from category listing pages."""

    def __init__(self, config):
        """Store the config mapping and read its 'url' and 'tz' entries.

        'url' may be a single URL string or a list of URL strings.
        """
        self.config = config
        self.category_url = config.get('url')
        self.tz = config.get('tz')

    def feed(self, excludes=None):
        """Return (id, link) pairs for the links found on the category page(s).

        Args:
            excludes: optional substrings; links containing any are skipped.

        Returns:
            A list of (self.get_id(link), link) tuples, de-duplicated and in
            the order the links were collected.
        """
        if isinstance(self.category_url, str):
            category_urls = [self.category_url]
        elif isinstance(self.category_url, list):
            category_urls = self.category_url
        else:
            # Any other config value yields an empty feed, as before.
            category_urls = []

        links = []
        for url in category_urls:
            links += _get_category(url, excludes)

        # dict.fromkeys keeps collection order; list(set(...)) returned a
        # hash-dependent order, so callers truncating the feed got an
        # arbitrary subset that changed between runs.
        return [(self.get_id(link), link) for link in dict.fromkeys(links)]
 # scratchpad so I can quickly develop the parser
 if __name__ == '__main__':
    # Category.__init__ takes a config mapping (it calls config.get('url')
    # and config.get('tz')), so URLs and the timezone are passed in dicts.
    print("Category: RadioNZ")
    site = Category({'url': "https://www.rnz.co.nz/news/"})
    excludes = [
        'rnz.co.nz/news/sport',
        'rnz.co.nz/weather',
        'rnz.co.nz/news/weather',
    ]
    posts = site.feed(excludes)
    print(posts[:5])
    # feed() returns (ref, url) pairs.
    # NOTE(review): story() is inherited from Base, which is not defined in
    # this file; unpacking assumes a story(ref, urlref) signature - confirm.
    if posts:
        print(site.story(*posts[0]))

    print("Category: Newsroom")
    site = Category({'url': "https://www.newsroom.co.nz/news/", 'tz': 'Pacific/Auckland'})
    posts = site.feed()
    print(posts[:5])
    if posts:
        print(site.story(*posts[0]))
@@ -12,8 +12,7 @@ import requests
 from utils import clean
 API_TOPSTORIES = lambda x: 'https://hacker-news.firebaseio.com/v0/topstories.json'
-ALG_API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
+API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
 BHN_API_ITEM = lambda x : 'https://api.hnpwa.com/v0/item/{}.json'.format(x)
 SITE_LINK = lambda x : 'https://news.ycombinator.com/item?id={}'.format(x)
 SITE_AUTHOR_LINK = lambda x : 'https://news.ycombinator.com/user?id={}'.format(x)
@@ -43,7 +42,7 @@ def api(route, ref=None):
 def feed():
    """Return the top-story ids as strings, or an empty list on a falsy API result."""
    top_refs = api(API_TOPSTORIES)
    if not top_refs:
        return []
    return [str(ref) for ref in top_refs]
-def alg_comment(i):
+def comment(i):
    if 'author' not in i:
        return False
@@ -52,25 +51,21 @@ def alg_comment(i):
    c['score'] = i.get('points', 0)
    c['date'] = i.get('created_at_i', 0)
    c['text'] = clean(i.get('text', '') or '')
-    c['comments'] = [alg_comment(j) for j in i['children']]
+    c['comments'] = [comment(j) for j in i['children']]
    c['comments'] = list(filter(bool, c['comments']))
    return c
-def alg_comment_count(i):
+def comment_count(i):
    alive = 1 if i['author'] else 0
-    return sum([alg_comment_count(c) for c in i['comments']]) + alive
+    return sum([comment_count(c) for c in i['comments']]) + alive
-def alg_story(ref):
+def story(ref):
-    r = api(ALG_API_ITEM, ref)
+    r = api(API_ITEM, ref)
-    if not r:
+    if not r: return False
        logging.info('Bad Algolia Hackernews API response.')
        return None
    if 'deleted' in r:
        logging.info('Story was deleted.')
        return False
    elif r.get('type', '') != 'story':
        logging.info('Type "{}" is not "story".'.format(r.get('type', '')))
        return False
    s = {}
@@ -81,88 +76,17 @@ def alg_story(ref):
    s['title'] = r.get('title', '')
    s['link'] = SITE_LINK(ref)
    s['url'] = r.get('url', '')
-    s['comments'] = [alg_comment(i) for i in r['children']]
+    s['comments'] = [comment(i) for i in r['children']]
    s['comments'] = list(filter(bool, s['comments']))
-    s['num_comments'] = alg_comment_count(s) - 1
+    s['num_comments'] = comment_count(s) - 1
    if 'text' in r and r['text']:
        s['text'] = clean(r['text'] or '')
    return s
 def bhn_comment(i):
    """Convert one BetterHN comment item (and its replies) into a comment dict.

    Returns False when the item has no 'user' key; such entries are dropped
    from the parent's reply list.
    """
    if 'user' not in i:
        return False

    c = {
        'author': i.get('user', ''),
        'score': 0,   # Not present?
        'date': i.get('time', 0),
        'text': clean(i.get('content', '') or ''),
    }
    # Recurse into replies, keeping only those that converted successfully.
    replies = (bhn_comment(child) for child in i['comments'])
    c['comments'] = [reply for reply in replies if reply]
    return c
 def bhn_story(ref):
    """Fetch a story through the BetterHN item endpoint and normalise it.

    Returns None on a falsy API response, False for deleted/dead items or
    items whose type is not "link", otherwise the story dict.
    """
    r = api(BHN_API_ITEM, ref)
    if not r:
        logging.info('Bad BetterHN Hackernews API response.')
        return None

    # 'deleted' key present (TODO: verify) or item flagged dead.
    if 'deleted' in r or r.get('dead', False):
        logging.info('Story was deleted.')
        return False

    item_type = r.get('type', '')
    if item_type != 'link':
        logging.info('Type "{}" is not "link".'.format(item_type))
        return False

    s = {
        'author': r.get('user', ''),
        'author_link': SITE_AUTHOR_LINK(r.get('user', '')),
        'score': r.get('points', 0),
        'date': r.get('time', 0),
        'title': r.get('title', ''),
        'link': SITE_LINK(ref),
        'url': r.get('url', ''),
    }
    # URLs starting with 'item' are replaced by the story's own site link.
    if s['url'].startswith('item'):
        s['url'] = SITE_LINK(ref)

    converted = (bhn_comment(child) for child in r['comments'])
    s['comments'] = [c for c in converted if c]
    s['num_comments'] = r.get('comments_count', 0)

    content = r.get('content') if 'content' in r else None
    if content:
        s['text'] = clean(content or '')

    return s
 def story(ref):
    """Fetch a story, trying alg_story first and bhn_story as a fallback.

    The fallback is used only when alg_story returns None. Returns False
    when no usable story was obtained, when the title is empty, or when both
    the score (< 25) and the comment count (< 10) are below threshold.
    """
    s = alg_story(ref)
    if s is None:
        s = bhn_story(ref)

    if not s or not s['title']:
        return False

    below_threshold = s['score'] < 25 and s['num_comments'] < 10
    if not below_threshold:
        return s

    logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
    return False
 # scratchpad so I can quickly develop the parser
 # Running the module directly prints the current feed() result and then one
 # fetched story; the commented-out calls are other story ids kept for
 # ad-hoc checks of specific cases (see the trailing notes).
 if __name__ == '__main__':
    print(feed())
    #print(story(20763961))
    #print(story(20802050))
    #print(story(42899834))   # type "job"
    #print(story(42900076))   # Ask HN
    #print(story(42898201))   # Show HN
    #print(story(42899703))   # normal
    print(story(42902678))   # bad title?
@@ -1,120 +0,0 @@
 import logging
 logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)
 if __name__ == '__main__':
    import sys
    sys.path.insert(0,'.')
 import requests
 from datetime import datetime
 from utils import clean
 API_HOTTEST = lambda x: 'https://lobste.rs/hottest.json'
 API_ITEM = lambda x : 'https://lobste.rs/s/{}.json'.format(x)
 SITE_LINK = lambda x : 'https://lobste.rs/s/{}'.format(x)
 SITE_AUTHOR_LINK = lambda x : 'https://lobste.rs/u/{}'.format(x)
 def api(route, ref=None):
    """GET route(ref) and return the decoded JSON, retrying once.

    The first attempt uses a 5 second timeout, the second 15 seconds.
    KeyboardInterrupt is always propagated; any other failure is logged.
    Returns False when both attempts fail.
    """
    attempts = (
        (5, 'Problem hitting lobsters API: {}, trying again'),
        (15, 'Problem hitting lobsters API: {}'),
    )
    for timeout, failure_message in attempts:
        try:
            r = requests.get(route(ref), timeout=timeout)
            if r.status_code != 200:
                raise Exception('Bad response code ' + str(r.status_code))
            return r.json()
        except KeyboardInterrupt:
            raise
        except BaseException as e:
            logging.error(failure_message.format(str(e)))
    return False
 def feed():
    """Return the short ids from the hottest-stories endpoint ([] on a falsy result)."""
    hottest = api(API_HOTTEST)
    if not hottest:
        return []
    return [item['short_id'] for item in hottest]
 def unix(date_str):
    """Convert an ISO-like timestamp with fractional seconds and UTC offset to epoch seconds.

    All colons are stripped first so that both the time and the offset
    match the colon-free format string.
    """
    compact = ''.join(date_str.split(':'))
    parsed = datetime.strptime(compact, '%Y-%m-%dT%H%M%S.%f%z')
    return int(parsed.timestamp())
 def make_comment(i):
    """Build a comment dict (with an empty reply list) from one raw comment item."""
    # Missing keys fall back to neutral defaults instead of raising.
    author = i['commenting_user'] if 'commenting_user' in i else ''
    score = i.get('score', 0)
    date = unix(i['created_at']) if 'created_at' in i else 0
    text = clean(i.get('comment', '') or '')
    return {
        'author': author,
        'score': score,
        'date': date,
        'text': text,
        'comments': [],
    }
 def iter_comments(flat_comments):
    """Rebuild a nested comment tree from a flat list carrying 'depth' values.

    Each item is converted with make_comment(); an item at depth 0 starts a
    new top-level thread, any other item is attached to the most recent
    comment one level above it.
    """
    roots = []
    ancestors = []   # chain of comments from the current root downwards
    for raw in flat_comments:
        node = make_comment(raw)
        depth = raw['depth']
        if depth == 0:
            roots.append(node)
            ancestors = [node]
            continue
        # Trim the chain back to this comment's parent level, then attach.
        ancestors = ancestors[:depth]
        ancestors[-1]['comments'].append(node)
        ancestors.append(node)
    return roots
 def story(ref):
    """Fetch a Lobsters story and normalise it into a story dict.

    Returns False on a falsy API response, or when the story is below both
    the score (15) and comment-count (10) thresholds.
    """
    r = api(API_ITEM, ref)
    if not r:
        logging.info('Bad Lobsters API response.')
        return False

    if 'submitter_user' in r:
        author = r['submitter_user']
        author_link = SITE_AUTHOR_LINK(author)
    else:
        author = ''
        author_link = ''

    s = {
        'author': author,
        'author_link': author_link,
        'score': r.get('score', 0),
        'date': unix(r['created_at']) if 'created_at' in r else 0,
        'title': r.get('title', ''),
        'link': SITE_LINK(ref),
        'url': r.get('url', ''),
        'comments': iter_comments(r['comments']),
        'num_comments': r['comment_count'],
    }

    if s['score'] < 15 and s['num_comments'] < 10:
        logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
        return False

    # The optional description becomes the story text.
    description = r['description'] if 'description' in r else None
    if description:
        s['text'] = clean(description or '')

    return s
 # scratchpad so I can quickly develop the parser
 # Running the module directly fetches one story by short id and prints it
 # as indented JSON; the commented-out lines are alternative manual checks.
 if __name__ == '__main__':
    #print(feed())
    import json
    print(json.dumps(story('fzvd1v'), indent=4))
    #print(json.dumps(story('ixyv5u'), indent=4))
@@ -7,6 +7,8 @@ import requests
 import time
 from bs4 import BeautifulSoup
 import settings
 USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'
 def api(route):
@@ -27,15 +29,13 @@ def api(route):
 def story(ref):
    html = api(ref)
-    if not html:
+    if not html: return False
        logging.info('Bad http GET response.')
        return False
    soup = BeautifulSoup(html, features='html.parser')
    s = {}
    s['author'] = 'manual submission'
-    s['author_link'] = 'https://news.t0.vc'
+    s['author_link'] = 'https://{}'.format(settings.HOSTNAME)
    s['score'] = 0
    s['date'] = int(time.time())
    s['title'] = str(soup.title.string) if soup.title else ref
@@ -32,8 +32,11 @@ def feed():
        return [x.id for x in reddit.subreddit(subs).hot()]
    except KeyboardInterrupt:
        raise
-    except BaseException as e:
+    except PRAWException as e:
-        logging.critical('Problem hitting reddit API: {}'.format(str(e)))
+        logging.error('Problem hitting reddit API: {}'.format(str(e)))
        return []
    except PrawcoreException as e:
        logging.error('Problem hitting reddit API: {}'.format(str(e)))
        return []
 def comment(i):
@@ -56,9 +59,7 @@ def comment(i):
 def story(ref):
    try:
        r = reddit.submission(ref)
-        if not r:
+        if not r: return False
            logging.info('Bad Reddit API response.')
            return False
        s = {}
        s['author'] = r.author.name if r.author else '[Deleted]'
@@ -72,8 +73,7 @@ def story(ref):
        s['comments'] = list(filter(bool, s['comments']))
        s['num_comments'] = r.num_comments
-        if s['score'] < 25 and s['num_comments'] < 10:
+        if s['score'] < settings.REDDIT_SCORE_THRESHOLD and s['num_comments'] < settings.REDDIT_COMMENT_THRESHOLD:
            logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
            return False
        if r.selftext:
@@ -84,10 +84,10 @@ def story(ref):
    except KeyboardInterrupt:
        raise
    except PRAWException as e:
-        logging.critical('Problem hitting reddit API: {}'.format(str(e)))
+        logging.error('Problem hitting reddit API: {}'.format(str(e)))
        return False
    except PrawcoreException as e:
-        logging.critical('Problem hitting reddit API: {}'.format(str(e)))
+        logging.error('Problem hitting reddit API: {}'.format(str(e)))
        return False
 # scratchpad so I can quickly develop the parser
@@ -0,0 +1,99 @@
 import logging
 logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)
 if __name__ == '__main__':
    import sys
    sys.path.insert(0,'.')
 from datetime import datetime
 from bs4 import BeautifulSoup
 import settings
 from utils import clean
 from misc.time import unix
 from misc.api import xml
 from misc.news import Base
 def _get_sitemap_date(a):
    if a.find('lastmod'):
        return a.find('lastmod').text
    if a.find('news:publication_date'):
        return a.find('news:publication_date').text
    if a.find('ns2:publication_date'):
        return a.find('ns2:publication_date').text
    return ''
 def _filter_links(links, excludes=None):
    """Return the recent, unique <loc> URLs of sitemap entries, newest first.

    Args:
        links: sitemap entry elements; each must support .find() and have a
            'loc' child.
        excludes: optional iterable of substrings; a URL containing any of
            them is dropped.

    Returns:
        URLs of entries that carry a date newer than settings.MAX_STORY_AGE
        seconds ago, de-duplicated and ordered by that date, newest first.
    """
    too_old = datetime.now().timestamp() - settings.MAX_STORY_AGE

    # Parse each entry's date once instead of once per filter/sort pass.
    dated = []
    for entry in links:
        date_text = _get_sitemap_date(entry)
        if not date_text:
            continue
        timestamp = unix(date_text)
        if timestamp > too_old:
            dated.append((timestamp, entry))
    dated.sort(key=lambda pair: pair[0], reverse=True)

    # dict.fromkeys de-duplicates while keeping the newest-first order; the
    # previous list(set(...)) threw the sort order away again.
    urls = dict.fromkeys(entry.find('loc').text for _, entry in dated)

    if excludes:
        # As before, empty URLs are also discarded on this path.
        return [url for url in urls if url and not any(e in url for e in excludes)]
    return list(urls)
 def _get_sitemap(feed_url, excludes=None):
    """Download a sitemap and return the article URLs it leads to.

    A <urlset> contributes its entries directly; a <sitemapindex> is
    followed recursively into each nested sitemap that passes
    _filter_links. Returns [] when the download yields nothing.

    Args:
        feed_url: URL of the sitemap or sitemap index.
        excludes: optional substrings passed through to _filter_links.

    Returns:
        De-duplicated URLs in the order they were collected.
    """
    markup = xml(lambda x: feed_url)
    if not markup:
        return []
    soup = BeautifulSoup(markup, features='lxml')

    # find_all is the current spelling of the legacy findAll alias.
    index = soup.find('sitemapindex')
    nested = [s for s in index.find_all('sitemap') if s.find('loc')] if index else []

    urlset = soup.find('urlset')
    entries = [u for u in urlset.find_all('url') if u.find('loc')] if urlset else []

    feed_urls = _filter_links(nested, excludes)
    links = _filter_links(entries, excludes)

    for url in feed_urls:
        links += _get_sitemap(url, excludes)

    # Order-preserving de-duplication; list(set(...)) returned the URLs in
    # a hash-dependent order that differed from run to run.
    return list(dict.fromkeys(links))
 class Sitemap(Base):
    """News source whose articles are discovered from XML sitemaps."""

    def __init__(self, config):
        """Store the config mapping and read its 'url' and 'tz' entries.

        'url' may be a single sitemap URL string or a list of them.
        """
        self.config = config
        self.sitemap_url = config.get('url')
        self.tz = config.get('tz')

    def feed(self, excludes=None):
        """Return (id, link) pairs for the articles listed in the sitemap(s).

        Args:
            excludes: optional substrings; URLs containing any are skipped.

        Returns:
            A list of (self.get_id(link), link) tuples, de-duplicated and in
            the order the links were collected.
        """
        if isinstance(self.sitemap_url, str):
            sitemap_urls = [self.sitemap_url]
        elif isinstance(self.sitemap_url, list):
            sitemap_urls = self.sitemap_url
        else:
            # Any other config value yields an empty feed, as before.
            sitemap_urls = []

        links = []
        for url in sitemap_urls:
            links += _get_sitemap(url, excludes)

        # dict.fromkeys keeps collection order; list(set(...)) returned a
        # hash-dependent order, so callers truncating the feed got an
        # arbitrary subset that changed between runs.
        return [(self.get_id(link), link) for link in dict.fromkeys(links)]
 # scratchpad so I can quickly develop the parser
 if __name__ == '__main__':
    # Sitemap.__init__ takes a config mapping (it calls config.get('url')
    # and config.get('tz')), so the sitemap URLs are passed in dicts.
    print("Sitemap: The Spinoff")
    site = Sitemap({'url': "https://thespinoff.co.nz/sitemap.xml"})
    excludes = [
        'thespinoff.co.nz/sitemap-misc.xml',
        'thespinoff.co.nz/sitemap-authors.xml',
        'thespinoff.co.nz/sitemap-tax-category.xml',
    ]
    posts = site.feed(excludes)
    print(posts[:5])
    # feed() returns (ref, url) pairs.
    # NOTE(review): story() is inherited from Base, which is not defined in
    # this file; unpacking assumes a story(ref, urlref) signature - confirm.
    if posts:
        print(site.story(*posts[0]))

    print("Sitemap: Newshub")
    site = Sitemap({'url': [
        'https://www.newshub.co.nz/home/politics.gnewssitemap.xml',
        'https://www.newshub.co.nz/home/new-zealand.gnewssitemap.xml',
        'https://www.newshub.co.nz/home/world.gnewssitemap.xml',
        'https://www.newshub.co.nz/home/money.gnewssitemap.xml',
    ]})
    posts = site.feed()
    print(posts[:5])
    if posts:
        print(site.story(*posts[0]))
        # The original passed posts[:-1] (a list slice); the last single
        # post is what a story lookup needs.
        print(site.story(*posts[-1]))
@@ -0,0 +1,165 @@
 import logging
 logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)
 if __name__ == '__main__':
    import sys
    sys.path.insert(0,'.')
 import requests
 from datetime import datetime
 from utils import clean
 SUBSTACK_REFERER = 'https://substack.com'
 SUBSTACK_API_TOP_POSTS = lambda x: "https://substack.com/api/v1/reader/top-posts"
 def author_link(author_id, base_url):
     """Return the profile URL for *author_id* under *base_url*."""
     return "{}/people/{}".format(base_url, author_id)
 def api_comments(post_id, base_url):
     """Return the comments API URL for *post_id* under *base_url*."""
     template = "{}/api/v1/post/{}/comments?all_comments=true&sort=best_first"
     return template.format(base_url, post_id)
 def api_stories(x, base_url):
     """Return the archive listing URL under *base_url*.

     *x* is accepted so the function fits the ``route(ref)`` calling shape but
     is not used when building the URL.
     """
     template = "{}/api/v1/archive?sort=new&search=&offset=0&limit=100"
     return template.format(base_url)
 def unix(date_str):
     """Convert a ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` timestamp to Unix seconds.

     The trailing ``Z`` marks the value as UTC, so the parsed datetime is
     tagged with UTC before conversion.  Without that, ``timestamp()`` would
     interpret the naive value in the machine's local timezone and the result
     would be off by the local UTC offset.

     Raises ``ValueError`` if *date_str* does not match the format.
     """
     # Local import: this module only imports ``datetime`` from ``datetime``.
     from datetime import timezone

     parsed = datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%fZ')
     return int(parsed.replace(tzinfo=timezone.utc).timestamp())
 def api(route, ref=None, referer=None):
     """GET ``route(ref)`` and return the decoded JSON, retrying once.

     The first attempt uses a 10 second timeout and the retry 20 seconds.  A
     ``Referer`` header is sent only when *referer* is truthy.  Each failed
     attempt (including a non-200 status) is logged; ``False`` is returned
     when both fail.  ``KeyboardInterrupt`` is always re-raised.
     """
     headers = {'Referer': referer} if referer else None
     # (timeout in seconds, log message template) for each attempt in order.
     attempts = (
         (10, 'Problem hitting Substack API: {}, trying again'),
         (20, 'Problem hitting Substack API: {}'),
     )
     for timeout, message in attempts:
         try:
             r = requests.get(route(ref), headers=headers, timeout=timeout)
             if r.status_code == 200:
                 return r.json()
             raise Exception('Bad response code ' + str(r.status_code))
         except KeyboardInterrupt:
             raise
         except BaseException as e:
             logging.error(message.format(str(e)))
     return False
 def comment(i):
     """Convert one Substack comment dict into the scraper's comment shape.

     Returns ``False`` for entries without a ``'body'`` key so callers can
     filter them out.  Otherwise returns a dict with ``date``, ``author``,
     ``score``, ``text`` and ``comments`` (the converted replies, with dropped
     entries removed).

     A missing or null ``reactions`` mapping and a missing or null ``children``
     list are tolerated instead of raising.
     """
     if 'body' not in i:
         return False

     c = {}
     c['date'] = unix(i.get('date'))
     c['author'] = i.get('name', '')
     # ``reactions`` may be absent or null; the score is then None.
     c['score'] = (i.get('reactions') or {}).get('❤')
     c['text'] = clean(i.get('body', '') or '')
     replies = [comment(j) for j in i.get('children') or []]
     c['comments'] = [reply for reply in replies if reply]
     return c
 class Publication:
     """Scraper for the public archive of a single Substack publication."""

     def __init__(self, domain):
         # API routes are built as "<base>/api/...", so a trailing slash on the
         # configured domain would yield "//api/...".  Normalise it away.
         self.BASE_DOMAIN = domain.rstrip('/')

     def _public_stories(self):
         """Fetch the archive and keep posts whose audience is "everyone".

         Returns an empty list when the API call fails.
         """
         stories = api(lambda x: api_stories(x, self.BASE_DOMAIN), referer=self.BASE_DOMAIN)
         if not stories:
             return []
         return [i for i in stories if i and i.get("audience") == "everyone"]

     def feed(self):
         """Return the ids (as strings) of the publication's public posts."""
         return [str(i.get("id")) for i in self._public_stories()]

     def story(self, ref):
         """Return the story dict for post id *ref*, or ``False`` if not found.

         Comments are fetched with a second API call; if that call fails the
         story is still returned, with an empty comment list.
         """
         r = next((i for i in self._public_stories() if str(i.get('id')) == ref), None)
         if not r:
             return False

         s = {}
         s['author'] = ''
         s['author_link'] = ''
         s['date'] = unix(r.get('post_date'))
         # ``reactions`` may be absent or null; the score is then None.
         s['score'] = (r.get('reactions') or {}).get('❤')
         s['title'] = r.get('title', '')
         s['link'] = r.get('canonical_url', '')
         s['url'] = r.get('canonical_url', '')

         comments = api(lambda x: api_comments(x, self.BASE_DOMAIN), r.get('id'), referer=self.BASE_DOMAIN)
         # api() returns False on failure, so only read from a real mapping.
         raw_comments = comments.get('comments') if isinstance(comments, dict) else None
         s['comments'] = list(filter(bool, [comment(i) for i in raw_comments or []]))
         s['num_comments'] = r.get('comment_count', 0)

         bylines = r.get('publishedBylines') or []
         authors = list(filter(None, [self._bylines(byline) for byline in bylines]))
         if authors:
             s['author'] = authors[0].get('name')
             s['author_link'] = authors[0].get('link')

         return s

     def _bylines(self, b):
         """Return ``{'name', 'link'}`` for byline *b*, or ``None`` if it has no id."""
         if 'id' not in b:
             return None
         a = {}
         a['name'] = b.get('name')
         a['link'] = author_link(b.get('id'), self.BASE_DOMAIN)
         return a
 class Top:
     """Scraper for the Substack "top posts" listing."""

     def _public_stories(self):
         """Fetch the top-posts listing and keep posts open to "everyone".

         Returns an empty list when the API call fails.
         """
         stories = api(SUBSTACK_API_TOP_POSTS, referer=SUBSTACK_REFERER)
         if not stories:
             return []
         return [i for i in stories if i and i.get("audience") == "everyone"]

     def feed(self):
         """Return the ids (as strings) of the public top posts."""
         return [str(i.get("id")) for i in self._public_stories()]

     def story(self, ref):
         """Return the story dict for post id *ref*, or ``False``.

         ``False`` is returned when the post is not in the listing or carries
         no publication ``base_url`` (needed to build the author and comment
         URLs).  If the comments call fails the story is still returned, with
         an empty comment list.
         """
         r = next((i for i in self._public_stories() if str(i.get('id')) == ref), None)
         if not r:
             return False

         pub = r.get('pub') or {}
         base_url = pub.get('base_url')
         if not base_url:
             return False

         s = {}
         s['author'] = pub.get('author_name')
         s['author_link'] = author_link(pub.get('author_id'), base_url)
         s['date'] = unix(r.get('post_date'))
         s['score'] = r.get('score')
         s['title'] = r.get('title', '')
         s['link'] = r.get('canonical_url', '')
         s['url'] = r.get('canonical_url', '')

         comments = api(lambda x: api_comments(x, base_url), r.get('id'), referer=SUBSTACK_REFERER)
         # api() returns False on failure, so only read from a real mapping.
         raw_comments = comments.get('comments') if isinstance(comments, dict) else None
         s['comments'] = list(filter(bool, [comment(i) for i in raw_comments or []]))
         s['num_comments'] = r.get('comment_count', 0)

         return s
 top = Top()        
 # scratchpad so I can quickly develop the parser
 if __name__ == '__main__':
     top_posts = top.feed()
     # feed() returns [] when the API call fails; don't index into nothing.
     if top_posts:
         print(top.story(top_posts[0]))

     # No trailing slash: routes are built as "<base>/api/...", so one here
     # would produce "//api/..." in the request URL.
     webworm = Publication("https://www.webworm.co")
     posts = webworm.feed()
     if posts:
         print(webworm.story(posts[0]))
@@ -16,7 +16,7 @@ from utils import clean
 # cache the topic groups to prevent redirects
 group_lookup = {}
-USER_AGENT = 'qotnews scraper (github:tanner37)'
+USER_AGENT = 'qotnews scraper (github:tannercollin)'
 API_TOPSTORIES = lambda : 'https://tildes.net'
 API_ITEM = lambda x : 'https://tildes.net/shortener/{}'.format(x)
@@ -34,7 +34,7 @@ def api(route):
    except KeyboardInterrupt:
        raise
    except BaseException as e:
-        logging.critical('Problem hitting tildes website: {}'.format(str(e)))
+        logging.error('Problem hitting tildes website: {}'.format(str(e)))
        return False
 def feed():
@@ -71,15 +71,11 @@ def story(ref):
        html = api(SITE_LINK(group_lookup[ref], ref))
    else:
        html = api(API_ITEM(ref))
-    if not html:
+    if not html: return False
        logging.info('Bad Tildes API response.')
        return False
    soup = BeautifulSoup(html, features='html.parser')
    a = soup.find('article', class_='topic-full')
-    if a is None:
+    if a is None: return False
        logging.info('Tildes <article> element not found.')
        return False
    h = a.find('header')
    lu = h.find('a', class_='link-user')
@@ -87,7 +83,6 @@ def story(ref):
    error = a.find('div', class_='text-error')
    if error:
        if 'deleted' in error.string or 'removed' in error.string:
            logging.info('Article was deleted or removed.')
            return False
    s = {}
@@ -107,21 +102,7 @@ def story(ref):
    ch = a.find('header', class_='topic-comments-header')
    s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
-    if s['group'].split('.')[0] not in [
+    if s['score'] < 8 and s['num_comments'] < 6:
        '~arts',
        '~comp',
        '~creative',
        '~design',
        '~engineering',
        '~finance',
        '~science',
        '~tech',
    ]:
        logging.info('Group ({}) not in whitelist.'.format(s['group']))
        return False
    if s['score'] < 15 and s['num_comments'] < 10:
        logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
        return False
    td = a.find('div', class_='topic-full-text')
@@ -132,7 +113,7 @@ def story(ref):
 # scratchpad so I can quickly develop the parser
 if __name__ == '__main__':
-    print(feed())
+    #print(feed())
    #normal = story('gxt')
    #print(normal)
    #no_comments = story('gxr')
@@ -141,8 +122,8 @@ if __name__ == '__main__':
    #print(self_post)
    #li_comment = story('gqx')
    #print(li_comment)
-    #broken = story('q4y')
+    broken = story('q4y')
-    #print(broken)
+    print(broken)
    # make sure there's no self-reference
    #import copy
@@ -0,0 +1,35 @@
 import logging
 logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)
 import requests
 USER_AGENT = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
 FORWARD_IP = '66.249.66.1'
 def xml(route, ref=None):
     """GET ``route(ref)`` and return the response body as text.

     The request carries the module's ``USER_AGENT`` and ``FORWARD_IP`` values
     as headers and uses a 5 second timeout.  Any failure, including a non-200
     status, is logged and reported as ``False``; ``KeyboardInterrupt`` is
     re-raised.
     """
     try:
         request_headers = {'User-Agent': USER_AGENT, 'X-Forwarded-For': FORWARD_IP}
         response = requests.get(route(ref), headers=request_headers, timeout=5)
         if response.status_code == 200:
             return response.text
         raise Exception('Bad response code ' + str(response.status_code))
     except KeyboardInterrupt:
         raise
     except BaseException as err:
         logging.error('Problem hitting URL: {}'.format(str(err)))
         return False
 def json(route, ref=None):
     """GET ``route(ref)`` and return the response decoded as JSON.

     The request carries the module's ``USER_AGENT`` and ``FORWARD_IP`` values
     as headers and uses a 5 second timeout.  Any failure, including a non-200
     status or an undecodable body, is logged and reported as ``False``;
     ``KeyboardInterrupt`` is re-raised.
     """
     try:
         request_headers = {'User-Agent': USER_AGENT, 'X-Forwarded-For': FORWARD_IP}
         response = requests.get(route(ref), headers=request_headers, timeout=5)
         if response.status_code == 200:
             return response.json()
         raise Exception('Bad response code ' + str(response.status_code))
     except KeyboardInterrupt:
         raise
     except BaseException as err:
         logging.error('Problem hitting URL: {}'.format(str(err)))
         return False
@@ -0,0 +1,69 @@
 def parse_extruct(s, data):
     """Copy title, date and author metadata from *data* into story dict *s*.

     *data* maps a syntax name (``'rdfa'``, ``'opengraph'``, ``'microdata'``,
     ``'json-ld'``) to a list of extracted items.  The syntaxes are applied in
     that order, so a later one overwrites values set by an earlier one.
     Within one item a published date takes precedence over a modified date.

     Fields that are absent from an item are skipped instead of raising, and
     author values that are neither a dict nor a list of dicts are ignored.

     Returns *s*, which is also modified in place.
     """
     rdfa_keys = {
         'title': [
             'http://ogp.me/ns#title',
             'https://ogp.me/ns#title',
         ],
         'date': [
             'http://ogp.me/ns/article#modified_time',
             'https://ogp.me/ns/article#modified_time',
             'http://ogp.me/ns/article#published_time',
             'https://ogp.me/ns/article#published_time',
         ]
     }
     news_types = ('https://schema.org/NewsArticle', 'http://schema.org/NewsArticle')
     article_types = ('Article', 'NewsArticle')

     # NOTE(review): properties are looked up one level below each RDFa item
     # (in the item's dict values), as the original code did -- confirm this
     # matches the RDFa shape extruct actually emits.
     for rdfa in data.get('rdfa') or []:
         for props in rdfa.values():
             if not isinstance(props, dict):
                 continue  # e.g. a plain-string '@id' entry
             for attribute, properties in rdfa_keys.items():
                 for prop in properties:
                     for value in props.get(prop) or []:
                         if isinstance(value, dict) and '@value' in value:
                             s[attribute] = value['@value']

     for og in data.get('opengraph') or []:
         pairs = og.get('properties') or []
         titles = [value for key, value in pairs if value and 'og:title' in key]
         modified = [value for key, value in pairs if value and 'article:modified_time' in key]
         published = [value for key, value in pairs if value and 'article:published_time' in key]
         if modified:
             s['date'] = modified[0]
         if published:
             s['date'] = published[0]
         if titles:
             s['title'] = titles[0]

     for md in data.get('microdata') or []:
         if md.get('type') in news_types:
             props = md.get('properties') or {}
             _extruct_apply_article(s, props)
             # Microdata nests the author's fields under 'properties'.
             name = _extruct_author_name(props.get('author'), wrapper='properties')
             if name is not None:
                 s['author'] = name

     for ld in data.get('json-ld') or []:
         if not isinstance(ld, dict):
             continue
         if ld.get('@type') in article_types:
             _extruct_apply_article(s, ld)
             name = _extruct_author_name(ld.get('author'))
             if name is not None:
                 s['author'] = name
         for gld in ld.get('@graph') or []:
             if isinstance(gld, dict) and gld.get('@type') in article_types:
                 _extruct_apply_article(s, gld)

     return s


 def _extruct_apply_article(s, props):
     """Set ``title``/``date`` on *s* from the article fields present in *props*."""
     if 'headline' in props:
         s['title'] = props['headline']
     # Applied in this order so datePublished wins when both are present.
     for key in ('dateModified', 'datePublished'):
         if props.get(key):
             s['date'] = props[key]


 def _extruct_author_name(author, wrapper=None):
     """Return the (first) author's name from a dict or list of dicts, else None."""
     if isinstance(author, list):
         author = author[0] if author else None
     if not isinstance(author, dict):
         return None
     if wrapper is not None:
         author = author.get(wrapper)
         if not isinstance(author, dict):
             return None
     return author.get('name')
@@ -0,0 +1,101 @@
 import logging
 logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)
 import re
 import requests
 from bs4 import BeautifulSoup
 from scrapers import declutter
 import extruct
 import settings
 from utils import clean
 from misc.metadata import parse_extruct
 from misc.time import unix
 from misc.api import xml
 def comment(i):
     """Convert a raw comment dict into the scraper's comment shape.

     Entries without an ``'author'`` key yield ``False`` so callers can filter
     them out.  Replies under ``i['children']`` are converted recursively and
     falsy results are dropped.
     """
     if 'author' not in i:
         return False

     return {
         'author': i.get('author', ''),
         'score': i.get('points', 0),
         'date': unix(i.get('date', 0)),
         'text': clean(i.get('text', '') or ''),
         'comments': [
             reply
             for reply in [comment(child) for child in i['children']]
             if reply
         ],
     }
 def comment_count(i):
     """Count the nodes with a truthy ``'author'`` in the comment tree at *i*."""
     total = 1 if i['author'] else 0
     for child in i['comments']:
         total += comment_count(child)
     return total
 class Base:
     """Shared behaviour for sources configured by a ``config`` mapping.

     The mapping is read for ``'url'``, ``'tz'`` and, in :meth:`get_id`,
     ``'patterns'``.
     """

     def __init__(self, config):
         self.config = config
         self.url = config.get('url')
         self.tz = config.get('tz')

     def get_id(self, link):
         """Derive a ref for *link* from the regexes in ``config['patterns']``.

         Patterns are tried in their configured order; the first one that
         matches at the start of *link* and captures something wins, and its
         captured groups are joined with ``':'``.  When no pattern is
         configured, none matches, or nothing was captured, *link* itself is
         returned.
         """
         for pattern in self.config.get('patterns') or []:
             match = re.match(pattern, link)
             if not match:
                 continue
             # Optional groups that did not participate come back as None.
             ref = ':'.join(g for g in match.groups() if g is not None)
             if ref:
                 return ref
         return link

     def feed(self, excludes=None):
         """Return the source's entries; the base implementation has none."""
         return []

     def story(self, ref, urlref):
         """Fetch *urlref* and build a story dict from its embedded metadata.

         Returns ``False`` when *urlref* is ``None``, the page cannot be
         fetched, or no date could be determined.  *ref* is not used here.
         """
         # Local import: resolves icon hrefs against the page URL.
         from urllib.parse import urljoin

         if urlref is None:
             return False

         markup = xml(lambda x: urlref)
         if not markup:
             return False

         s = {}
         s['author_link'] = ''
         s['score'] = 0
         s['comments'] = []
         s['num_comments'] = 0
         s['link'] = urlref
         s['url'] = urlref
         s['date'] = 0

         soup = BeautifulSoup(markup, features='html.parser')
         icon32 = soup.find_all('link', rel="icon", href=True, sizes="32x32")
         icon16 = soup.find_all('link', rel="icon", href=True, sizes="16x16")
         favicon = soup.find_all('link', rel="shortcut icon", href=True)
         others = soup.find_all('link', rel="icon", href=True)
         hrefs = [i.get('href') for i in icon32 + icon16 + favicon + others]
         # dict.fromkeys de-duplicates while keeping the preference order
         # above (a set would pick an arbitrary icon).  urljoin leaves absolute
         # URLs alone and resolves relative and protocol-relative ones.
         icons = [urljoin(urlref, href) for href in dict.fromkeys(hrefs) if href]
         if icons:
             s['icon'] = icons[0]

         data = extruct.extract(markup)
         s = parse_extruct(s, data)
         if s['date']:
             s['date'] = unix(s['date'], tz=self.tz)

         if 'disqus' in markup:
             # Comments are best-effort: a failure here must not drop the story.
             try:
                 # get_comments() may return None on failure.
                 comments = list(filter(bool, declutter.get_comments(urlref) or []))
                 num_comments = sum(comment_count(c) for c in comments)
             except Exception as e:
                 logging.error('Problem getting comments for {}: {}'.format(urlref, str(e)))
             else:
                 s['comments'] = comments
                 s['num_comments'] = num_comments

         if not s['date']:
             return False
         return s
@@ -0,0 +1,18 @@
 import pytz
 import dateutil.parser
 TZINFOS = {
    'NZDT': pytz.timezone('Pacific/Auckland'),
    'NZST': pytz.timezone('Pacific/Auckland')
 }
 def unix(date_str, tz=None, tzinfos=TZINFOS):
     """Parse *date_str* and return it as Unix seconds, or ``0`` on failure.

     *tzinfos* is passed to ``dateutil.parser.parse`` to resolve timezone
     abbreviations.  *tz* names the timezone to assume when the parsed value
     carries no timezone of its own; a value that already has one is used as
     is.  (Localizing an already-aware datetime raises, which previously made
     such dates come back as ``0``.)
     """
     try:
         dt = dateutil.parser.parse(date_str, tzinfos=tzinfos)
         if tz and dt.tzinfo is None:
             dt = pytz.timezone(tz).localize(dt)
         return int(dt.timestamp())
     except Exception:
         # Best-effort by design: unparseable input or an unknown timezone
         # name is reported to the caller as 0.
         return 0
@@ -4,19 +4,21 @@ certifi==2020.6.20
 chardet==3.0.4
 click==7.1.2
 commonmark==0.9.1
 extruct==0.10.0
 Flask==1.1.2
 Flask-Cors==3.0.8
 gevent==20.6.2
 greenlet==0.4.16
 humanize==4.10.0
 idna==2.10
 itsdangerous==1.1.0
 Jinja2==2.11.2
 lxml==4.6.1
 MarkupSafe==1.1.1
 packaging==20.4
 praw==6.4.0
 prawcore==1.4.0
 pyparsing==2.4.7
 pytz==2020.4
 requests==2.24.0
 six==1.15.0
 soupsieve==2.0.1
@@ -28,3 +30,4 @@ websocket-client==0.57.0
 Werkzeug==1.0.1
 zope.event==4.4
 zope.interface==5.1.0
 python-dateutil==2.8.1
@@ -0,0 +1,41 @@
 import logging
 logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)
 import requests
 READ_API = 'http://127.0.0.1:33843/browser/details'
 # get_comments() reads READ_COMMENT_API; the earlier spelling with a double
 # underscore left that name undefined, so every comment fetch failed.
 # NOTE(review): the 'commentd' path segment looks like a typo for 'comments'
 # -- confirm against the reader server's routes before changing it.
 READ_COMMENT_API = 'http://127.0.0.1:33843/browser/commentd'
 # Old misspelled name kept as an alias for any existing references.
 READ_COMMENT__API = READ_COMMENT_API
 TIMEOUT = 60
 def get_html(url):
     """Return the ``'content'`` field of ``get_details(url)``, or ``''``.

     The empty string is returned when ``get_details`` gives a falsy result.
     """
     logging.info(f"Reader Scraper: {url}")
     page = get_details(url)
     return page['content'] if page else ''
 def get_details(url):
     """POST *url* to ``READ_API`` and return the decoded JSON reply.

     Any failure, including a non-200 status, is logged and reported as
     ``None``; ``KeyboardInterrupt`` is re-raised.
     """
     try:
         response = requests.post(READ_API, data={'url': url}, timeout=TIMEOUT)
         if response.status_code == 200:
             return response.json()
         raise Exception('Bad response code ' + str(response.status_code))
     except KeyboardInterrupt:
         raise
     except BaseException as err:
         logging.error('Problem Scraping article: {}'.format(str(err)))
         return None
 def get_comments(url):
     """POST *url* to ``READ_COMMENT_API`` and return the decoded JSON reply.

     Any failure, including a non-200 status, is logged and reported as
     ``None``; ``KeyboardInterrupt`` is re-raised.
     """
     try:
         response = requests.post(READ_COMMENT_API, data={'url': url}, timeout=TIMEOUT)
         if response.status_code == 200:
             return response.json()
         raise Exception('Bad response code ' + str(response.status_code))
     except KeyboardInterrupt:
         raise
     except BaseException as err:
         logging.error('Problem getting comments for article: {}'.format(str(err)))
         return None
@@ -0,0 +1,41 @@
 import logging
 logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)
 import requests
 DECLUTTER_API = 'https://declutter.1j.nz/details'
 DECLUTTER_COMMENT_API = 'https://declutter.1j.nz/comments'
 TIMEOUT = 30
 def get_html(url):
     """Return the ``'content'`` field of ``get_details(url)``, or ``''``.

     The empty string is returned when ``get_details`` gives a falsy result.
     """
     logging.info(f"Declutter Scraper: {url}")
     page = get_details(url)
     return page['content'] if page else ''
 def get_details(url):
     """POST *url* to ``DECLUTTER_API`` and return the decoded JSON reply.

     Any failure, including a non-200 status, is logged and reported as
     ``None``; ``KeyboardInterrupt`` is re-raised.
     """
     try:
         response = requests.post(DECLUTTER_API, data={'url': url}, timeout=TIMEOUT)
         if response.status_code == 200:
             return response.json()
         raise Exception('Bad response code ' + str(response.status_code))
     except KeyboardInterrupt:
         raise
     except BaseException as err:
         logging.error('Problem decluttering article: {}'.format(str(err)))
         return None
 def get_comments(url):
     """POST *url* to ``DECLUTTER_COMMENT_API`` and return the decoded JSON reply.

     Any failure, including a non-200 status, is logged and reported as
     ``None``; ``KeyboardInterrupt`` is re-raised.
     """
     try:
         response = requests.post(DECLUTTER_COMMENT_API, data={'url': url}, timeout=TIMEOUT)
         if response.status_code == 200:
             return response.json()
         raise Exception('Bad response code ' + str(response.status_code))
     except KeyboardInterrupt:
         raise
     except BaseException as err:
         logging.error('Problem getting comments for article: {}'.format(str(err)))
         return None
@@ -0,0 +1,27 @@
 import logging
 logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)
 import requests
 READ_API = 'http://127.0.0.1:33843/details'
 TIMEOUT = 20
 def get_html(url):
     """Return the ``'content'`` field of ``get_details(url)``, or ``''``.

     The empty string is returned when ``get_details`` gives a falsy result.
     """
     logging.info(f"Local Scraper: {url}")
     page = get_details(url)
     return page['content'] if page else ''
 def get_details(url):
     """POST *url* to ``READ_API`` and return the decoded JSON reply.

     Any failure, including a non-200 status, is logged and reported as
     ``None``; ``KeyboardInterrupt`` is re-raised.
     """
     try:
         response = requests.post(READ_API, data={'url': url}, timeout=TIMEOUT)
         if response.status_code == 200:
             return response.json()
         raise Exception('Bad response code ' + str(response.status_code))
     except KeyboardInterrupt:
         raise
     except BaseException as err:
         logging.error('Problem getting article: {}'.format(str(err)))
         return None
@@ -0,0 +1,37 @@
 import logging
 logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)
 import requests
 OUTLINE_REFERER = 'https://outline.com/'
 OUTLINE_API = 'https://api.outline.com/v3/parse_article'
 TIMEOUT = 20
 def get_html(url):
     """Return the ``'html'`` field of ``get_details(url)``, or ``''``.

     The empty string is returned when ``get_details`` gives a falsy result.
     """
     article = get_details(url)
     return article['html'] if article else ''
 def get_details(url):
     """Ask the Outline API to parse *url* and return its ``data`` payload.

     Returns ``None`` when the request is rate limited (HTTP 429, after a
     30 second pause), when any other failure occurs (logged), or when
     Outline reports the URL as unsupported.  ``KeyboardInterrupt`` is
     re-raised.
     """
     # Local import: this module does not import ``time`` at the top, so the
     # rate-limit sleep below previously raised NameError instead of pausing.
     import time

     try:
         logging.info(f"Outline Scraper: {url}")
         params = {'source_url': url}
         headers = {'Referer': OUTLINE_REFERER}
         r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=TIMEOUT)
         if r.status_code == 429:
             logging.info('Rate limited by outline, sleeping 30s and skipping...')
             time.sleep(30)
             return None
         if r.status_code != 200:
             raise Exception('Bad response code ' + str(r.status_code))
         data = r.json()['data']
         if 'URL is not supported by Outline' in data['html']:
             raise Exception('URL not supported by Outline')
         return data
     except KeyboardInterrupt:
         raise
     except BaseException as e:
         logging.error('Problem outlining article: {}'.format(str(e)))
         return None
@@ -1,58 +0,0 @@
 import time
 import json
 import logging
 import feed
 import database
 import search
 database.init()
 def fix_gzip_bug(story_list):
     """Re-fetch stories whose stored text is dense with U+FFFD characters.

     For each row in *story_list* (the story id is the row's first element)
     the stored text is scanned for replacement characters.  When their rate
     per 1000 characters reaches ``FIX_THRESHOLD`` the story is updated via
     ``feed.update_story`` and, if still valid, written back to the database
     and the search index.  A 3 second pause follows each attempted fix.

     Errors for a single story are logged and then drop into the debugger;
     ``KeyboardInterrupt`` is re-raised.
     """
     FIX_THRESHOLD = 150

     for sid in story_list:
         try:
             sid = sid[0]
             story = database.get_story(sid)
             full_json = json.loads(story.full_json)
             meta_json = json.loads(story.meta_json)
             text = full_json.get('text', '')

             count = text.count('�')
             # Also guards the division below against empty text.
             if not count: continue

             ratio = count / len(text) * 1000
             print('Bad story:', sid, 'Num ?:', count, 'Ratio:', ratio)
             if ratio < FIX_THRESHOLD: continue

             print('Attempting to fix...')

             valid = feed.update_story(meta_json, is_manual=True)
             if valid:
                 database.put_story(meta_json)
                 search.put_story(meta_json)
                 print('Success')
             else:
                 print('Story was not valid')

             time.sleep(3)

         except KeyboardInterrupt:
             raise
         except BaseException as e:
             logging.exception(e)
             # Deliberate: stop in the debugger so the failure can be inspected.
             breakpoint()
 # Script entry point: ask for confirmation, then run the fixer over all stories.
 if __name__ == '__main__':
    num_stories = database.count_stories()
    print('Fix {} stories?'.format(num_stories))
    print('Press ENTER to continue, ctrl-c to cancel')
    # Blocks until the operator presses ENTER.
    input()
    story_list = database.get_story_list()
    fix_gzip_bug(story_list)
@@ -1,62 +0,0 @@
 import logging
 logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO)
 import database
 from sqlalchemy import select
 import search
 import sys
 import time
 import json
 import requests
 database.init()
 search.init()
 BATCH_SIZE = 5000
 def put_stories(stories):
     """POST *stories* to the ``qotnews`` index via ``search.meili_api``."""
     route = 'indexes/qotnews/documents'
     result = search.meili_api(requests.post, route, stories)
     return result
 def get_update(update_id):
     """GET the task record for *update_id* via ``search.meili_api``."""
     route = 'tasks/{}'.format(update_id)
     result = search.meili_api(requests.get, route)
     return result
 # Script entry point: re-submit every stored story to the search index in
 # batches of BATCH_SIZE, waiting for each batch to finish processing.
 if __name__ == '__main__':
     num_stories = database.count_stories()
     print('Reindex {} stories?'.format(num_stories))
     print('Press ENTER to continue, ctrl-c to cancel')
     input()

     story_list = database.get_story_list()

     count = 1
     while len(story_list):
         stories = []

         for _ in range(BATCH_SIZE):
             try:
                 sid = story_list.pop()
             except IndexError:
                 break

             # NOTE(review): sid looks like a row (sid[0] is printed below) yet
             # is passed whole to get_story -- confirm that is accepted.
             story = database.get_story(sid)
             print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
             story_obj = json.loads(story.meta_json)
             stories.append(story_obj)
             count += 1

         res = put_stories(stories)
         # Presumably falsy when the request failed -- stop rather than
         # crash on the subscript below.
         if not res:
             raise SystemExit('Problem submitting batch to MeiliSearch, aborting.')
         update_id = res['uid']

         print('Waiting for processing', end='')
         while True:
             update = get_update(update_id)
             status = update['status'] if update else None
             if status == 'succeeded':
                 break
             if status == 'failed':
                 # A failed task never becomes 'succeeded'; don't poll forever.
                 print()
                 raise SystemExit('MeiliSearch task {} failed, aborting.'.format(update_id))
             time.sleep(0.5)
             print('.', end='', flush=True)

         print()

     print('Done.')
@@ -1,23 +0,0 @@
 import time
 import requests
 def test_search_api():
     """Time 100 sequential search requests and print the mean duration.

     Each response is checked with ``raise_for_status()``, so an HTTP error
     aborts the run.
     """
     num_tests = 100
     durations = []

     for _ in range(num_tests):
         started = time.time()
         response = requests.get('http://127.0.0.1:33842/api/search?q=iphone')
         response.raise_for_status()
         durations.append(time.time() - started)

     avg_time = sum(durations) / num_tests
     print('Average search time:', avg_time)
 # Script entry point: run the timing loop when executed directly.
 if __name__ == '__main__':
    test_search_api()
@@ -4,57 +4,83 @@ logging.basicConfig(
        level=logging.DEBUG)
 import requests
 import settings
-SEARCH_ENABLED = bool(settings.MEILI_URL)
+MEILI_URL = 'http://127.0.0.1:7700/'
-def meili_api(method, route, json=None, params=None, parse_json=True):
+def create_index():
    try:
-        headers = {'Authorization': 'Bearer ' + settings.MEILI_API_KEY}
+        json = dict(name='qotnews', uid='qotnews')
-        r = method(settings.MEILI_URL + route, json=json, params=params, timeout=4)
+        r = requests.post(MEILI_URL + 'indexes', json=json, timeout=2)
-        if r.status_code > 299:
+        if r.status_code != 201:
            raise Exception('Bad response code ' + str(r.status_code))
-        if parse_json:
+        return r.json()
            return r.json()
        else:
            r.encoding = 'utf-8'
            return r.text
    except KeyboardInterrupt:
        raise
    except BaseException as e:
-        logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e))
+        logging.error('Problem creating MeiliSearch index: {}'.format(str(e)))
        return False
-def create_index():
+def update_rankings():
-    json = dict(uid='qotnews', primaryKey='id')
+    try:
-    return meili_api(requests.post, 'indexes', json=json)
+        json = ['typo', 'words', 'proximity', 'attribute', 'desc(date)', 'wordsPosition', 'exactness']
        r = requests.post(MEILI_URL + 'indexes/qotnews/settings/ranking-rules', json=json, timeout=2)
        if r.status_code != 202:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem setting MeiliSearch ranking rules: {}'.format(str(e)))
        return False
-def update_settings():
+def update_attributes():
-    json = {
+    try:
-        'rankingRules': ['typo', 'words', 'proximity', 'date:desc', 'exactness'],
+        json = ['title', 'url', 'author', 'link', 'id', 'source']
-        'searchableAttributes': ['title', 'url', 'author'],
+        r = requests.post(MEILI_URL + 'indexes/qotnews/settings/searchable-attributes', json=json, timeout=2)
-        'displayedAttributes': ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments'],
+        if r.status_code != 202:
-    }
+            raise Exception('Bad response code ' + str(r.status_code))
-    return meili_api(requests.post, 'indexes/qotnews/settings', json=json)
+        requests.delete(MEILI_URL + 'indexes/qotnews/settings/displayed-attributes', timeout=2)
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem setting MeiliSearch searchable attributes: {}'.format(str(e)))
        return False
 def init():
-    if not SEARCH_ENABLED:
+    create_index()
-        logging.info('Search is not enabled, skipping init.')
+    update_rankings()
-        return
+    update_attributes()
    print(create_index())
    update_settings()
 def put_story(story):
-    if not SEARCH_ENABLED: return
+    story = story.copy()
-    return meili_api(requests.post, 'indexes/qotnews/documents', [story])
+    story.pop('text', None)
    story.pop('comments', None)
    try:
        r = requests.post(MEILI_URL + 'indexes/qotnews/documents', json=[story], timeout=2)
        if r.status_code != 202:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem putting MeiliSearch story: {}'.format(str(e)))
        return False
 def search(q):
-    if not SEARCH_ENABLED: return []
+    try:
-    json = dict(q=q, limit=settings.FEED_LENGTH)
+        params = dict(q=q, limit=250)
-    r = meili_api(requests.post, 'indexes/qotnews/search', json=json, parse_json=False)
+        r = requests.get(MEILI_URL + 'indexes/qotnews/search', params=params, timeout=2)
-    return r
+        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()['hits']
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem searching MeiliSearch: {}'.format(str(e)))
        return False
 if __name__ == '__main__':
-    init()
+    create_index()
-    print(search('facebook'))
+    print(search('the'))
@@ -1,8 +1,7 @@
-import os, logging
+import logging
 DEBUG = os.environ.get('DEBUG')
 logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-        level=logging.DEBUG if DEBUG else logging.INFO)
+        level=logging.INFO)
 import gevent
 from gevent import monkey
@@ -14,154 +13,53 @@ import json
 import threading
 import traceback
 import time
 import datetime
 import humanize
 import urllib.request
 from urllib.parse import urlparse, parse_qs
 import settings
 import database
 import search
 import feed
-from utils import gen_rand_id, NUM_ID_CHARS
+from utils import gen_rand_id
 from flask import abort, Flask, request, render_template, stream_with_context, Response
 from werkzeug.exceptions import NotFound
 from flask_cors import CORS
 smallweb_set = set()
 def load_smallweb_list():
    EXCLUDED = [
        'github.com',
    ]
    global smallweb_set
    try:
        url = 'https://raw.githubusercontent.com/kagisearch/smallweb/refs/heads/main/smallweb.txt'
        with urllib.request.urlopen(url, timeout=10) as response:
            urls = response.read().decode('utf-8').splitlines()
            hosts = {urlparse(u).hostname for u in urls if u and urlparse(u).hostname}
            smallweb_set = {h.replace('www.', '') for h in hosts if h not in EXCLUDED}
            logging.info('Loaded {} smallweb domains.'.format(len(smallweb_set)))
    except Exception as e:
        logging.error('Failed to load smallweb list: {}'.format(e))
 load_smallweb_list()
 database.init()
 search.init()
 news_index = 0
 ref_list = []
 current_item = {}
 def new_id():
    nid = gen_rand_id()
    while database.get_story(nid):
        nid = gen_rand_id()
    return nid
-
+build_folder = '../webclient/build'
 def fromnow(ts):
    return humanize.naturaltime(datetime.datetime.fromtimestamp(ts))
 build_folder = './build'
 flask_app = Flask(__name__, template_folder=build_folder, static_folder=build_folder, static_url_path='')
 flask_app.jinja_env.filters['fromnow'] = fromnow
 cors = CORS(flask_app)
@flask_app.route('/api')
 def api():
-    skip = request.args.get('skip', 0)
+    stories = database.get_stories(settings.MAX_STORY_AGE)
-    limit = request.args.get('limit', settings.FEED_LENGTH)
+    res = Response(json.dumps({"stories": stories}))
    is_smallweb_filter = request.args.get('smallweb') == 'true' and smallweb_set
    sources_filter = request.args.getlist('source')
    if not is_smallweb_filter and not sources_filter:
        stories = database.get_stories(limit, skip)
    else:
        limit = int(limit)
        skip = int(skip)
        filtered_stories = []
        current_skip = skip
        while len(filtered_stories) < limit:
            stories_batch = database.get_stories(limit, current_skip)
            if not stories_batch:
                break
            for story_str in stories_batch:
                story = json.loads(story_str)
                if is_smallweb_filter:
                    story_url = story.get('url') or story.get('link') or ''
                    if not story_url:
                        continue
                    hostname = urlparse(story_url).hostname
                    if not hostname or hostname.replace('www.', '') not in smallweb_set:
                        continue
                if sources_filter:
                    if story.get('source') not in sources_filter:
                        continue
                filtered_stories.append(story_str)
                if len(filtered_stories) == limit:
                    break
            if len(filtered_stories) == limit:
                break
            current_skip += limit
        stories = filtered_stories
    # hacky nested json
    res = Response('{"stories":[' + ','.join(stories) + ']}')
    res.headers['content-type'] = 'application/json'
    return res
@flask_app.route('/api/stats', strict_slashes=False)
 def apistats():
    stats = {
        'news_index': news_index,
        'ref_list': ref_list,
        'len_ref_list': len(ref_list),
        'current_item': current_item,
        'total_stories': database.count_stories(),
        'id_space': 26**NUM_ID_CHARS,
    }
    return stats
@flask_app.route('/api/search', strict_slashes=False)
 def apisearch():
    q = request.args.get('q', '')
    if len(q) >= 3:
        results = search.search(q)
    else:
-        results = '[]'
+        results = []
-    res = Response(results)
+    return dict(results=results)
    res.headers['content-type'] = 'application/json'
    return res
@flask_app.route('/api/submit', methods=['POST'], strict_slashes=False)
 def submit():
    try:
        url = request.form['url']
        for prefix in ['http://', 'https://']:
            if url.lower().startswith(prefix):
                break
        else:  # for
            url = 'http://' + url
        nid = new_id()
        logging.info('Manual submission: ' + url)
        parse = urlparse(url)
        if 'news.ycombinator.com' in parse.hostname:
            source = 'hackernews'
@@ -169,24 +67,16 @@ def submit():
        elif 'tildes.net' in parse.hostname and '~' in url:
            source = 'tildes'
            ref = parse.path.split('/')[2]
        elif 'lobste.rs' in parse.hostname and '/s/' in url:
            source = 'lobsters'
            ref = parse.path.split('/')[2]
        elif 'reddit.com' in parse.hostname and 'comments' in url:
            source = 'reddit'
            ref = parse.path.split('/')[4]
-        elif 'news.t0.vc' in parse.hostname:
+        elif settings.HOSTNAME in parse.hostname:
            raise Exception('Invalid article')
        else:
            source = 'manual'
            ref = url
        existing = database.get_story_by_ref(ref)
        if existing and DEBUG:
            ref = ref + '#' + str(time.time())
            existing = False
        if existing:
            return {'nid': existing.sid}
        else:
@@ -195,28 +85,23 @@ def submit():
            if valid:
                database.put_story(story)
                search.put_story(story)
                if DEBUG:
                    logging.info('Adding manual ref: {}, id: {}, source: {}'.format(ref, nid, source))
                    database.put_ref(ref, nid, source)
                return {'nid': nid}
            else:
                raise Exception('Invalid article')
-    except Exception as e:
+    except BaseException as e:
-        msg = 'Problem with article submission: {} - {}'.format(e.__class__.__name__, str(e))
+        logging.error('Problem with article submission: {} - {}'.format(e.__class__.__name__, str(e)))
        logging.error(msg)
        print(traceback.format_exc())
-        return {'error': msg.split('\n')[0]}, 400
+        abort(400)
@flask_app.route('/api/<sid>')
 def story(sid):
    story = database.get_story(sid)
    if story:
-        # hacky nested json
+        related = database.get_stories_by_url(story.meta['url'])
-        res = Response('{"story":' + story.full_json + '}')
+        related = [r.meta for r in related]
        res = Response(json.dumps({"story": story.data, "related": related}))
        res.headers['content-type'] = 'application/json'
        return res
    else:
@@ -225,19 +110,10 @@ def story(sid):
@flask_app.route('/')
@flask_app.route('/search')
 def index():
    stories_json = database.get_stories(settings.FEED_LENGTH, 0)
    stories = [json.loads(s) for s in stories_json]
    for s in stories:
        url = urlparse(s.get('url') or s.get('link') or '').hostname or ''
        s['hostname'] = url.replace('www.', '')
    return render_template('index.html',
-        title='QotNews',
+            title='Feed',
-        url='news.t0.vc',
+            url=settings.HOSTNAME,
-        description='Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode',
+            description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')
        robots='index',
        stories=stories,
    )
@flask_app.route('/<sid>', strict_slashes=False)
@flask_app.route('/<sid>/c', strict_slashes=False)
@@ -247,9 +123,9 @@ def static_story(sid):
    except NotFound:
        pass
-    story_obj = database.get_story(sid)
+    story = database.get_story(sid)
-    if not story_obj: return abort(404)
+    if not story: return abort(404)
-    story = json.loads(story_obj.full_json)
+    story = story.data
    score = story['score']
    num_comments = story['num_comments']
@@ -258,77 +134,69 @@ def static_story(sid):
            score, 's' if score != 1 else '',
            num_comments, 's' if num_comments != 1 else '',
            source)
-    url = urlparse(story.get('url') or story.get('link') or '').hostname or ''
+    url = urlparse(story['url']).hostname or urlparse(story['link']).hostname or ''
    url = url.replace('www.', '')
    return render_template('index.html',
-        title=story['title'] + ' | QotNews',
+            title=story['title'],
-        url=url,
+            url=url,
-        description=description,
+            description=description)
        robots='noindex',
        story=story,
        show_comments=request.path.endswith('/c'),
    )
 http_server = WSGIServer(('', 33842), flask_app)
-def feed_thread():
+def _add_new_refs():
-    global news_index, ref_list, current_item
+    for ref, source, urlref in feed.get_list():
        if database.get_story_by_ref(ref):
            continue
        try:
            nid = new_id()
            database.put_ref(ref, nid, source, urlref)
            logging.info('Added ref ' + ref)
        except database.IntegrityError:
            continue
 def _update_current_story(item):
    try:
        story = database.get_story(item['sid']).data
    except AttributeError:
        story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
    logging.info('Updating story: {}'.format(str(story['ref'])))
    valid = feed.update_story(story, urlref=item['urlref'])
    if valid:
        database.put_story(story)
        search.put_story(story)
    else:
        database.del_ref(item['ref'])
        logging.info('Removed ref {}'.format(item['ref']))
 def feed_thread():
    ref_list = []
    try:
        while True:
            # onboard new stories
-            if news_index == 0:
+            if not len(ref_list):
-                for ref, source in feed.list():
+                _add_new_refs()
-                    if database.get_story_by_ref(ref):
+                ref_list = database.get_reflist()
                        continue
                    try:
                        nid = new_id()
                        logging.info('Adding ref: {}, id: {}, source: {}'.format(ref, nid, source))
                        database.put_ref(ref, nid, source)
                    except database.IntegrityError:
                        logging.info('Already have ID / ref, skipping.')
                        continue
            ref_list = database.get_reflist(settings.FEED_LENGTH)
            # update current stories
-            if news_index < len(ref_list):
+            if len(ref_list):
-                current_item = ref_list[news_index]
+                item = ref_list.pop(0)
-
+                _update_current_story(item)
                try:
                    story_json = database.get_story(current_item['sid']).full_json
                    story = json.loads(story_json)
                except AttributeError:
                    story = dict(id=current_item['sid'], ref=current_item['ref'], source=current_item['source'])
                logging.info('Updating {} story: {}, index: {}'.format(story['source'], story['ref'], news_index))
                valid = feed.update_story(story)
                if valid:
                    database.put_story(story)
                    search.put_story(story)
                else:
                    database.del_ref(current_item['ref'])
                    logging.info('Removed ref {}'.format(current_item['ref']))
            else:
                logging.info('Skipping index: ' + str(news_index))
            gevent.sleep(6)
            news_index += 1
            if news_index == settings.FEED_LENGTH: news_index = 0
    except KeyboardInterrupt:
        logging.info('Ending feed thread...')
    except ValueError as e:
-        logging.critical('feed_thread error: {} {}'.format(e.__class__.__name__, e))
+        logging.error('feed_thread error: {} {}'.format(e.__class__.__name__, e))
        http_server.stop()
-logging.info('Starting Feed thread...')
+print('Starting Feed thread...')
 gevent.spawn(feed_thread)
-logging.info('Starting HTTP thread...')
+print('Starting HTTP thread...')
 try:
    http_server.serve_forever()
 except KeyboardInterrupt:
@@ -1,23 +1,57 @@
 # QotNews settings
 # edit this file and save it as settings.py
 HOSTNAME = 'news.t0.vc'
 MAX_STORY_AGE = 3*24*60*60
 # Feed Lengths
 # Number of top items from each site to pull
 # set to 0 to disable that site
 FEED_LENGTH = 75
 NUM_HACKERNEWS = 15
-NUM_LOBSTERS = 10
+NUM_REDDIT = 10
 NUM_REDDIT = 15
 NUM_TILDES = 5
 NUM_SUBSTACK = 10
-# Meilisearch server URL
+SITEMAP = {}
-# Leave blank if not using search
+# SITEMAP['nzherald'] = {
-#MEILI_URL = 'http://127.0.0.1:7700/'
+#     'url': "https://www.nzherald.co.nz/arcio/news-sitemap/",
-MEILI_URL = ''
+#     'count': 20,
 #     'patterns': [
 #         r'^https:\/\/www\.(nzherald\.co\.nz)\/.*\/([^/]+)\/?$',
 #     ],
 #     'excludes': [
 #         'driven.co.nz',
 #         'oneroof.co.nz',
 #         'nzherald.co.nz/sponsored-stories',
 #         'nzherald.co.nz/entertainment/',
 #         'nzherald.co.nz/lifestyle/',
 #         'nzherald.co.nz/travel/',
 #         'nzherald.co.nz/sport/',
 #         'nzherald.co.nz/promotions/',
 #         'nzherald.co.nzhttp',
 #         'herald-afternoon-quiz',
 #         'herald-morning-quiz'
 #     ],
 # }
-# Readerserver URL
+SUBSTACK = {}
-# Leave blank if not using, but that defeats the whole point
+# SUBSTACK['webworm'] = { 'url': "https://www.webworm.co", 'count': 10},
-READER_URL = 'http://127.0.0.1:33843/'
+# SUBSTACK['the bulletin'] = { 'url': "https://thespinoff.substack.com", 'count': 10},
 CATEGORY = {}
 # CATEGORY['radionz'] = {
 #     'url': "https://www.rnz.co.nz/news/",
 #     'count': 20,
 #     'patterns': [
 #         r'https:\/\/www\.(rnz\.co\.nz)\/news\/[^\/]+\/(\d+)\/[^\/]+\/?'
 #     ],
 #     'excludes': [
 #         'rnz.co.nz/news/sport',
 #         'rnz.co.nz/weather',
 #     ],
 # }
 SCRAPERS = ['browser', 'declutter', 'outline', 'local']
 # Reddit account info
 # leave blank if not using Reddit
@@ -25,6 +59,10 @@ REDDIT_CLIENT_ID = ''
 REDDIT_CLIENT_SECRET = ''
 REDDIT_USER_AGENT = ''
 # Minimum points or number of comments before including a thread:
 REDDIT_COMMENT_THRESHOLD = 10
 REDDIT_SCORE_THRESHOLD = 25
 SUBREDDITS = [
    'Economics',
    'AcademicPhilosophy',
@@ -33,9 +71,13 @@ SUBREDDITS = [
    'HistoryofIdeas',
    'LaymanJournals',
    'PhilosophyofScience',
    'PoliticsPDFs',
    'Scholar',
    'StateOfTheUnion',
    'TheAgora',
    'TrueFilm',
    'TrueReddit',
    'UniversityofReddit',
    'culturalstudies',
    'hardscience',
    'indepthsports',
@@ -44,7 +86,4 @@ SUBREDDITS = [
    'neurophilosophy',
    'resilientcommunities',
    'worldevents',
    'StallmanWasRight',
    'EverythingScience',
    'longevity',
 ]
@@ -8,17 +8,8 @@ import string
 from bleach.sanitizer import Cleaner
 def alert_tanner(message):
    try:
        logging.info('Alerting Tanner: ' + message)
        params = dict(qotnews=message)
        requests.get('https://tbot.tanner.vc/message', params=params, timeout=4)
    except BaseException as e:
        logging.error('Problem alerting Tanner: ' + str(e))
 NUM_ID_CHARS = 4
 def gen_rand_id():
-    return ''.join(random.choice(string.ascii_uppercase) for _ in range(NUM_ID_CHARS))
+    return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))
 def render_md(md):
    if md:
@@ -1,53 +1,29 @@
 const port = 33843;
 const express = require('express');
 const app = express();
-const port = 33843;
+const simple = require('./scraper/simple');
-
+const browser = require('./scraper/browser');
 const request = require('request');
 const JSDOM = require('jsdom').JSDOM;
 const { Readability } = require('readability');
 app.use(express.urlencoded({ extended: true }));
 app.get('/', (req, res) => {
-	res.send('<form method="POST" accept-charset="UTF-8"><input name="url"><button type="submit">SUBMIT</button></form>');
+	const routes = ['/', '/details', '/browser', '/browser/details', '/browser/comments'];
-});
+
-
+	const html = routes.map(route => `
-const requestCallback = (url, res) => (error, response, body) => {
+	<form method="POST" action="${route}" accept-charset="UTF-8">
-	if (!error && response.statusCode == 200) {
+		<fieldset>
-		console.log('Response OK.');
+			<legend>route: POST ${route}</legend>
-
+			<input name="url">
-		const doc = new JSDOM(body, {url: url});
+			<button type="submit">SUBMIT</button>
-		const reader = new Readability(doc.window.document);
+		</fieldset>
-		const article = reader.parse();
+	</form>`).join('<hr />');
-
+	res.send(html);
 		if (article && article.content) {
 			res.send(article.content);
 		} else {
 			res.sendStatus(404);
 		}
 	} else {
 		console.log('Response error:', error ? error.toString() : response.statusCode);
 		res.sendStatus(response ? response.statusCode : 404);
 	}
 };
 app.post('/', (req, res) => {
 	const url = req.body.url;
 	const requestOptions = {
 		url: url,
 		gzip: true,
 		//headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'},
 		//headers: {'User-Agent': 'Twitterbot/1.0'},
 		headers: {
 			'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0',
 			'X-Forwarded-For': '66.249.66.1',
 		},
 	};
 	console.log('Parse request for:', url);
 	request(requestOptions, requestCallback(url, res));
 });
 app.post('/', simple.scrape);
 app.post('/details', simple.details);
 app.post('/browser', browser.scrape);
 app.post('/browser/details', browser.details);
 app.post('/browser/comments', browser.comments);
 app.listen(port, () => {
 	console.log(`Example app listening on port ${port}!`);
@@ -4,10 +4,12 @@
  "main": "main.js",
  "license": "MIT",
  "dependencies": {
    "@mozilla/readability": "^0.3.0",
    "dompurify": "^1.0.11",
    "express": "^4.17.1",
    "jsdom": "^15.1.1",
-    "readability": "https://github.com/mozilla/readability",
+    "node-fetch": "^2.6.1",
    "playwright": "^1.5.2",
    "request": "^2.88.0"
  }
 }
@@ -0,0 +1,45 @@
 const { firefox } = require("playwright");
 const { JSDOM } = require("jsdom");
 const { Readability } = require("@mozilla/readability");
 const { getUserAgent } = require('../../utils/user-agent');
 const { blockedRegexes, matchUrlDomain } = require("../../utils/sites");
 /**
  * Load `url` in headless Firefox and extract the article with Readability.
  *
  * Requests matching the per-domain patterns in `blockedRegexes` are aborted, and the
  * paywall-bypass, cosmetic-filter and relative-link scripts are injected before navigation.
  *
  * @param {string} url - Address of the page to scrape.
  * @returns {Promise<Object>} Whatever `Readability#parse()` yields for the rendered page;
  *   callers treat a falsy result as "nothing extracted".
  * @throws Propagates any Playwright or parsing error; the browser is shut down either way.
  */
 module.exports.getDetails = async (url) => {
 	const { userAgent, headers } = getUserAgent(url);
 	// Constant for the lifetime of this call, so compute it once instead of per request.
 	const blockedDomains = Object.keys(blockedRegexes);
 	const browser = await firefox.launch({ args: [], headless: true });
 	let tab;
 	try {
 		// Created inside the try so that a failure here still closes the browser.
 		tab = await browser.newPage({
 			extraHTTPHeaders: headers,
 			userAgent,
 			viewport: { width: 2000, height: 10000 },
 		});
 		await tab.route(/.*/, (route) => {
 			const routeUrl = route.request().url();
 			const domain = matchUrlDomain(blockedDomains, routeUrl);
 			if (domain && routeUrl.match(blockedRegexes[domain])) {
 				return route.abort();
 			}
 			return route.continue();
 		});
 		await tab.addInitScript({ path: "scraper/browser/scripts/bypass-paywalls-chrome/src/js/contentScript.js" });
 		await tab.addInitScript({ path: "scraper/browser/scripts/cosmetic-filters.js" });
 		await tab.addInitScript({ path: "scraper/browser/scripts/fix-relative-links.js" });
 		await tab.goto(url, { timeout: 60000, waitUntil: "domcontentloaded" });
 		// The injected cleanup scripts act on a 1000ms timer; wait for them before snapshotting.
 		await tab.waitForTimeout(2000);
 		const body = await tab.content();
 		const doc = new JSDOM(body, { url });
 		const reader = new Readability(doc.window.document);
 		return reader.parse();
 	} finally {
 		try {
 			if (tab) {
 				await tab.close();
 			}
 		} finally {
 			// Runs even when closing the tab fails, so no browser process is left behind.
 			await browser.close();
 		}
 	}
 };
@@ -0,0 +1,34 @@
 const { JSDOM } = require("jsdom");
 const { firefox } = require("playwright");
 const { getUserAgent } = require('../../utils/user-agent');
 const { disqusThread } = require('../../utils/disqus-thread');
 const DISQUS_EMBED = 'https://disqus.com/embed/comments/';
 /**
  * Load `url` in headless Firefox, capture the Disqus embed response the page triggers,
  * and convert its thread data into a nested comment list.
  *
  * @param {string} url - Address of the article whose comments are wanted.
  * @returns {Promise<Array>} Result of `disqusThread()` for the embedded thread data.
  * @throws If no Disqus embed response arrives, the embed lacks `#disqus-threadData`,
  *   or its content is not valid JSON; the browser is shut down either way.
  */
 module.exports.getComments = async (url) => {
 	const { userAgent, headers } = getUserAgent(url);
 	const browser = await firefox.launch({ args: [], headless: true });
 	let tab;
 	try {
 		// Created inside the try so that a failure here still closes the browser.
 		tab = await browser.newPage({
 			extraHTTPHeaders: headers,
 			userAgent,
 			viewport: { width: 2000, height: 10000 },
 		});
 		await tab.goto(url, { timeout: 60000, waitUntil: "domcontentloaded" });
 		const response = await tab.waitForResponse(response => response.url().includes(DISQUS_EMBED));
 		const text = await response.text();
 		// jsdom takes an options object; a bare string here is ignored and the URL is lost.
 		const dom = new JSDOM(text, { url: response.url() });
 		const script = dom.window.document.querySelector('#disqus-threadData');
 		if (!script) {
 			throw new Error('disqus embed did not contain #disqus-threadData.');
 		}
 		const data = JSON.parse(script.innerHTML);
 		return disqusThread(data);
 	} finally {
 		try {
 			if (tab) {
 				await tab.close();
 			}
 		} finally {
 			// Runs even when closing the tab fails, so no browser process is left behind.
 			await browser.close();
 		}
 	}
 };
@@ -0,0 +1,40 @@
 const { getDetails } = require('./_browser');
 const { getComments } = require('./_comments');
 /**
  * POST handler: scrape `req.body.url` in the headless browser and reply with the
  * extracted article HTML. Any failure, including an empty extraction, is logged
  * and answered with HTTP 500.
  */
 module.exports.scrape = async (req, res) => {
 	try {
 		const article = await getDetails(req.body.url);
 		if (!article || !article.content) {
 			throw new Error('failed to get details.');
 		}
 		return res.send(article.content);
 	} catch (e) {
 		// Log like the sibling handlers do, so a 500 from this route can be diagnosed.
 		console.log(e);
 		return res.sendStatus(500);
 	}
 };
 /**
  * POST handler: scrape `req.body.url` in the headless browser and reply with the
  * whole article object. Failures are logged and answered with HTTP 500.
  */
 module.exports.details = async (req, res) => {
 	try {
 		const parsed = await getDetails(req.body.url);
 		if (parsed) {
 			return res.send(parsed);
 		}
 		throw new Error('failed to get details.');
 	} catch (err) {
 		console.log(err);
 		return res.sendStatus(500);
 	}
 };
 /**
  * POST handler: fetch the comment thread for `req.body.url` and reply with it.
  * Failures are logged and answered with HTTP 500.
  */
 module.exports.comments = async (req, res) => {
 	try {
 		const thread = await getComments(req.body.url);
 		if (thread) {
 			return res.send(thread);
 		}
 		throw new Error('failed to get comments.');
 	} catch (err) {
 		console.log(err);
 		return res.sendStatus(500);
 	}
 };
@@ -0,0 +1,99 @@
 /**
  * Page script: removes hidden elements everywhere, plus a per-site list of clutter
  * selectors (share bars, sign-up prompts, related links) on known news domains.
  * All removals are deferred by one second via window.setTimeout.
  */
 (function () {
 	removeHiddenElements();
 	if (matchDomain("stuff.co.nz")) {
 		removeSelectors([
 			".support-brief-container",
 			'[class*="donation-in-"]',
 			".sics-component__sharebar",
 			".breaking-news-pointer",
 			".bigbyline-container",
 			// [selector, text] pair: only elements whose text matches are removed.
 			[
 				".sics-component__html-injector.sics-component__story__paragraph",
 				"READ MORE:",
 			],
 		]);
 	}
 	if (matchDomain("nzherald.co.nz")) {
 		removeSelectors([
 			"[href$='#commenting-widget']",
 			".related-articles",
 			".article__print-button",
 			".share-bar",
 			".c-suggest-links.read-more-links",
 			".website-of-year",
 			".meta-data",
 			".article__kicker",
 			".author__image",
 		]);
 	}
 	if (matchDomain(["rnz.co.nz", "radionz.co.nz"])) {
 		removeSelectors([".c-advert-app", ".c-sub-nav"]);
 	}
 	if (matchDomain(["newsroom.co.nz"])) {
 		removeSelectors([".article_content__section", ".bio"]);
 	}
 	if (matchDomain(["newshub.co.nz"])) {
 		removeSelectors([".c-ArticleHeading-authorPicture", ".relatedarticles"]);
 	}
 	if (matchDomain(["tvnz.co.nz"])) {
 		// NOTE(review): as written this targets <container> elements inside .signup-container;
 		// ".signup-container.container" may have been intended - confirm against the site.
 		removeSelectors([".signup-container container"]);
 	}
 	if (matchDomain(["thespinoff.co.nz"])) {
 		removeSelectors([".the-spinoff-club-interruptive", ".bulletin-signup"]);
 	}
 	// True when the current hostname equals, or is a subdomain of, one of `domains`.
 	function matchDomain(domains) {
 		const hostname = window.location.hostname;
 		if (typeof domains === "string") {
 			domains = [domains];
 		}
 		return domains.some(
 			(domain) => hostname === domain || hostname.endsWith("." + domain)
 		);
 	}
 	// Detach every truthy element passed in.
 	function removeDOMElement(...elements) {
 		for (const element of elements) {
 			if (element) {
 				element.remove();
 			}
 		}
 	}
 	// Elements matching `selector` whose text content matches `text` (used as a RegExp pattern).
 	function pageContains(selector, text) {
 		const pattern = RegExp(text);
 		return Array.from(document.querySelectorAll(selector)).filter((element) =>
 			pattern.test(element.textContent)
 		);
 	}
 	// After one second, remove every non-metadata element computed as display:none or visibility:hidden.
 	function removeHiddenElements() {
 		window.setTimeout(function () {
 			const selector = "*:not(script):not(head):not(meta):not(link):not(style)";
 			Array.from(document.querySelectorAll(selector))
 				.filter((element) => {
 					const computed = getComputedStyle(element);
 					const displayNone = computed["display"] === "none";
 					const visibilityHidden = computed["visibility"] === "hidden";
 					return displayNone || visibilityHidden;
 				})
 				.forEach((element) => element && element.remove());
 		}, 1000);
 	}
 	// After one second, remove everything matched by `selectors`; each entry is either a
 	// CSS selector string or a [selector, text] pair handled by pageContains().
 	function removeSelectors(selectors) {
 		window.setTimeout(function () {
 			const elements = selectors.flatMap((s) => {
 				if (typeof s === "string") {
 					return Array.from(document.querySelectorAll(s));
 				}
 				if (Array.isArray(s)) {
 					return pageContains(...s);
 				}
 				// Unknown entry shape: contribute nothing rather than an `undefined` element.
 				return [];
 			});
 			removeDOMElement(...elements);
 		}, 1000);
 	}
 })();
@@ -0,0 +1,14 @@
 // Page script: rewrite root-relative `src` / `href` attributes ("/path", but not "//host")
 // into absolute URLs on the current protocol and host.
 (function () {
 	const { host, protocol } = window.location;
 	const base = `${protocol}//${host}`;
 	const rootRelative = /^\/[^\/]/;
 	const targets = [
 		['[src^="/"]', 'src'],
 		['[href^="/"]', 'href']
 	];
 	for (const [selector, attribute] of targets) {
 		for (const node of Array.from(document.querySelectorAll(selector))) {
 			const attr = node.attributes[attribute];
 			if (attr && rootRelative.test(attr.value)) {
 				node.attributes[attribute].value = `${base}${node.attributes[attribute].value}`;
 			}
 		}
 	}
 })();
@@ -0,0 +1,59 @@
 const fetch = require('node-fetch');
 const { JSDOM } = require('jsdom');
 const { Readability } = require('@mozilla/readability');
 const { getUserAgent } = require('../utils/user-agent');
 // Parse `body` as the document located at `url` and return Readability's result for it.
 const extract = (url, body) => {
 	const { document } = new JSDOM(body, { url: url }).window;
 	return new Readability(document).parse();
 };
 /**
  * POST handler: fetch `req.body.url` over plain HTTP and reply with the
  * Readability-extracted article HTML.
  *
  * Replies with the upstream status when the fetch is not OK, 404 when no article
  * content could be extracted, and 500 on any unexpected error.
  */
 module.exports.scrape = async (req, res) => {
 	try {
 		const { userAgent, headers } = getUserAgent(req.body.url);
 		// getUserAgent() may return no userAgent at all. node-fetch stringifies header
 		// values, so an unset one would otherwise be sent as the text "undefined".
 		const requestHeaders = { ...headers };
 		if (userAgent) {
 			requestHeaders['User-Agent'] = userAgent;
 		}
 		const response = await fetch(req.body.url, { headers: requestHeaders });
 		if (!response.ok) {
 			// A fetch Response exposes `status`; it has no `statusCode` property.
 			return res.sendStatus(response.status);
 		}
 		const html = await response.text();
 		const article = extract(req.body.url, html);
 		if (article && article.content) {
 			return res.send(article.content);
 		}
 		return res.sendStatus(404);
 	} catch (e) {
 		console.error(e);
 		return res.sendStatus(500);
 	}
 };
 /**
  * POST handler: fetch `req.body.url` over plain HTTP and reply with the whole
  * Readability article object.
  *
  * Replies with the upstream status when the fetch is not OK, 404 when nothing
  * could be extracted, and 500 on any unexpected error.
  */
 module.exports.details = async (req, res) => {
 	try {
 		const { userAgent, headers } = getUserAgent(req.body.url);
 		// getUserAgent() may return no userAgent at all. node-fetch stringifies header
 		// values, so an unset one would otherwise be sent as the text "undefined".
 		const requestHeaders = { ...headers };
 		if (userAgent) {
 			requestHeaders['User-Agent'] = userAgent;
 		}
 		const response = await fetch(req.body.url, { headers: requestHeaders });
 		if (!response.ok) {
 			// A fetch Response exposes `status`; it has no `statusCode` property.
 			return res.sendStatus(response.status);
 		}
 		const html = await response.text();
 		const article = extract(req.body.url, html);
 		if (article) {
 			return res.send(article);
 		}
 		return res.sendStatus(404);
 	} catch (e) {
 		console.error(e);
 		return res.sendStatus(500);
 	}
 };
@@ -0,0 +1,11 @@
 // User-Agent string sent when a request should identify itself as Googlebot.
 const googleBotUserAgent = 'Googlebot/2.1 (+http://www.google.com/bot.html)';
 // Address sent in X-Forwarded-For alongside that User-Agent.
 // NOTE(review): presumably an address from Google's crawler range - confirm.
 const googleBotIp = '66.249.66.1';
 // Googlebot identity, exposed both as separate fields and as ready-made request headers.
 module.exports.googleBot = {
 	userAgent: googleBotUserAgent,
 	ip: googleBotIp,
 	headers: {
 		'User-Agent': googleBotUserAgent,
 		'X-Forwarded-For': googleBotIp,
 	}
 }
@@ -0,0 +1,21 @@
 module.exports.disqusThread = data => {
 	const comments = data.response.posts.reduce((c, post) => ({
 		...c,
 		[post.id.toString()]: {
 			author: post.author.name,
 			authorLink: post.author.profileUrl,
 			date: post.createdAt,
 			text: post.raw_message,
 			score: post.points,
 			children: [],
 			id: post.id.toString(),
 			parent: (post.parent || '').toString(),
 		}
 	}), {});
 	Object.keys(comments).filter(id => !!comments[id].parent).forEach(id => {
 		const comment = comments[id];
 		comments[comment.parent].children.push(comment);
 	});
 	const parents = Object.keys(comments).filter(id => comments[id].parent).map(id => comments[id]);
 	return parents;
 };
@@ -0,0 +1,98 @@
 module.exports.blockedRegexes = {
 	"adweek.com": /.+\.lightboxcdn\.com\/.+/,
 	"afr.com": /afr\.com\/assets\/vendorsReactRedux_client.+\.js/,
 	"businessinsider.com": /(.+\.tinypass\.com\/.+|cdn\.onesignal\.com\/sdks\/.+\.js)/,
 	"chicagotribune.com": /.+:\/\/.+\.tribdss\.com\//,
 	"economist.com": /(.+\.tinypass\.com\/.+|economist\.com\/engassets\/_next\/static\/chunks\/framework.+\.js)/,
 	"editorialedomani.it": /(js\.pelcro\.com\/.+|editorialedomani.it\/pelcro\.js)/,
 	"foreignpolicy.com": /.+\.tinypass\.com\/.+/,
 	"fortune.com": /.+\.tinypass\.com\/.+/,
 	"haaretz.co.il": /haaretz\.co\.il\/htz\/js\/inter\.js/,
 	"haaretz.com": /haaretz\.com\/hdc\/web\/js\/minified\/header-scripts-int.js.+/,
 	"inquirer.com": /.+\.tinypass\.com\/.+/,
 	"lastampa.it": /.+\.repstatic\.it\/minify\/sites\/lastampa\/.+\/config\.cache\.php\?name=social_js/,
 	"lrb.co.uk": /.+\.tinypass\.com\/.+/,
 	"nzherald.co.nz": /(.+nzherald\.co\.nz\/.+\/subs\/p\.js|.+nzherald\.co\.nz\/.+\/react\.js|.+nzherald\.co\.nz\/.+\/appear\.js|.+nzherald\.co\.nz\/.+\/tracking\/.+|.+nzherald\.co\.nz\/.+\/default\.js|.+\/newsbarscript\.js)/,
 	"medscape.com": /.+\.medscapestatic\.com\/.*medscape-library\.js/,
 	"interest.co.nz": /(.+\.presspatron\.com.+|.+interest\.co\.nz.+pp-ablock-banner\.js)/,
 	"repubblica.it": /scripts\.repubblica\.it\/pw\/pw\.js.+/,
 	"spectator.co.uk": /.+\.tinypass\.com\/.+/,
 	"spectator.com.au": /.+\.tinypass\.com\/.+/,
 	"telegraph.co.uk": /.+telegraph\.co\.uk.+martech.+/,
 	"thecourier.com.au": /.+cdn-au\.piano\.io\/api\/tinypass.+\.js/,
 	"thenation.com": /thenation\.com\/.+\/paywall-script\.php/,
 	"thenational.scot": /(.+\.tinypass\.com\/.+|.+thenational\.scot.+omniture\.js|.+thenational\.scot.+responsive-sync.+)/,
 	"thewrap.com": /thewrap\.com\/.+\/wallkit\.js/,
 	"wsj.com": /cdn\.ampproject\.org\/v\d\/amp-access-.+\.js/,
 	"historyextra.com": /.+\.evolok\.net\/.+\/authorize\/.+/,
 	"barrons.com": /cdn\.ampproject\.org\/v\d\/amp-access-.+\.js/,
 	"irishtimes.com": /cdn\.ampproject\.org\/v\d\/amp-access-.+\.js/,
 	"elmercurio.com": /(merreader\.emol\.cl\/assets\/js\/merPramV2.js|staticmer\.emol\.cl\/js\/inversiones\/PramModal.+\.js)/,
 	"sloanreview.mit.edu": /(.+\.tinypass\.com\/.+|.+\.netdna-ssl\.com\/wp-content\/themes\/smr\/assets\/js\/libs\/welcome-ad\.js)/,
 	"latercera.com": /.+\.cxense\.com\/+/,
 	"lesechos.fr": /.+\.tinypass\.com\/.+/,
 	"washingtonpost.com": /.+\.washingtonpost\.com\/.+\/pwapi-proxy\.min\.js/,
 	"thehindu.com": /ajax\.cloudflare\.com\/cdn-cgi\/scripts\/.+\/cloudflare-static\/rocket-loader\.min\.js/,
 	"technologyreview.com": /.+\.blueconic\.net\/.+/,
 };
 module.exports.useGoogleBotSites = [
 	"adelaidenow.com.au",
 	"barrons.com",
 	"couriermail.com.au",
 	"dailytelegraph.com.au",
 	"fd.nl",
 	"genomeweb.com",
 	"haaretz.co.il",
 	"haaretz.com",
 	"heraldsun.com.au",
 	"mexiconewsdaily.com",
 	"ntnews.com.au",
 	"quora.com",
 	"seekingalpha.com",
 	"telegraph.co.uk",
 	"theaustralian.com.au",
 	"themarker.com",
 	"themercury.com.au",
 	"thenational.scot",
 	"thetimes.co.uk",
 	"wsj.com",
 	"kansascity.com",
 	"republic.ru",
 	"nzz.ch",
 	"handelsblatt.com",
 	"washingtonpost.com",
 	"df.cl",
 ];
 /**
  * Find which of `domains` a hostname belongs to.
  *
  * @param {string|string[]} domains - One domain or a list of candidate domains.
  * @param {string} hostname - Hostname to test; anything that is not a string matches nothing.
  * @returns {string|false} The first domain that equals `hostname` or is a parent
  *   domain of it, or `false` when none does.
  */
 function matchDomain(domains, hostname) {
 	// urlHost() hands back non-URL input unchanged, so undefined/null can reach this point.
 	if (typeof hostname !== "string") {
 		return false;
 	}
 	const candidates = typeof domains === "string" ? [domains] : domains;
 	const matched = candidates.find(
 		(domain) => hostname === domain || hostname.endsWith("." + domain)
 	);
 	return matched || false;
 }
 // Reduce `url` to its hostname, then report which of `domains` (if any) it falls under.
 function matchUrlDomain(domains, url) {
 	const hostname = urlHost(url);
 	return matchDomain(domains, hostname);
 }
 // Hostname of `url` when it starts with "http" and parses as a URL.
 // Any other input, including a value that fails to parse, is returned unchanged.
 function urlHost(url) {
 	const looksLikeHttp = Boolean(url) && url.startsWith("http");
 	if (!looksLikeHttp) {
 		return url;
 	}
 	let parsed;
 	try {
 		parsed = new URL(url);
 	} catch (e) {
 		console.log(`url not valid: ${url} error: ${e}`);
 		return url;
 	}
 	return parsed.hostname;
 }
 module.exports.matchDomain = matchDomain;
 module.exports.matchUrlDomain = matchUrlDomain;
 module.exports.urlHost = urlHost;
@@ -0,0 +1,18 @@
 const { googleBot } = require('./constants');
 const { matchUrlDomain, useGoogleBotSites } = require("./sites");
 /**
  * Choose the request identity for `url`.
  *
  * @param {string} url - Address about to be requested.
  * @returns {Object} `{ userAgent, headers }` with the Googlebot identity when the URL's
  *   domain is listed in `useGoogleBotSites`; otherwise an empty object.
  */
 module.exports.getUserAgent = (url) => {
 	const isGoogleBotSite = (site) => typeof site === "string" && matchUrlDomain(site, url);
 	if (useGoogleBotSites.some((site) => isGoogleBotSite(site))) {
 		const { userAgent, ip } = googleBot;
 		return {
 			userAgent: userAgent,
 			headers: {
 				"X-Forwarded-For": ip
 			}
 		};
 	}
 	return {};
 };
@@ -3,7 +3,7 @@
 Download MeiliSearch with:
 ```
-wget https://github.com/meilisearch/meilisearch/releases/download/v0.27.0/meilisearch-linux-amd64
+wget https://github.com/meilisearch/MeiliSearch/releases/download/v0.11.1/meilisearch-linux-amd64
 chmod +x meilisearch-linux-amd64
 ```
@@ -4,14 +4,12 @@
  "private": true,
  "dependencies": {
    "abort-controller": "^3.0.0",
    "katex": "^0.16.25",
    "localforage": "^1.7.3",
    "moment": "^2.24.0",
    "query-string": "^6.8.3",
    "react": "^16.9.0",
    "react-dom": "^16.9.0",
    "react-helmet": "^5.2.1",
    "react-latex-next": "^3.0.0",
    "react-router-dom": "^5.0.1",
    "react-router-hash-link": "^1.2.2",
    "react-scripts": "3.1.1"
@@ -8,8 +8,6 @@
 			content="{{ description }}"
 		/>
 		<meta content="{{ url }}" name="og:site_name">
 		<meta name="robots" content="{{ robots }}">
 		<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
 		<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
@@ -28,137 +26,26 @@
 			work correctly both with client-side routing and a non-root public URL.
 			Learn how to configure a non-root public URL by running `npm run build`.
 		-->
-		<title>{{ title }}</title>
+		<title>{{ title }} - QotNews</title>
 		<script>document.documentElement.className = 'js-enabled';</script>
 		<style>
 			.js-enabled .static-content {
 				display: none;
 			}
 			html {
 				overflow-y: scroll;
 			}
 			body {
-				background: #eeeeee;
+				background: #000;
 			}
 			.nojs {
 				color: white;
 			}
 		</style>
 	</head>
 	<body>
-		<script>
+		<div class="nojs">
-			(function() {
+			<noscript>You need to enable JavaScript to run this app.</noscript>
 				try {
 					var theme = localStorage.getItem('theme');
 					if (theme === 'dark') {
 						document.body.style.backgroundColor = '#1a1a1a';
 					} else if (theme === 'black' || theme === 'red') {
 						document.body.style.backgroundColor = '#000';
 					}
 				} catch (e) {}
 			})();
 		</script>
 		<div id="root">
 			<div class="static-content">
 				{% if False %}
 				<noscript>
 					<meta http-equiv="refresh" content="0;url=?/no.script">
 				</noscript>
 				{% endif %}
 				<div class="container menu">
 					<p>
 						<a href="/">QotNews</a>
 						<br />
 						<span class="slogan">Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.</span>
 					</p>
 				</div>
 				{% if story %}
 					<div class="{% if show_comments %}container{% else %}article-container{% endif %}">
 						<div class="article">
 							<h1>{{ story.title }}</h1>
 							{% if show_comments %}
 								<div class="info">
 									<a href="/{{ story.id }}">View article</a>
 								</div>
 							{% else %}
 								<div class="info">
 									Source: <a class="source" href="{{ story.url or story.link }}">{{ url }}</a>
 								</div>
 							{% endif %}
 							<div class="info">
 								{{ story.score }} points
 								by <a href="{{ story.author_link }}">{{ story.author }}</a>
 								{{ story.date | fromnow }}
 								on <a href="{{ story.link }}">{{ story.source }}</a> |
 								<a href="/{{ story.id }}/c">
 									{{ story.num_comments }} comment{{ 's' if story.num_comments != 1 }}
 								</a>
 							</div>
 							{% if not show_comments and story.text %}
 								<div class="story-text">{{ story.text | safe }}</div>
 							{% elif show_comments %}
 								{% macro render_comment(comment, level) %}
 									<dt></dt>
 									<dd class="comment{% if level > 0 %} lined{% endif %}">
 										<div class="info">
 											<p>
 												{% if comment.author == story.author %}[OP] {% endif %}{{ comment.author or '[Deleted]' }} | <a href="#{{ comment.author }}{{ comment.date }}" id="{{ comment.author }}{{ comment.date }}">{{ comment.date | fromnow }}</a>
 											</p>
 										</div>
 										<div class="text">{{ (comment.text | safe) if comment.text else '<p>[Empty / deleted comment]</p>' }}</div>
 										{% if comment.comments %}
 											<dl>
 												{% for reply in comment.comments %}
 													{{ render_comment(reply, level + 1) }}
 												{% endfor %}
 											</dl>
 										{% endif %}
 									</dd>
 								{% endmacro %}
 								<dl class="comments">
 									{% for comment in story.comments %}{{ render_comment(comment, 0) }}{% endfor %}
 								</dl>
 							{% endif %}
 						</div>
 						<div class='dot toggleDot'>
 							<div class='button'>
 								<a href="/{{ story.id }}{{ '/c' if not show_comments else '' }}">
 									{{ '' if not show_comments else '' }}
 								</a>
 							</div>
 						</div>
 					</div>
 				{% elif stories %}
 					<div class="container">
 						{% for story in stories %}
 							<div class='item'>
 								<div class='title'>
 									<a class='link' href='/{{ story.id }}'>
 										<img class='source-logo' src='/logos/{{ story.source }}.png' alt='{{ story.source }}:' /> {{ story.title }}
 									</a>
 									<span class='source'>
 										(<a class='source' href='{{ story.url or story.link }}'>{{ story.hostname }}</a>)
 									</span>
 								</div>
 								<div class='info'>
 									{{ story.score }} points
 									by <a href="{{ story.author_link }}">{{ story.author }}</a>
 									{{ story.date | fromnow }}
 									on <a href="{{ story.link }}">{{ story.source }}</a> |
 									<a class="{{ 'hot' if story.num_comments > 99 else '' }}" href="/{{ story.id }}/c">
 										{{ story.num_comments }} comment{{ 's' if story.num_comments != 1 }}
 									</a>
 								</div>
 							</div>
 						{% endfor %}
 					</div>
 				{% endif %}
 			</div>
 		</div>
 		<div id="root"></div>
 		<!--
 			This HTML file is a template.
 			If you open it directly in the browser, you will see an empty page.
@@ -1,175 +1,85 @@
-import React, { useState, useLayoutEffect, useEffect, useRef, useCallback } from 'react';
+import React from 'react';
 import { BrowserRouter as Router, Route, Link, Switch } from 'react-router-dom';
 import localForage from 'localforage';
 import './Style-light.css';
 import './Style-dark.css';
 import './Style-black.css';
 import './Style-red.css';
 import './fonts/Fonts.css';
-import { BackwardDot, ForwardDot } from './utils.js';
+import { ForwardDot } from './utils.js';
 import Feed from './Feed.js';
 import Article from './Article.js';
 import Comments from './Comments.js';
 import Search from './Search.js';
 import Submit from './Submit.js';
 import Results from './Results.js';
 import ScrollToTop from './ScrollToTop.js';
-import Settings from './Settings.js';
+import Feed from './pages/Feed.js';
 import Article from './pages/Article.js';
 import Comments from './pages/Comments.js';
 import Results from './pages/Results.js';
-function App() {
+
-	const [theme, setTheme] = useState(localStorage.getItem('theme') || '');
+class App extends React.Component {
-	const cache = useRef({});
+	constructor(props) {
-	const [isFullScreen, setIsFullScreen] = useState(!!document.fullscreenElement);
+		super(props);
-	const [waitingWorker, setWaitingWorker] = useState(null);
+
-	const [settingsOpen, setSettingsOpen] = useState(false);
+		this.state = {
-	const defaultBodyFontSize = 1.0;
+			theme: localStorage.getItem('theme') || '',
 	const [bodyFontSize, setBodyFontSize] = useState(Number(localStorage.getItem('bodyFontSize')) || defaultBodyFontSize);
 	const [bodyFont, setBodyFont] = useState(localStorage.getItem('bodyFont') || 'Sans Serif');
 	const [articleFont, setArticleFont] = useState(localStorage.getItem('articleFont') || 'Apparatus SIL');
 	const [filterSmallweb, setFilterSmallweb] = useState(() => localStorage.getItem('filterSmallweb') === 'true');
 	const [feedSources, setFeedSources] = useState(() => {
 		const saved = localStorage.getItem('feedSources');
 		return saved ? JSON.parse(saved) : {
 			hackernews: true,
 			reddit: true,
 			lobsters: true,
 			tildes: true,
 		};
 	});
-	const updateCache = useCallback((key, value) => {
+		this.cache = {};
-		cache.current[key] = value;
+	}
 	}, []);
-	useEffect(() => {
+	updateCache = (key, value) => {
-		const onSWUpdate = e => {
+		this.cache[key] = value;
-			setWaitingWorker(e.detail.waiting);
+	}
 		};
 		window.addEventListener('swUpdate', onSWUpdate);
 		return () => window.removeEventListener('swUpdate', onSWUpdate);
 	}, []);
-	useEffect(() => {
+	light() {
-		if (Object.keys(cache.current).length === 0) {
+		this.setState({ theme: '' });
 		localStorage.setItem('theme', '');
 	}
 	dark() {
 		this.setState({ theme: 'dark' });
 		localStorage.setItem('theme', 'dark');
 	}
 	componentDidMount() {
 		if (!this.cache.length) {
 			localForage.iterate((value, key) => {
-				updateCache(key, value);
+				this.updateCache(key, value);
 			}).then(() => {
 				console.log('loaded cache from localforage');
 			});
 			console.log('loaded cache from localforage');
 		}
-	}, [updateCache]);
+	}
-	useEffect(() => {
+	render() {
-		const onFullScreenChange = () => setIsFullScreen(!!document.fullscreenElement);
+		const theme = this.state.theme;
-		document.addEventListener('fullscreenchange', onFullScreenChange);
+		document.body.style.backgroundColor = theme === 'dark' ? '#000' : '#eeeeee';
 		return () => document.removeEventListener('fullscreenchange', onFullScreenChange);
 	}, []);
-	useLayoutEffect(() => {
+		return (
-		if (theme === 'dark') {
+			<div className={theme}>
-			document.body.style.backgroundColor = '#1a1a1a';
+				<Router>
-		} else if (theme === 'black') {
+					<div className='container menu'>
-			document.body.style.backgroundColor = '#000';
+						<p>
-		} else if (theme === 'red') {
+							<Link to='/'>QotNews - Feed</Link>
-			document.body.style.backgroundColor = '#000';
+							<span className='theme'>Theme: <a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a></span>
-		} else {
+							<br />
-			document.body.style.backgroundColor = '#eeeeee';
+							<span className='slogan'>Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode.</span>
-		}
+						</p>
-	}, [theme]);
+						<Route path='/(|search)' component={Search} />
 						<Route path='/(|search)' component={Submit} />
 					</div>
-	useEffect(() => {
+					<Route path='/' exact render={(props) => <Feed {...props} updateCache={this.updateCache} />} />
-		document.documentElement.style.fontSize = `${bodyFontSize}rem`;
+					<Switch>
-	}, [bodyFontSize]);
+						<Route path='/search' component={Results} />
 						<Route path='/:id' exact render={(props) => <Article {...props} cache={this.cache} />} />
 					</Switch>
 					<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} />} />
-	const fontMap = {
+					<ForwardDot />
 		'Sans Serif': 'sans-serif',
 		'Serif': 'serif',
 		'Apparatus SIL': "'Apparatus SIL', sans-serif"
 	};
-	useEffect(() => {
+					<ScrollToTop />
-		document.body.style.fontFamily = fontMap[bodyFont];
+				</Router>
-	}, [bodyFont]);
+			</div>
-
+		);
-	useEffect(() => {
+	}
 		const styleId = 'article-font-family-style';
 		let style = document.getElementById(styleId);
 		if (!style) {
 			style = document.createElement('style');
 			style.id = styleId;
 			document.head.appendChild(style);
 		}
 		style.innerHTML = `.story-text { font-family: ${fontMap[articleFont]} !important; }`;
 	}, [articleFont]);
 	return (
 		<div className={theme}>
 			<Settings
 				settingsOpen={settingsOpen}
 				setSettingsOpen={setSettingsOpen}
 				theme={theme}
 				setTheme={setTheme}
 				isFullScreen={isFullScreen}
 				filterSmallweb={filterSmallweb}
 				setFilterSmallweb={setFilterSmallweb}
 				feedSources={feedSources}
 				setFeedSources={setFeedSources}
 				bodyFontSize={bodyFontSize}
 				setBodyFontSize={setBodyFontSize}
 				defaultBodyFontSize={defaultBodyFontSize}
 				bodyFont={bodyFont}
 				setBodyFont={setBodyFont}
 				articleFont={articleFont}
 				setArticleFont={setArticleFont}
 			/>
 			{waitingWorker &&
 				<div className='update-banner'>
 					Client version mismatch, please refresh:{' '}
 					<button onClick={() => {
 						waitingWorker.postMessage({ type: 'SKIP_WAITING' });
 						const reload = () => window.location.reload();
 						navigator.serviceWorker.addEventListener('controllerchange', reload, { once: true });
 						// Fallback for when the controller has already changed (ie. in another tab)
 						navigator.serviceWorker.getRegistration().then(reg => {
 							if (!reg || !reg.waiting) {
 								reload();
 							}
 						});
 					}}>
 						Refresh
 					</button>
 				</div>
 			}
 			<Router>
 				<div className='container menu'>
 					<p>
 						<Link to='/'>QotNews</Link>
 						<button className="settings-button" onClick={() => setSettingsOpen(true)}>Settings</button>
 						<br />
 						<span className='slogan'>Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.</span>
 					</p>
 					<Route path='/(|search)' component={Search} />
 					<Route path='/(|search)' component={Submit} />
 				</div>
 				<Route path='/' exact render={(props) => <Feed {...props} updateCache={updateCache} filterSmallweb={filterSmallweb} feedSources={feedSources} />} />
 				<Switch>
 					<Route path='/search' component={Results} />
 					<Route path='/:id' exact render={(props) => <Article {...props} cache={cache.current} />} />
 				</Switch>
 				<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={cache.current} />} />
 				<BackwardDot />
 				<ForwardDot />
 				<ScrollToTop />
 			</Router>
 		</div>
 	);
 }
 export default App;
@@ -1,237 +0,0 @@
 import React, { useState, useEffect } from 'react';
 import { useParams } from 'react-router-dom';
 import { Helmet } from 'react-helmet';
 import localForage from 'localforage';
 import { sourceLink, similarLink, infoLine, ToggleDot } from './utils.js';
 import Latex from 'react-latex-next';
 import 'katex/dist/katex.min.css';
 // Tags that renderNodes() emits as self-closing elements, without recursing into child nodes.
 const VOID_ELEMENTS = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'];
 // Tags that renderNodes() does not rebuild node-by-node; their outerHTML is injected verbatim inside a <span>.
 const DANGEROUS_TAGS = ['svg', 'math'];
 // Delimiter pairs handed to <Latex>; `display: true` entries are display math, the others inline math.
 const latexDelimiters = [
    { left: '$$', right: '$$', display: true },
    { left: '\\[', right: '\\]', display: true },
    { left: '$', right: '$', display: false },
    { left: '\\(', right: '\\)', display: false }
 ];
 function Article({ cache }) {
 	const { id } = useParams();
 	if (id in cache) console.log('cache hit');
 	const [story, setStory] = useState(cache[id] || false);
 	const [error, setError] = useState('');
 	const [pConv, setPConv] = useState([]);
 	const [copyButtonText, setCopyButtonText] = useState('\ue92c');
 	useEffect(() => {
 		localForage.getItem(id)
 			.then(
 				(value) => {
 					if (value) {
 						setStory(value);
 					}
 				}
 			);
 		fetch('/api/' + id)
 			.then(res => {
 				if (!res.ok) {
 					throw new Error(`Server responded with ${res.status} ${res.statusText}`);
 				}
 				return res.json();
 			})
 			.then(
 				(result) => {
 					setStory(result.story);
 					localForage.setItem(id, result.story);
 				},
 				(error) => {
 					const errorMessage = `Failed to fetch new article content (ID: ${id}). Your connection may be down or the server might be experiencing issues. ${error.toString()}.`;
 					setError(errorMessage);
 				}
 			);
 	}, [id]);
 	const copyLink = () => {
 		navigator.clipboard.writeText(`${story.title}:\n${window.location.href}`).then(() => {
 			setCopyButtonText('\uea10');
 			setTimeout(() => setCopyButtonText('\ue92c'), 2000);
 		}, () => {
 			setCopyButtonText('\uea0f');
 			setTimeout(() => setCopyButtonText('\ue92c'), 2000);
 		});
 	};
 	const pConvert = (n) => {
 		setPConv(prevPConv => [...prevPConv, n]);
 	};
 	const isCodeBlock = (v) => {
 		if (v.localName === 'pre') {
 			return true;
 		}
 		if (v.localName === 'code') {
 			if (v.closest('p')) {
 				return false;
 			}
 			const parent = v.parentElement;
 			if (parent) {
 				const nonWhitespaceChildren = Array.from(parent.childNodes).filter(n => {
 					return n.nodeType !== Node.TEXT_NODE || n.textContent.trim() !== '';
 				});
 				if (nonWhitespaceChildren.length === 1 && nonWhitespaceChildren[0] === v) {
 					return true;
 				}
 			}
 		}
 		return false;
 	};
 	const renderNodes = (nodes, keyPrefix = '') => {
 		return Array.from(nodes).map((v, k) => {
 			const key = `${keyPrefix}${k}`;
 			if (pConv.includes(key)) {
 				return (
 					<React.Fragment key={key}>
 						{v.textContent.split('\n\n').map((x, i) =>
 							<p key={i}>{x}</p>
 						)}
 					</React.Fragment>
 				);
 			}
 			if (v.nodeName === '#text') {
 				const text = v.data;
 				if (text.includes('\\[') || text.includes('\\(') || text.includes('$$') || /\$(?:[^$]*[^\s$])\$/.test(text)) {
 					return <Latex key={key} delimiters={latexDelimiters}>{text}</Latex>;
 				}
 				// Only wrap top-level text nodes in <p>
 				if (keyPrefix === '' && v.data.trim() !== '') {
 					return <p key={key}>{v.data}</p>;
 				}
 				return v.data;
 			}
 			if (v.nodeType !== Node.ELEMENT_NODE) {
 				return null;
 			}
 			if (DANGEROUS_TAGS.includes(v.localName)) {
 				return <span key={key} dangerouslySetInnerHTML={{ __html: v.outerHTML }} />;
 			}
 			const Tag = v.localName;
 			if (isCodeBlock(v)) {
 				return (
 					<React.Fragment key={key}>
 						<Tag dangerouslySetInnerHTML={{ __html: v.innerHTML }} />
 						<button onClick={() => pConvert(key)}>Convert Code to Paragraph</button>
 					</React.Fragment>
 				);
 			}
 			const textContent = v.textContent.trim();
 			const isMath = (textContent.startsWith('\\(') && textContent.endsWith('\\)')) ||
 				(textContent.startsWith('\\[') && textContent.endsWith('\\]')) ||
 				(textContent.startsWith('$$') && textContent.endsWith('$$')) ||
 				(textContent.startsWith('$') && textContent.endsWith('$') && textContent.indexOf('$') !== textContent.lastIndexOf('$') && !/\s/.test(textContent.charAt(textContent.length - 2)));
 			const props = { key: key };
 			if (v.hasAttributes()) {
 				for (const attr of v.attributes) {
 					const name = attr.name === 'class' ? 'className' : attr.name;
 					props[name] = attr.value;
 				}
 			}
 			if (isMath) {
 				let mathContent = v.textContent;
 				// align environment requires display math mode
 				if (mathContent.includes('\\begin{align')) {
 					const trimmed = mathContent.trim();
 					if (trimmed.startsWith('\\(')) {
 						// Replace \( and \) with \[ and \] to switch to display mode
 						const firstParen = mathContent.indexOf('\\(');
 						const lastParen = mathContent.lastIndexOf('\\)');
 						mathContent = mathContent.substring(0, firstParen) + '\\[' + mathContent.substring(firstParen + 2, lastParen) + '\\]' + mathContent.substring(lastParen + 2);
 					} else if (trimmed.startsWith('$') && !trimmed.startsWith('$$')) {
 						// Replace $ with $$
 						const firstDollar = mathContent.indexOf('$');
 						const lastDollar = mathContent.lastIndexOf('$');
 						if (firstDollar !== lastDollar) {
 							mathContent = mathContent.substring(0, firstDollar) + '$$' + mathContent.substring(firstDollar + 1, lastDollar) + '$$' + mathContent.substring(lastDollar + 1);
 						}
 					}
 				}
 				return <Tag {...props}><Latex delimiters={latexDelimiters}>{mathContent}</Latex></Tag>;
 			}
 			if (VOID_ELEMENTS.includes(Tag)) {
 				return <Tag {...props} />;
 			}
 			return (
 				<Tag {...props}>
 					{renderNodes(v.childNodes, `${key}-`)}
 				</Tag>
 			);
 		});
 	};
 	const nodes = (s) => {
 		if (s && s.text) {
 			let div = document.createElement('div');
 			div.innerHTML = s.text;
 			return div.childNodes;
 		}
 		return null;
 	};
 	const storyNodes = nodes(story);
 	return (
 		<div className='article-container'>
 			{error &&
 				<details style={{marginBottom: '1rem'}}>
 					<summary>Connection error? Click to expand.</summary>
 					<p>{error}</p>
 					{story && <p>Loaded article from cache.</p>}
 				</details>
 			}
 			{story ?
 				<div className='article'>
 					<Helmet>
 						<title>{story.title} | QotNews</title>
 						<meta name="robots" content="noindex" />
 					</Helmet>
 					<h1>{story.title} <button className='copy-button' onClick={copyLink}>{copyButtonText}</button></h1>
 					<div className='info'>
 						Source: {sourceLink(story)} | {similarLink(story)}
 					</div>
 					{infoLine(story)}
 					{storyNodes ?
 						<div className='story-text'>
 							{renderNodes(storyNodes)}
 						</div>
 					:
 						<p>Problem getting article :(</p>
 					}
 				</div>
 			:
 				<p>Loading...</p>
 			}
 			<ToggleDot id={id} article={false} />
 		</div>
 	);
 }
 export default Article;
@@ -1,140 +0,0 @@
 import React, { useState, useEffect } from 'react';
 import { Link, useParams } from 'react-router-dom';
 import { HashLink } from 'react-router-hash-link';
 import { Helmet } from 'react-helmet';
 import moment from 'moment';
 import localForage from 'localforage';
 import { infoLine, ToggleDot } from './utils.js';
 // Returns the size of the comment subtree rooted at `c`: the comment itself plus all nested replies.
 function countComments(c) {
 	let total = 1;
 	for (const reply of c.comments) {
 		total += countComments(reply);
 	}
 	return total;
 }
 function Comments({ cache }) {
 	const { id } = useParams();
 	if (id in cache) console.log('cache hit');
 	const [story, setStory] = useState(cache[id] || false);
 	const [error, setError] = useState('');
 	const [collapsed, setCollapsed] = useState([]);
 	const [expanded, setExpanded] = useState([]);
 	useEffect(() => {
 		localForage.getItem(id)
 			.then(
 				(value) => {
 					if (value) {
 						setStory(value);
 					}
 				}
 			);
 		fetch('/api/' + id)
 			.then(res => {
 				if (!res.ok) {
 					throw new Error(`Server responded with ${res.status} ${res.statusText}`);
 				}
 				return res.json();
 			})
 			.then(
 				(result) => {
 					setStory(result.story);
 					localForage.setItem(id, result.story);
 					const hash = window.location.hash.substring(1);
 					if (hash) {
 						setTimeout(() => {
 							const element = document.getElementById(hash);
 							if (element) {
 								element.scrollIntoView();
 							}
 						}, 0);
 					}
 				},
 				(error) => {
 					const errorMessage = `Failed to fetch comments (ID: ${id}). Your connection may be down or the server might be experiencing issues. ${error.toString()}.`;
 					setError(errorMessage);
 				}
 			);
 	}, [id]);
 	const collapseComment = (cid) => {
 		setCollapsed(prev => [...prev, cid]);
 		setExpanded(prev => prev.filter(x => x !== cid));
 	};
 	const expandComment = (cid) => {
 		setCollapsed(prev => prev.filter(x => x !== cid));
 		setExpanded(prev => [...prev, cid]);
 	};
 	const displayComment = (story, c, level) => {
 		const cid = c.author+c.date;
 		const isCollapsed = collapsed.includes(cid);
 		const isExpanded = expanded.includes(cid);
 		const hidden = isCollapsed || (level == 4 && !isExpanded);
 		const hasChildren = c.comments.length !== 0;
 		return (
 			<div className={level ? 'comment lined' : 'comment'} key={cid}>
 				<div className='info'>
 					<p>
 						{c.author === story.author ? '[OP]' : ''} {c.author || '[Deleted]'}
 						{' '} | <HashLink to={'#'+cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
 						{hidden || hasChildren &&
 							<button className='collapser pointer' onClick={() => collapseComment(cid)}>–</button>
 						}
 					</p>
 				</div>
 				<div className={isCollapsed ? 'text hidden' : 'text'}  dangerouslySetInnerHTML={{ __html: c.text || '<p>[Empty / deleted comment]</p>'}} />
 				{hidden && hasChildren ?
 					<button className='comment lined info pointer' onClick={() => expandComment(cid)}>[show {countComments(c)-1} more]</button>
 				:
 					c.comments.map(i => displayComment(story, i, level + 1))
 				}
 			</div>
 		);
 	};
 	return (
 		<div className='container'>
 			{error &&
 				<details style={{marginBottom: '1rem'}}>
 					<summary>Connection error? Click to expand.</summary>
 					<p>{error}</p>
 					{story && <p>Loaded comments from cache.</p>}
 				</details>
 			}
 			{story ?
 				<div className='article'>
 					<Helmet>
 						<title>{story.title} | QotNews</title>
 						<meta name="robots" content="noindex" />
 					</Helmet>
 					<h1>{story.title}</h1>
 					<div className='info'>
 						<Link to={'/' + story.id}>View article</Link>
 					</div>
 					{infoLine(story)}
 					<div className='comments'>
 						{story.comments.map(c => displayComment(story, c, 0))}
 					</div>
 				</div>
 			:
 				<p>loading...</p>
 			}
 			<ToggleDot id={id} article={true} />
 		</div>
 	);
 }
 export default Comments;
@@ -1,177 +0,0 @@
 import React, { useState, useEffect, useRef } from 'react';
 import { Link } from 'react-router-dom';
 import { Helmet } from 'react-helmet';
 import localForage from 'localforage';
 import { sourceLink, infoLine, logos } from './utils.js';
 function Feed({ updateCache, filterSmallweb, feedSources }) {
 	const [stories, setStories] = useState(() => JSON.parse(localStorage.getItem('stories')) || false);
 	const [error, setError] = useState('');
 	const [loadingStatus, setLoadingStatus] = useState(null);
 	const isInitialMount = useRef(true);
 	useEffect(() => {
 		if (isInitialMount.current) {
 			isInitialMount.current = false;
 		} else {
 			setStories(false);
 		}
 	}, [filterSmallweb, feedSources]);
 	useEffect(() => {
 		const controller = new AbortController();
 		if ('serviceWorker' in navigator) {
 			navigator.serviceWorker.getRegistration().then(reg => {
 				if (reg) {
 					console.log('Checking for client update...');
 					reg.update();
 				}
 			});
 		}
 		const params = new URLSearchParams();
 		if (filterSmallweb) {
 			params.append('smallweb', 'true');
 		}
 		const allSources = Object.keys(feedSources);
 		const enabledSources = allSources.filter(key => feedSources[key]);
 		if (enabledSources.length > 0 && enabledSources.length < allSources.length) {
 			enabledSources.forEach(source => params.append('source', source));
 		}
 		const apiUrl = `/api?${params.toString()}`;
 		fetch(apiUrl, { signal: controller.signal })
 			.then(res => {
 				if (!res.ok) {
 					throw new Error(`Server responded with ${res.status} ${res.statusText}`);
 				}
 				return res.json();
 			})
 			.then(
 				async (result) => {
 					const newApiStories = result.stories;
 					const updated = !stories || stories.map(s => s.id).join() !== newApiStories.map(s => s.id).join();
 					console.log('New stories available:', updated);
 					if (!updated) return;
 					setLoadingStatus({ current: 0, total: newApiStories.length });
 					let currentStories = Array.isArray(stories) ? [...stories] : [];
 					let preloadedCount = 0;
 					for (const [index, newStory] of newApiStories.entries()) {
 						if (controller.signal.aborted) {
 							break;
 						}
 						try {
 							const storyFetchController = new AbortController();
 							const timeoutId = setTimeout(() => storyFetchController.abort(), 10000); // 10-second timeout
 							const storyRes = await fetch('/api/' + newStory.id, { signal: storyFetchController.signal });
 							clearTimeout(timeoutId);
 							if (!storyRes.ok) {
 								throw new Error(`Server responded with ${storyRes.status} ${storyRes.statusText}`);
 							}
 							const storyResult = await storyRes.json();
 							const fullStory = storyResult.story;
 							await localForage.setItem(fullStory.id, fullStory);
 							console.log('Preloaded story:', fullStory.id, fullStory.title);
 							updateCache(fullStory.id, fullStory);
 							preloadedCount++;
 							setLoadingStatus({ current: preloadedCount, total: newApiStories.length });
 							const existingStoryIndex = currentStories.findIndex(s => s.id === newStory.id);
 							if (existingStoryIndex > -1) {
 								currentStories.splice(existingStoryIndex, 1);
 							}
 							currentStories.splice(index, 0, newStory);
 							localStorage.setItem('stories', JSON.stringify(currentStories));
 							setStories(currentStories);
 						} catch (error) {
 							let errorMessage;
 							if (error.name === 'AbortError') {
 								errorMessage = `The request to fetch story '${newStory.title}' (${newStory.id}) timed out after 10 seconds. Your connection may be unstable. (${preloadedCount} / ${newApiStories.length} stories preloaded)`;
 								console.log('Fetch timed out for story:', newStory.id);
 							} else {
 								errorMessage = `An error occurred while fetching story '${newStory.title}' (ID: ${newStory.id}): ${error.toString()}. (${preloadedCount} / ${newApiStories.length} stories preloaded)`;
 								console.log('Fetch failed for story:', newStory.id, error);
 							}
 							setError(errorMessage);
 							break;
 						}
 					}
 					const finalStories = currentStories.slice(0, newApiStories.length);
 					const removedStories = currentStories.slice(newApiStories.length);
 					for (const story of removedStories) {
 						console.log('Removed story:', story.id, story.title);
 						localForage.removeItem(story.id);
 					}
 					localStorage.setItem('stories', JSON.stringify(finalStories));
 					setStories(finalStories);
 					setLoadingStatus(null);
 				},
 				(error) => {
 					if (error.name === 'AbortError') {
 						console.log('Feed fetch aborted.');
 						return;
 					}
 					const errorMessage = `Failed to fetch the main story list from the API. Your connection may be down or the server might be experiencing issues. ${error.toString()}.`;
 					setError(errorMessage);
 				}
 			);
 		return () => controller.abort();
 	}, [updateCache, filterSmallweb, feedSources]);
 	return (
 		<div className='container'>
 			<Helmet>
 				<title>QotNews</title>
 				<meta name="robots" content="index" />
 			</Helmet>
 			{error &&
 				<details style={{marginBottom: '1rem'}}>
 					<summary>Connection error? Click to expand.</summary>
 					<p>{error}</p>
 					{stories && <p>Loaded feed from cache.</p>}
 				</details>
 			}
 			{stories ?
 				<div>
 					{stories.map(x =>
 						<div className='item' key={x.id}>
 							<div className='title'>
 								<Link className='link' to={'/' + x.id}>
 									<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
 								</Link>
 								<span className='source'>
 									({sourceLink(x)})
 								</span>
 							</div>
 							{infoLine(x)}
 						</div>
 					)}
 				</div>
 			:
 				<p>Loading...</p>
 			}
 			{loadingStatus && <p>Preloading stories {loadingStatus.current} / {loadingStatus.total}...</p>}
 		</div>
 	);
 }
 export default Feed;
@@ -1,73 +0,0 @@
 import React, { useState, useEffect } from 'react';
 import { Link, useLocation } from 'react-router-dom';
 import { Helmet } from 'react-helmet';
 import { sourceLink, infoLine, logos } from './utils.js';
 import AbortController from 'abort-controller';
 function Results() {
 	const [stories, setStories] = useState(false);
 	const [error, setError] = useState(false);
 	const location = useLocation();
 	useEffect(() => {
 		const controller = new AbortController();
 		const signal = controller.signal;
 		const search = location.search;
 		fetch('/api/search' + search, { method: 'get', signal: signal })
 			.then(res => res.json())
 			.then(
 				(result) => {
 					setStories(result.hits);
 				},
 				(error) => {
 					if (error.message !== 'The operation was aborted. ') {
 						setError(true);
 					}
 				}
 			);
 		return () => {
 			controller.abort();
 		};
 	}, [location.search]);
 	return (
 		<div className='container'>
 			<Helmet>
 				<title>Search Results | QotNews</title>
 			</Helmet>
 			{error && <p>Connection error?</p>}
 			{stories ?
 				<>
 					<p>Search results:</p>
 					<div className='comment lined'>
 						{stories.length ?
 							stories.map(x =>
 								<div className='item' key={x.id}>
 									<div className='title'>
 										<Link className='link' to={'/' + x.id}>
 											<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
 										</Link>
 										<span className='source'>
 											({sourceLink(x)})
 										</span>
 									</div>
 									{infoLine(x)}
 								</div>
 							)
 						:
 							<p>none</p>
 						}
 					</div>
 				</>
 			:
 				<p>loading...</p>
 			}
 		</div>
 	);
 }
 export default Results;
@@ -15,7 +15,6 @@ class ScrollToTop extends React.Component {
 		}
 		window.scrollTo(0, 0);
 		document.body.scrollTop = 0;
 	}
 	render() {
@@ -1,46 +1,51 @@
-import React, { useState, useRef } from 'react';
+import React, { Component } from 'react';
-import { useHistory, useLocation } from 'react-router-dom';
+import { withRouter } from 'react-router-dom';
 import queryString from 'query-string';
-const getSearch = location => queryString.parse(location.search).q || '';
+const getSearch = props => queryString.parse(props.location.search).q;
-function Search() {
+class Search extends Component {
-	const history = useHistory();
+	constructor(props) {
-	const location = useLocation();
+		super(props);
-	const [search, setSearch] = useState(getSearch(location));
+		this.state = {search: getSearch(this.props)};
-	const inputRef = useRef(null);
+		this.inputRef = React.createRef();
 	}
-	const searchArticles = (event) => {
+	searchArticles = (event) => {
-		const newSearch = event.target.value;
+		const search = event.target.value;
-		setSearch(newSearch);
+		this.setState({search: search});
-		if (newSearch.length >= 3) {
+		if (search.length >= 3) {
-			const searchQuery = queryString.stringify({ 'q': newSearch });
+			const searchQuery = queryString.stringify({ 'q': search });
-			history.replace('/search?' + searchQuery);
+			this.props.history.replace('/search?' + searchQuery);
 		} else {
-			history.replace('/');
+			this.props.history.replace('/');
 		}
 	}
-	const searchAgain = (event) => {
+	searchAgain = (event) => {
 		event.preventDefault();
 		const searchString = queryString.stringify({ 'q': event.target[0].value });
-		history.push('/search?' + searchString);
+		this.props.history.push('/search?' + searchString);
-		inputRef.current.blur();
+		this.inputRef.current.blur();
 	}
-	return (
+	render() {
-		<span className='search'>
+		const search = this.state.search;
-			<form onSubmit={searchAgain}>
+
-				<input
+		return (
-					placeholder='Search...'
+			<span className='search'>
-					value={search}
+				<form onSubmit={this.searchAgain}>
-					onChange={searchArticles}
+					<input
-					ref={inputRef}
+						placeholder='Search... (fixed)'
-				/>
+						value={search}
-			</form>
+						onChange={this.searchArticles}
-		</span>
+						ref={this.inputRef}
-	);
+					/>
 				</form>
 			</span>
 		);
 	}
 }
-export default Search;
+export default withRouter(Search);
@@ -1,191 +0,0 @@
 import React from 'react';
 function Settings({
 	settingsOpen,
 	setSettingsOpen,
 	theme,
 	setTheme,
 	isFullScreen,
 	filterSmallweb,
 	setFilterSmallweb,
 	feedSources,
 	setFeedSources,
 	bodyFontSize,
 	setBodyFontSize,
 	defaultBodyFontSize,
 	bodyFont,
 	setBodyFont,
 	articleFont,
 	setArticleFont,
 }) {
 	const light = () => {
 		setTheme('');
 		localStorage.setItem('theme', '');
 	};
 	const dark = () => {
 		setTheme('dark');
 		localStorage.setItem('theme', 'dark');
 	};
 	const black = () => {
 		setTheme('black');
 		localStorage.setItem('theme', 'black');
 	};
 	const red = () => {
 		setTheme('red');
 		localStorage.setItem('theme', 'red');
 	};
 	const handleFilterChange = e => {
 		const isChecked = e.target.checked;
 		setFilterSmallweb(isChecked);
 		localStorage.setItem('filterSmallweb', isChecked);
 	};
 	const handleFeedSourceChange = (source) => {
 		setFeedSources(prevSources => {
 			const newSources = { ...prevSources, [source]: !prevSources[source] };
 			localStorage.setItem('feedSources', JSON.stringify(newSources));
 			return newSources;
 		});
 	};
 	const changeBodyFont = (font) => {
 		setBodyFont(font);
 		localStorage.setItem('bodyFont', font);
 	};
 	const changeArticleFont = (font) => {
 		setArticleFont(font);
 		localStorage.setItem('articleFont', font);
 	};
 	const changeBodyFontSize = (amount) => {
 		const newSize = bodyFontSize + amount;
 		setBodyFontSize(parseFloat(newSize.toFixed(2)));
 		localStorage.setItem('bodyFontSize', newSize.toFixed(2));
 	};
 	const resetBodyFontSize = () => {
 		setBodyFontSize(defaultBodyFontSize);
 		localStorage.removeItem('bodyFontSize');
 	};
 	const bodyFontSettingsChanged = bodyFontSize !== defaultBodyFontSize;
 	const goFullScreen = () => {
 		if ('wakeLock' in navigator) {
 			navigator.wakeLock.request('screen');
 		}
 		document.body.requestFullscreen({ navigationUI: 'hide' });
 	};
 	const exitFullScreen = () => {
 		document.exitFullscreen();
 	};
 	const fullScreenAvailable = document.fullscreenEnabled ||
 		document.mozFullscreenEnabled ||
 		document.webkitFullscreenEnabled ||
 		document.msFullscreenEnabled;
 	if (!settingsOpen) {
 		return null;
 	}
 	return (
 		<>
 			<div className="modal-overlay" onClick={() => setSettingsOpen(false)}></div>
 			<div className="modal-content" onClick={e => e.stopPropagation()}>
 				<button className="close-modal-button" onClick={() => setSettingsOpen(false)}>&times;</button>
 				<h3>Settings</h3>
 				<div className="setting-group">
 					<h4>Theme</h4>
 					<button className={theme === '' ? 'active' : ''} onClick={() => { light(); setSettingsOpen(false); }}>Light</button>
 					<button className={theme === 'dark' ? 'active' : ''} onClick={() => { dark(); setSettingsOpen(false); }}>Dark</button>
 					<button className={theme === 'black' ? 'active' : ''} onClick={() => { black(); setSettingsOpen(false); }}>Black</button>
 					<button className={theme === 'red' ? 'active' : ''} onClick={() => { red(); setSettingsOpen(false); }}>Red</button>
 					{fullScreenAvailable &&
 						<div style={{ marginTop: '0.5rem' }}>
 							{!isFullScreen ?
 								<button onClick={() => { goFullScreen(); setSettingsOpen(false); }}>Enter Fullscreen</button>
 								:
 								<button onClick={() => { exitFullScreen(); setSettingsOpen(false); }}>Exit Fullscreen</button>
 							}
 						</div>
 					}
 				</div>
 				<div className="setting-group">
 					<h4>Feed</h4>
 					<div className="font-option gap">
 						<input className="checkbox" type="checkbox" id="filter-smallweb" checked={filterSmallweb} onChange={handleFilterChange} />
 						<label htmlFor="filter-smallweb">Small websites only</label>
 					</div>
 					<div className="font-option">
 						<input className="checkbox" type="checkbox" id="filter-hackernews" name="feed-source" checked={feedSources.hackernews} onChange={() => handleFeedSourceChange('hackernews')} />
 						<label htmlFor="filter-hackernews">Hacker News</label>
 					</div>
 					<div className="font-option">
 						<input className="checkbox" type="checkbox" id="filter-reddit" name="feed-source" checked={feedSources.reddit} onChange={() => handleFeedSourceChange('reddit')} />
 						<label htmlFor="filter-reddit">Reddit</label>
 					</div>
 					<div className="font-option">
 						<input className="checkbox" type="checkbox" id="filter-lobsters" name="feed-source" checked={feedSources.lobsters} onChange={() => handleFeedSourceChange('lobsters')} />
 						<label htmlFor="filter-lobsters">Lobsters</label>
 					</div>
 					<div className="font-option">
 						<input className="checkbox" type="checkbox" id="filter-tildes" name="feed-source" checked={feedSources.tildes} onChange={() => handleFeedSourceChange('tildes')} />
 						<label htmlFor="filter-tildes">Tildes</label>
 					</div>
 				</div>
 				<div className="setting-group">
 					<h4>Font Size</h4>
 					<button onClick={() => changeBodyFontSize(-0.05)}>-</button>
 					<span className="font-size-display">{bodyFontSize.toFixed(2)}</span>
 					<button onClick={() => changeBodyFontSize(0.05)}>+</button>
 					<button onClick={resetBodyFontSize} disabled={!bodyFontSettingsChanged}>Reset</button>
 				</div>
 				<div className="setting-group">
 					<h4>Body Font</h4>
 					<div className="font-option">
 						<input className="checkbox" type="radio" id="body-sans-serif" name="body-font" value="Sans Serif" checked={bodyFont === 'Sans Serif'} onChange={() => changeBodyFont('Sans Serif')} />
 						<label htmlFor="body-sans-serif">Sans Serif *</label>
 					</div>
 					<div className="font-option">
 						<input className="checkbox" type="radio" id="body-serif" name="body-font" value="Serif" checked={bodyFont === 'Serif'} onChange={() => changeBodyFont('Serif')} />
 						<label htmlFor="body-serif">Serif</label>
 					</div>
 					<div className="font-option">
 						<input className="checkbox" type="radio" id="body-apparatus" name="body-font" value="Apparatus SIL" checked={bodyFont === 'Apparatus SIL'} onChange={() => changeBodyFont('Apparatus SIL')} />
 						<label htmlFor="body-apparatus">Apparatus SIL</label>
 					</div>
 				</div>
 				<div className="setting-group">
 					<h4>Article Font</h4>
 					<div className="font-option">
 						<input className="checkbox" type="radio" id="article-sans-serif" name="article-font" value="Sans Serif" checked={articleFont === 'Sans Serif'} onChange={() => changeArticleFont('Sans Serif')} />
 						<label htmlFor="article-sans-serif">Sans Serif</label>
 					</div>
 					<div className="font-option">
 						<input className="checkbox" type="radio" id="article-serif" name="article-font" value="Serif" checked={articleFont === 'Serif'} onChange={() => changeArticleFont('Serif')} />
 						<label htmlFor="article-serif">Serif</label>
 					</div>
 					<div className="font-option">
 						<input className="checkbox" type="radio" id="article-apparatus" name="article-font" value="Apparatus SIL" checked={articleFont === 'Apparatus SIL'} onChange={() => changeArticleFont('Apparatus SIL')} />
 						<label htmlFor="article-apparatus">Apparatus SIL *</label>
 					</div>
 				</div>
 			</div>
 		</>
 	);
 }
 export default Settings;
@@ -1,103 +0,0 @@
 .black {
 	color: #ddd;
 }
 .black a {
 	color: #ddd;
 }
 .black input {
 	color: #ddd;
 	border: 1px solid #828282;
 }
 .black .menu button,
 .black .story-text button {
 	background-color: #222222;
 	border-color: #bbb;
 	color: #ddd;
 }
 .black .item {
 	color: #828282;
 }
 .black .item .source-logo {
 	filter: grayscale(1);
 }
 .black .item a {
 	color: #828282;
 }
 .black .item a.link {
 	color: #ddd;
 }
 .black .item a.link:visited {
 	color: #828282;
 }
 .black .item .info a.hot {
 	color: #cccccc;
 }
 .black .article a {
 	border-bottom: 1px solid #aaaaaa;
 }
 .black .article u {
 	border-bottom: 1px solid #aaaaaa;
 	text-decoration: none;
 }
 .black .story-text video,
 .black .story-text img {
 	filter: brightness(50%);
 }
 .black .article .info {
 	color: #828282;
 }
 .black .article .info a {
 	border-bottom: none;
 	color: #828282;
 }
 .black .comment.lined {
 	border-left: 1px solid #444444;
 }
 .black .checkbox:checked + label::after {
 	border-color: #eee;
 }
 .black .copy-button {
 	color: #828282;
 }
 .black .update-banner {
 	background-color: #333;
 	color: #ddd;
 }
 .black .update-banner button {
 	background-color: #222222;
 	border-color: #bbb;
 	color: #ddd;
 }
 .black .modal-content {
 	background: #222;
 	border-color: #828282;
 	color: #ddd;
 }
 .black .modal-content button {
 	background-color: #222222;
 	border-color: #bbb;
 }
 .black .modal-content button.active {
 	background-color: #555;
 }
@@ -11,17 +11,14 @@
 	border: 1px solid #828282;
 }
 .dark .menu button,
 .dark .story-text button {
 	background-color: #222222;
 	border-color: #bbb;
 	color: #ddd;
 }
 .dark .item {
 	color: #828282;
 }
 .dark .item .source-logo {
 	filter: grayscale(1);
 }
 .dark .item a {
 	color: #828282;
 }
@@ -46,7 +43,6 @@
 	text-decoration: none;
 }
 .dark .story-text video,
 .dark .story-text img {
 	filter: brightness(50%);
 }
@@ -63,37 +59,3 @@
 .dark .comment.lined {
 	border-left: 1px solid #444444;
 }
 .dark .checkbox:checked + label::after {
 	border-color: #eee;
 }
 .dark .copy-button {
 	color: #828282;
 }
 .dark .update-banner {
 	background-color: #333;
 	color: #ddd;
 }
 .dark .update-banner button {
 	background-color: #222222;
 	border-color: #bbb;
 	color: #ddd;
 }
 .dark .modal-content {
 	background: #222;
 	border-color: #828282;
 	color: #ddd;
 }
 .dark .modal-content button {
 	background-color: #222222;
 	border-color: #bbb;
 }
 .dark .modal-content button.active {
 	background-color: #555;
 }
@@ -1,32 +1,10 @@
 body {
 	text-rendering: optimizeLegibility;
-	font-size: 1rem;
+	font: 1rem/1.3 sans-serif;
 	line-height: 1.3;
 	color: #000000;
 	margin-bottom: 100vh;
 	word-break: break-word;
 	font-kerning: normal;
 	margin: 0;
 }
 ::backdrop {
 	background-color: rgba(0,0,0,0);
 }
 body:fullscreen {
 	overflow-y: scroll !important;
 }
 body:-ms-fullscreen {
 	overflow-y: scroll !important;
 }
 body:-webkit-full-screen {
 	overflow-y: scroll !important;
 }
 body:-moz-full-screen {
 	overflow-y: scroll !important;
 }
 #root {
 	margin: 8px 8px 100vh 8px !important;
 }
 a {
@@ -44,36 +22,10 @@ input {
 	border-radius: 4px;
 }
 .update-banner {
 	background-color: #ddd;
 	padding: 0.75rem;
 	text-align: center;
 }
 .update-banner button {
 	margin-left: 1rem;
 	padding: 0.25rem 0.75rem;
 	border: 1px solid #828282;
 	border-radius: 4px;
 	background-color: transparent;
 	cursor: pointer;
 }
 .fullscreen {
 	margin: 0.25rem;
 	padding: 0.25rem;
 }
 pre {
 	overflow: auto;
 }
 .comments pre {
 	overflow: auto;
 	white-space: pre-wrap;
 	overflow-wrap: break-word;
 }
 .container {
 	margin: 1rem auto;
 	max-width: 64rem;
@@ -86,7 +38,6 @@ pre {
 .slogan {
 	color: #828282;
 	margin-bottom: 0.5rem;
 }
 .theme {
@@ -143,13 +94,6 @@ span.source {
 	border-bottom: 1px solid #222222;
 }
 .article-title {
 	display: flex;
 	align-items: center;
 	margin-top: 0.67em;
 	margin-bottom: 0.67em;
 }
 .article h1 {
 	font-size: 1.6rem;
 }
@@ -200,20 +144,12 @@ span.source {
 }
 .story-text {
-	font-size: 1.2rem;
+	font: 1.2rem/1.5 'Apparatus SIL', sans-serif;
 	line-height: 1.5;
 	margin-top: 1em;
 }
 .comments {
 	margin-left: -1.25rem;
 	margin-top: 0;
 	margin-bottom: 0;
 	padding: 0;
 }
 .comments dl, .comments dd {
 	margin: 0;
 }
 .comment {
@@ -226,11 +162,6 @@ span.source {
 .comment .text {
 	margin-top: -0.5rem;
 	margin-bottom: 1rem;
 }
 .comment .text > * {
 	margin-bottom: 0;
 }
 .comment .text.hidden > p {
@@ -250,49 +181,20 @@ span.source {
 	padding-right: 1.5rem;
 }
 button.collapser {
 	background: transparent;
 	border: none;
 	margin: 0;
 	padding-top: 0;
 	padding-bottom: 0;
 	font: inherit;
 	color: inherit;
 }
 button.comment {
 	background: transparent;
 	border-top: none;
 	border-right: none;
 	border-bottom: none;
 	margin: 0;
 	padding-top: 0;
 	padding-right: 0;
 	padding-bottom: 0;
 	font: inherit;
 	color: inherit;
 	text-align: left;
 	width: 100%;
 }
 .comment .pointer {
 	cursor: pointer;
 }
-.dot {
+.toggleDot {
 	cursor: pointer;
 	position: fixed;
 	bottom: 1rem;
 	left: 1rem;
 	height: 3rem;
 	width: 3rem;
 	background-color: #828282;
 	border-radius: 50%;
 }
 .toggleDot {
 	bottom: 1rem;
 	left: 1rem;
 }
 .toggleDot .button {
 	font: 2rem/1 'icomoon';
 	position: relative;
@@ -301,204 +203,23 @@ button.comment {
 }
 .forwardDot {
 	cursor: pointer;
 	position: fixed;
 	bottom: 1rem;
 	right: 1rem;
 	height: 3rem;
 	width: 3rem;
 	background-color: #828282;
 	border-radius: 50%;
 }
 .forwardDot .button {
-	font: 2rem/1 'icomoon';
+	font: 2.5rem/1 'icomoon';
 	position: relative;
-	top: 0.5rem;
+	top: 0.25rem;
-	left: 0.5rem;
+	left: 0.3rem;
 }
 .backwardDot {
 	bottom: 1rem;
 	right: 5rem;
 }
 .backwardDot .button {
 	font: 2rem/1 'icomoon';
 	position: relative;
 	top: 0.5rem;
 	left: 0.5rem;
 }
 .search form {
 	display: inline;
 }
 .copy-button {
 	font: 1.5rem/1 'icomoon2';
 	color: #828282;
 	background: transparent;
 	border: none;
 	cursor: pointer;
 	vertical-align: middle;
 }
 .checkbox {
 	-webkit-appearance: none;
 	appearance: none;
 	position: absolute;
 	opacity: 0;
 	cursor: pointer;
 	height: 0;
 	width: 0;
 }
 .checkbox + label {
 	position: relative;
 	cursor: pointer;
 	padding-left: 1.75rem;
 	user-select: none;
 }
 .checkbox + label::before {
 	content: '';
 	position: absolute;
 	left: 0;
 	top: 0.1em;
 	width: 1rem;
 	height: 1rem;
 	border: 1px solid #828282;
 	background-color: transparent;
 	border-radius: 3px;
 }
 .checkbox:checked + label::after {
 	content: "";
 	position: absolute;
 	left: 0.35rem;
 	top: 0.2em;
 	width: 0.3rem;
 	height: 0.6rem;
 	border-style: solid;
 	border-color: #000;
 	border-width: 0 2px 2px 0;
 	transform: rotate(45deg);
 }
 .tooltip .tooltiptext {
 	visibility: hidden;
 	width: 140px;
 	background-color: #555;
 	color: #fff;
 	text-align: center;
 	border-radius: 6px;
 	padding: 5px 0;
 	position: absolute;
 	z-index: 1;
 	bottom: 110%;
 	left: 50%;
 	margin-left: -70px;
 	opacity: 0;
 	transition: opacity 0.2s;
 	font-size: 0.9rem;
 	line-height: 1.3;
 }
 .forwardDot .tooltiptext {
 	left: auto;
 	right: 0;
 	margin-left: 0;
 }
 .tooltip.show-tooltip .tooltiptext {
 	visibility: visible;
 	opacity: 1;
 }
 .settings-button {
 	float: right;
 	background: none;
 	border: none;
 	padding: 0;
 	cursor: pointer;
 	color: inherit;
 	font: inherit;
 }
 .modal-overlay {
 	position: fixed;
 	top: 0;
 	left: 0;
 	width: 100%;
 	height: 100%;
 	background-color: rgba(0,0,0,0.5);
 	z-index: 100;
 }
 .modal-content {
 	position: absolute;
 	top: 1rem;
 	right: 1rem;
 	background: #eee;
 	color: #000;
 	padding: 1rem;
 	border-radius: 4px;
 	z-index: 101;
 	min-width: 250px;
 	border: 1px solid #828282;
 }
 .modal-content h3, .modal-content h4 {
 	margin-top: 0;
 	margin-bottom: 0.5rem;
 }
 .modal-content .setting-group {
 	margin-bottom: 1rem;
 }
 .modal-content button {
 	margin-right: 0.5rem;
 	padding: 0.25rem 0.75rem;
 	border: 1px solid #828282;
 	border-radius: 4px;
 	background-color: transparent;
 	cursor: pointer;
 	font: inherit;
 	color: inherit;
 }
 .modal-content button:last-child {
 	margin-right: 0;
 }
 .modal-content button.active {
 	background-color: #ccc;
 }
 .modal-content button:disabled {
 	opacity: 0.5;
 	cursor: not-allowed;
 }
 .modal-content .font-size-display {
 	display: inline-block;
 	width: 50px;
 	text-align: center;
 	margin: 0 0.25rem;
 }
 .modal-content .font-option {
 	margin-bottom: 0.25rem;
 }
 .modal-content .gap {
 	margin-bottom: 0.75rem;
 }
 .close-modal-button {
 	position: absolute;
 	top: 0.5rem;
 	right: 0.75rem;
 	background: transparent;
 	border: none;
 	font-size: 1.5rem;
 	line-height: 1;
 	padding: 0;
 	cursor: pointer;
 	color: inherit;
 }
@@ -1,121 +0,0 @@
 .red {
 	color: #b00;
 	scrollbar-color: #b00 #440000;
 }
 .red a {
 	color: #b00;
 }
 .red input {
 	color: #b00;
 	border: 1px solid #690000;
 }
 .red input::placeholder {
 	color: #690000;
 }
 .red hr {
 	background-color: #690000;
 }
 .red .menu button,
 .red .story-text button {
 	background-color: #220000;
 	border-color: #b00;
 	color: #b00;
 }
 .red .item,
 .red .slogan {
 	color: #690000;
 }
 .red .item .source-logo {
 	display: none;
 }
 .red .item a {
 	color: #690000;
 }
 .red .item a.link {
 	color: #b00;
 }
 .red .item a.link:visited {
 	color: #690000;
 }
 .red .item .info a.hot {
 	color: #cc0000;
 }
 .red .article a {
 	border-bottom: 1px solid #aa0000;
 }
 .red .article u {
 	border-bottom: 1px solid #aa0000;
 	text-decoration: none;
 }
 .red .story-text video,
 .red .story-text img {
 	filter: grayscale(100%) brightness(20%) sepia(100%) hue-rotate(-50deg) saturate(600%) contrast(0.8);
 }
 .red .article .info {
 	color: #690000;
 }
 .red .article .info a {
 	border-bottom: none;
 	color: #690000;
 }
 .red .comment.lined {
 	border-left: 1px solid #440000;
 }
 .red .dot {
 	background-color: #440000;
 }
 .red .checkbox + label::before {
 	border: 1px solid #690000;
 }
 .red .checkbox:checked + label::after {
 	border-color: #dd0000;
 }
 .red .copy-button {
 	color: #690000;
 }
 .red .update-banner {
 	background-color: #300;
 	color: #d00;
 }
 .red .update-banner button {
 	background-color: #220000;
 	border-color: #b00;
 	color: #d00;
 }
 .red .modal-content {
 	background: #100;
 	border-color: #690000;
 	color: #b00;
 }
 .red .modal-content button {
 	background-color: #220000;
 	border-color: #b00;
 }
 .red .modal-content button.active {
 	background-color: #550000;
 }
@@ -1,53 +1,54 @@
-import React, { useState, useRef } from 'react';
+import React, { Component } from 'react';
-import { useHistory } from 'react-router-dom';
+import { withRouter } from 'react-router-dom';
-function Submit() {
+class Submit extends Component {
-	const [progress, setProgress] = useState(null);
+	constructor(props) {
-	const inputRef = useRef(null);
+		super(props);
 	const history = useHistory();
-	const submitArticle = async (event) => {
+		this.state = {
 			progress: null,
 		};
 		this.inputRef = React.createRef();
 	}
 	submitArticle = (event) => {
 		event.preventDefault();
 		const url = event.target[0].value;
-		inputRef.current.blur();
+		this.inputRef.current.blur();
-		setProgress('Submitting...');
+		this.setState({ progress: 'Submitting...' });
 		let data = new FormData();
 		data.append('url', url);
-		try {
+		fetch('/api/submit', { method: 'POST', body: data })
-			const res = await fetch('/api/submit', { method: 'POST', body: data });
+			.then(res => res.json())
-
+			.then(
-			if (res.ok) {
+				(result) => {
-				const result = await res.json();
+					this.props.history.replace('/' + result.nid);
-				history.replace('/' + result.nid);
+				},
-			} else {
+				(error) => {
-				let errorData;
+					this.setState({ progress: 'Error' });
 				try {
 					errorData = await res.json();
 				} catch (jsonError) {
 					// Not a JSON error from our API, so it's a server issue
 					throw new Error(`Server responded with ${res.status} ${res.statusText}`);
 				}
-				setProgress(errorData.error || 'An unknown error occurred.');
+			);
 			}
 		} catch (error) {
 			setProgress(`Error: ${error.toString()}`);
 		}
 	}
-	return (
+	render() {
-		<span className='search'>
+		const progress = this.state.progress;
-			<form onSubmit={submitArticle}>
+
-				<input
+		return (
-					placeholder='Submit URL'
+			<span className='search'>
-					ref={inputRef}
+				<form onSubmit={this.submitArticle}>
-				/>
+					<input
-			</form>
+						placeholder='Submit Article'
-			{progress && <p>{progress}</p>}
+						ref={this.inputRef}
-		</span>
+					/>
-	);
+				</form>
 				{progress ? progress : ''}
 			</span>
 		);
 	}
 }
-export default Submit;
+export default withRouter(Submit);
@@ -0,0 +1,34 @@
 import React from "react";
 import { Link } from "react-router-dom";
 import { sourceLink, infoLine, getLogoUrl } from "../utils.js";
 export class StoryItem extends React.Component {
 	constructor(props) {
 		super(props);
 	}
 	render() {
 		const story = this.props.story;
 		const { id, title } = story;
 		return (
 			<div className="item" key={id}>
 				<div className="title">
 					<Link className="link" to={"/" + id}>
 						<img
 							className="source-logo"
 							src={getLogoUrl(story)}
 							alt="source logo"
 						/>
 						{" "}
 						{title}
 					</Link>
 					<span className="source">({sourceLink(story)})</span>
 				</div>
 				{infoLine(story)}
 			</div>
 		);
 	}
 }
@@ -26,8 +26,3 @@
    font-family: 'Icomoon';
    src: url('icomoon.ttf') format('truetype');
 }
@font-face {
    font-family: 'Icomoon2';
    src: url('icomoon2.ttf') format('truetype');
 }
@@ -3,15 +3,9 @@ import ReactDOM from 'react-dom';
 import App from './App';
 import * as serviceWorker from './serviceWorker';
 // version 4
 ReactDOM.render(<App />, document.getElementById('root'));
 // If you want your app to work offline and load faster, you can change
 // // unregister() to register() below. Note this comes with some pitfalls.
 // // Learn more about service workers: https://bit.ly/CRA-PWA
-serviceWorker.register({
+serviceWorker.register();
  onUpdate: registration => {
    window.dispatchEvent(new CustomEvent('swUpdate', { detail: registration }));
  }
 });
@@ -0,0 +1,112 @@
 import React from 'react';
 import { Helmet } from 'react-helmet';
 import localForage from 'localforage';
 import { sourceLink, infoLine, ToggleDot } from '../utils.js';
 class Article extends React.Component {
 	constructor(props) {
 		super(props);
 		const id = this.props.match ? this.props.match.params.id : 'CLOL';
 		const cache = this.props.cache;
 		if (id in cache) console.log('cache hit');
 		this.state = {
 			story: cache[id] || false,
 			error: false,
 			pConv: [],
 		};
 	}
 	componentDidMount() {
 		const id = this.props.match ? this.props.match.params.id : 'CLOL';
 		localForage.getItem(id)
 			.then(
 				(value) => {
 					if (value) {
 						this.setState({ story: value });
 					}
 				}
 			);
 		fetch('/api/' + id)
 			.then(res => res.json())
 			.then(
 				(result) => {
 					this.setState({ story: result.story });
 					localForage.setItem(id, result.story);
 				},
 				(error) => {
 					this.setState({ error: true });
 				}
 			);
 	}
 	// Mark top-level node `n` to be rendered as plain paragraphs. Uses the
 	// updater form so queued state updates cannot drop an entry.
 	pConvert = (n) => {
 		this.setState(prevState => ({ pConv: [...prevState.pConv, n] }));
 	}
 	render() {
 		const id = this.props.match ? this.props.match.params.id : 'CLOL';
 		const story = this.state.story;
 		const error = this.state.error;
 		const pConv = this.state.pConv;
 		let nodes = null;
 		if (story.text) {
 			let div = document.createElement('div');
 			div.innerHTML = story.text;
 			nodes = div.childNodes;
 		}
 		return (
 			<div className='article-container'>
 				{error && <p>Connection error?</p>}
 				{story ?
 					<div className='article'>
 						<Helmet>
 							<title>{story.title} - QotNews</title>
 						</Helmet>
 						<h1>{story.title}</h1>
 						<div className='info'>
 							Source: {sourceLink(story)}
 						</div>
 						{infoLine(story)}
 						{nodes ?
 							<div className='story-text'>
 								{Object.entries(nodes).map(([k, v]) =>
 									pConv.includes(k) ?
 										v.innerHTML.split('\n\n').map(x =>
 											<p dangerouslySetInnerHTML={{ __html: x }} />
 										)
 										:
 										(v.nodeName === '#text' ?
 											<p>{v.data}</p>
 											:
 											<>
 												<v.localName dangerouslySetInnerHTML={v.innerHTML ? { __html: v.innerHTML } : null} />
 												{v.localName == 'pre' && <button onClick={() => this.pConvert(k)}>Convert Code to Paragraph</button>}
 											</>
 										)
 								)}
 							</div>
 							:
 							<p>Problem getting article :(</p>
 						}
 					</div>
 					:
 					<p>loading...</p>
 				}
 				<ToggleDot id={id} article={false} />
 			</div>
 		);
 	}
 }
 export default Article;
@@ -0,0 +1,145 @@
 import React from 'react';
 import { Link } from 'react-router-dom';
 import { HashLink } from 'react-router-hash-link';
 import { Helmet } from 'react-helmet';
 import moment from 'moment';
 import localForage from 'localforage';
 import { infoLine, ToggleDot } from '../utils.js';
 class Article extends React.Component {
 	constructor(props) {
 		super(props);
 		const id = this.props.match.params.id;
 		const cache = this.props.cache;
 		if (id in cache) console.log('cache hit');
 		this.state = {
 			story: cache[id] || false,
 			error: false,
 			collapsed: [],
 			expanded: [],
 		};
 	}
 	componentDidMount() {
 		const id = this.props.match.params.id;
 		localForage.getItem(id)
 			.then(
 				(value) => {
 					this.setState({ story: value });
 				}
 			);
 		fetch('/api/' + id)
 			.then(res => res.json())
 			.then(
 				(result) => {
 					this.setState({ story: result.story }, () => {
 						const hash = window.location.hash.substring(1);
 						if (hash) {
 							document.getElementById(hash).scrollIntoView();
 						}
 					});
 					localForage.setItem(id, result.story);
 				},
 				(error) => {
 					this.setState({ error: true });
 				}
 			);
 	}
 	collapseComment(cid) {
 		this.setState(prevState => ({
 			...prevState,
 			collapsed: [...prevState.collapsed, cid],
 			expanded: prevState.expanded.filter(x => x !== cid),
 		}));
 	}
 	expandComment(cid) {
 		this.setState(prevState => ({
 			...prevState,
 			collapsed: prevState.collapsed.filter(x => x !== cid),
 			expanded: [...prevState.expanded, cid],
 		}));
 	}
 	countComments(c) {
 		return c.comments.reduce((sum, x) => sum + this.countComments(x), 1);
 	}
 	displayComment(story, c, level) {
 		const cid = c.author + c.date;
 		const collapsed = this.state.collapsed.includes(cid);
 		const expanded = this.state.expanded.includes(cid);
 		const hidden = collapsed || (level == 4 && !expanded);
 		const hasChildren = c.comments.length !== 0;
 		return (
 			<div className={level ? 'comment lined' : 'comment'} key={cid}>
 				<div className='info'>
 					<p>
 						{c.author === story.author ? '[OP]' : ''} {c.author || '[Deleted]'}
 						{' '} | <HashLink to={'#' + cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
 						{hasChildren && (
 							hidden ?
 								<span className='collapser expander pointer' onClick={() => this.expandComment(cid)}>+</span>
 								:
 								<span className='collapser pointer' onClick={() => this.collapseComment(cid)}>–</span>
 						)}
 					</p>
 				</div>
 				<div className={collapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text }} />
 				{hidden && hasChildren ?
 					<div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c) - 1} more]</div>
 					:
 					c.comments.map(i => this.displayComment(story, i, level + 1))
 				}
 			</div>
 		);
 	}
 	render() {
 		const id = this.props.match.params.id;
 		const story = this.state.story;
 		const error = this.state.error;
 		return (
 			<div className='container'>
 				{error && <p>Connection error?</p>}
 				{story ?
 					<div className='article'>
 						<Helmet>
 							<title>{story.title} - QotNews Comments</title>
 						</Helmet>
 						<h1>{story.title}</h1>
 						<div className='info'>
 							<Link to={'/' + story.id}>View article</Link>
 						</div>
 						{infoLine(story)}
 						<div className='comments'>
 							{story.comments.map(c => this.displayComment(story, c, 0))}
 						</div>
 					</div>
 					:
 					<p>loading...</p>
 				}
 				<ToggleDot id={id} article={true} />
 			</div>
 		);
 	}
 }
 export default Article;
@@ -0,0 +1,64 @@
 import React from 'react';
 import { Helmet } from 'react-helmet';
 import localForage from 'localforage';
 import { StoryItem } from '../components/StoryItem.js';
 class Feed extends React.Component {
 	constructor(props) {
 		super(props);
 		this.state = {
 			stories: JSON.parse(localStorage.getItem('stories')) || false,
 			error: false,
 		};
 	}
 	componentDidMount() {
 		fetch('/api')
 			.then(res => res.json())
 			.then(
 				(result) => {
 					const updated = !this.state.stories || this.state.stories[0].id !== result.stories[0].id;
 					console.log('updated:', updated);
 					const { stories } = result;
 					this.setState({ stories });
 					localStorage.setItem('stories', JSON.stringify(stories));
 					if (updated) {
 						localForage.clear();
 						stories.forEach((x, i) => {
 							fetch('/api/' + x.id)
 								.then(res => res.json())
 								.then(({ story }) => {
 									localForage.setItem(x.id, story)
 										.then(console.log('preloaded', x.id, x.title));
 									this.props.updateCache(x.id, story);
 								}, error => { }
 								);
 						});
 					}
 				},
 				(error) => {
 					this.setState({ error: true });
 				}
 			);
 	}
 	render() {
 		const stories = this.state.stories;
 		const error = this.state.error;
 		return (
 			<div className='container'>
 				<Helmet>
 					<title>Feed - QotNews</title>
 				</Helmet>
 				{error && <p>Connection error?</p>}
 				{stories ? stories.map(story => <StoryItem story={story}></StoryItem>) : <p>loading...</p>}
 			</div>
 		);
 	}
 }
 export default Feed;
@@ -0,0 +1,76 @@
 import React from 'react';
 import { Helmet } from 'react-helmet';
 import AbortController from 'abort-controller';
 import { StoryItem } from '../components/StoryItem.js';
 class Results extends React.Component {
 	constructor(props) {
 		super(props);
 		this.state = {
 			stories: false,
 			error: false,
 		};
 		this.controller = null;
 	}
 	performSearch = () => {
 		if (this.controller) {
 			this.controller.abort();
 		}
 		this.controller = new AbortController();
 		const signal = this.controller.signal;
 		const search = this.props.location.search;
 		fetch('/api/search' + search, { method: 'get', signal: signal })
 			.then(res => res.json())
 			.then(
 				(result) => {
 					this.setState({ stories: result.results });
 				},
 				(error) => {
 					if (error.message !== 'The operation was aborted. ') {
 						this.setState({ error: true });
 					}
 				}
 			);
 	}
 	componentDidMount() {
 		this.performSearch();
 	}
 	componentDidUpdate(prevProps) {
 		if (this.props.location.search !== prevProps.location.search) {
 			this.performSearch();
 		}
 	}
 	render() {
 		const stories = this.state.stories;
 		const error = this.state.error;
 		return (
 			<div className='container'>
 				<Helmet>
 					<title>Feed - QotNews</title>
 				</Helmet>
 				{error && <p>Connection error?</p>}
 				{stories ?
 					<>
 						<p>Search results:</p>
 						<div className='comment lined'>
 							{stories ? stories.map(story => <StoryItem story={story}></StoryItem>) : <p>loading...</p>}
 						</div>
 					</>
 					:
 					<p>loading...</p>
 				}
 			</div>
 		);
 	}
 }
 export default Results;
Author	SHA1	Message	Date
Jason Schwarzenberger	5668fa5dbc	fix mistake.	2020-11-17 12:54:54 +13:00
Jason Schwarzenberger	b771b52501	add regex to get a unique ref from each sitemap/category based article url.	2020-11-17 12:38:28 +13:00
Jason Schwarzenberger	f5c7a658ba	cosmetic filters for the spinoff.	2020-11-16 16:49:39 +13:00
Jason Schwarzenberger	f5ccd844da	fix import error.	2020-11-16 15:41:09 +13:00
Jason Schwarzenberger	6a91b9402f	split categories, sitemap and other crap out of news.py	2020-11-16 15:30:33 +13:00
Jason Schwarzenberger	b80c1a5cb5	extract story list item from Results and Feed.	2020-11-16 13:17:58 +13:00
Jason Schwarzenberger	b23e470317	move reddit thresholds as settings variables.	2020-11-16 10:11:39 +13:00
Jason Schwarzenberger	7420b5ece9	fix microdata multiple authors	2020-11-12 17:33:46 +13:00
Jason Schwarzenberger	64ced635cc	fix mistake.	2020-11-12 17:15:29 +13:00
Jason Schwarzenberger	9318627f1b	ability to pass in multiple site maps/category urls.	2020-11-12 17:11:51 +13:00
Jason Schwarzenberger	3d0a3f1577	support list based json-ld authors.	2020-11-12 15:08:23 +13:00
Jason Schwarzenberger	587b10c438	recursive sitemaps (sitemap indexes)	2020-11-12 14:56:46 +13:00
Jason	00954c6cac	local browser scraper	2020-11-11 09:26:54 +00:00
Jason Schwarzenberger	637bc38476	fix mistake.	2020-11-11 17:21:31 +13:00
Jason Schwarzenberger	164b7e72c4	basically add declutter like capabilities.	2020-11-11 17:16:04 +13:00
Jason Schwarzenberger	3169af3002	hostname from settings.	2020-11-11 09:46:27 +13:00
Jason Schwarzenberger	d588a60930	add source to searchable attributes.	2020-11-11 09:37:54 +13:00
Jason Schwarzenberger	408e2870b2	tzinfo and microdata schema urls.	2020-11-10 16:51:27 +13:00
Jason Schwarzenberger	44b8b36547	add data cast in query.	2020-11-10 15:50:18 +13:00
Jason Schwarzenberger	4f49684194	remove logos from utils.js	2020-11-10 15:38:48 +13:00
Jason Schwarzenberger	1d78b1c592	fix favicon url.	2020-11-10 15:34:21 +13:00
Jason Schwarzenberger	0374794536	Sitemap and Category to get favicon into `icon` property of story.	2020-11-10 15:22:27 +13:00
Jason Schwarzenberger	943a1cfa4f	reader server	2020-11-10 14:56:21 +13:00
Jason Schwarzenberger	9cee370a25	tvnz icon	2020-11-10 14:10:02 +13:00
Jason Schwarzenberger	5efc6ef2d3	add related stories (in api only)	2020-11-10 14:09:56 +13:00
Jason Schwarzenberger	4ec50e20cb	feed thread loop.	2020-11-10 10:10:38 +13:00
Jason Schwarzenberger	c1b7877f4b	remove limit.	2020-11-09 17:54:50 +13:00
Jason Schwarzenberger	7b8cbfc9b9	try to make feed only determined by the max age.	2020-11-09 17:50:58 +13:00
Jason Schwarzenberger	bfa4108a8e	Merge remote-tracking branch 'tanner/master'	2020-11-09 16:08:28 +13:00
Jason Schwarzenberger	0bd0d40a31	use json type in sqlite.	2020-11-09 15:45:10 +13:00
Jason Schwarzenberger	4e04595415	fix search.	2020-11-09 15:44:44 +13:00
Jason	006db2960c	change to 3 days	2020-11-09 01:36:51 +00:00
Jason Schwarzenberger	1f063f0dac	undo log level change	2020-11-06 11:20:34 +13:00
Jason Schwarzenberger	1658346aa9	fix news.py feed.	2020-11-06 10:37:43 +13:00
Jason Schwarzenberger	2dbc702b40	switch to python-dateutil for parser, reverse sort xml feeds.	2020-11-06 10:02:39 +13:00
Jason Schwarzenberger	1c4764e67d	sort sitemap feed by lastmod time.	2020-11-06 09:30:15 +13:00
Jason	ee49d2021e	newsroom	2020-11-05 20:28:55 +00:00
Jason	c391c50ab1	use localize	2020-11-05 04:15:31 +00:00
Jason Schwarzenberger	095f0d549a	use replace.	2020-11-05 16:57:08 +13:00
Jason Schwarzenberger	c21c71667e	fix date issue.	2020-11-05 16:41:15 +13:00
Jason Schwarzenberger	c3a2c91a11	update requirements.txt	2020-11-05 16:33:50 +13:00
Jason Schwarzenberger	0f39446a61	tz aware for use in settings.	2020-11-05 16:30:55 +13:00
Jason Schwarzenberger	351059aab1	fix excludes.	2020-11-05 15:59:13 +13:00
Jason Schwarzenberger	4488e2c292	add an `excludes` list of substrings for urls in the settings for sitemap/category.	2020-11-05 15:51:59 +13:00
Jason Schwarzenberger	afda5b635c	disqus test.	2020-11-05 14:23:51 +13:00
Jason Schwarzenberger	0fc1a44d2b	fix issue in substack.	2020-11-04 17:40:29 +13:00
Jason Schwarzenberger	9fff1b9e46	avoid duplicate articles listed on the category page	2020-11-04 17:14:42 +13:00
Jason Schwarzenberger	16b59f6c67	try stop bad pages.	2020-11-04 16:34:31 +13:00
Jason Schwarzenberger	939f4775a7	better settings example.	2020-11-04 15:52:34 +13:00
Jason Schwarzenberger	9bfc6fc6fa	scraper settings, ordering and loop.	2020-11-04 15:47:12 +13:00
Jason Schwarzenberger	6ea9844d00	remove useless try blocks.	2020-11-04 15:37:19 +13:00
Jason Schwarzenberger	1318259d3d	imply referrer is substack.	2020-11-04 15:21:07 +13:00
Jason Schwarzenberger	98a0c2257c	increase declutter timeout.	2020-11-04 15:15:00 +13:00
Jason Schwarzenberger	e6976db25d	fix tabs	2020-11-04 15:04:20 +13:00
Jason Schwarzenberger	9edc8b7cca	move scraping for article content to files.	2020-11-04 15:00:58 +13:00
Jason Schwarzenberger	33e21e7f30	fix mistake.	2020-11-04 12:45:01 +13:00
Jason Schwarzenberger	892a99eca6	add + expander in place of collapser.	2020-11-04 12:43:15 +13:00
Jason Schwarzenberger	d718d05a04	fix dates for newsroom.	2020-11-04 11:53:16 +13:00
Jason Schwarzenberger	d1795eb1b8	add radionz and newsroom logos.	2020-11-04 11:30:56 +13:00
Jason Schwarzenberger	9f4ff4acf0	remove unnecessary sitemap.xml request.	2020-11-04 11:22:15 +13:00
Jason Schwarzenberger	db6aad84ec	fix mistake.	2020-11-04 11:12:01 +13:00
Jason Schwarzenberger	29f8a8b8cc	add news site categories feed.	2020-11-04 11:08:50 +13:00
tanner	9a279d44b1	Add header to get content type	2020-11-03 20:27:43 +00:00
Jason	abf8589e02	fix sitemap	2020-11-03 10:53:40 +00:00
Jason	b759f46582	use extruct for opengraph/json-ld/microdata of articles	2020-11-03 10:31:36 +00:00
Jason Schwarzenberger	736cdc8576	fix mistake.	2020-11-03 17:04:46 +13:00
Jason Schwarzenberger	244d416f6e	settings config of sitemap/substack publications.	2020-11-03 17:01:29 +13:00
tanner	e506804666	Clean code up	2020-11-03 03:45:56 +00:00
Jason Schwarzenberger	5f98a2e76a	Merge remote-tracking branch 'tanner/master' into master And adding relevant setings.py.example/etc.	2020-11-03 16:44:02 +13:00
Jason Schwarzenberger	0567cdfd9b	move sort to render.	2020-11-03 16:30:22 +13:00
Jason Schwarzenberger	4f90671cec	order feed by reverse chronological	2020-11-03 16:21:23 +13:00
Jason Schwarzenberger	e63a1456a5	add logos.	2020-11-03 16:07:07 +13:00
Jason Schwarzenberger	76f1d57702	sitemap based feed.	2020-11-03 16:00:03 +13:00
Jason Schwarzenberger	de80389ed0	add logos.	2020-11-03 12:48:19 +13:00
Jason Schwarzenberger	4e64cf682a	add the bulletin.	2020-11-03 12:41:16 +13:00
Jason Schwarzenberger	c5fe5d25a0	add substack.py top sites, replacing webworm.py	2020-11-03 12:28:39 +13:00
Jason	283a2b1545	fix webworm comments	2020-11-02 22:06:43 +00:00
Jason Schwarzenberger	0d6a86ace2	fix webworm dates.	2020-11-03 10:31:14 +13:00
Jason Schwarzenberger	f23bf628e0	add webworm/substack as a feed.	2020-11-02 17:09:59 +13:00
tanner	ca78a6d7a9	Move feed and Praw config to settings.py	2020-11-02 02:26:54 +00:00
tanner	7acce407e9	Fix index.html indentation	2020-11-02 00:38:34 +00:00
tanner	5281672000	Fix noscript font color	2020-11-02 00:36:11 +00:00
tanner	e59acefda9	Remove Whoosh	2020-11-02 00:22:40 +00:00
tanner	cbc802b7e9	Try Hackernews API twice	2020-11-02 00:17:22 +00:00
tanner	4579dfce00	Improve logging	2020-11-02 00:13:43 +00:00
tanner	0d16bec6f6	Fix table width CSS	2020-11-01 00:47:18 +00:00
tanner	feba8b7aa0	Make qotnews work with WaPo	2020-10-29 04:55:34 +00:00
tanner	ee5105743d	Upgrade readability	2020-10-29 01:24:13 +00:00
tanner	72802a6fcf	Show exerpt of hidden comments	2020-10-27 00:41:36 +00:00
tanner	99d3a234f4	Fix bug with rendering text nodes	2020-10-26 21:58:36 +00:00
tanner	f95df227f1	Add instructions to download search server	2020-10-26 21:58:36 +00:00
tanner	b82095ca7a	Add buttons to collapse / expand comments	2020-10-26 21:57:10 +00:00
tanner	992c1c1233	Monkeypatch earlier	2020-10-24 22:30:00 +00:00
tanner	88d2216627	Add a script to delete a story	2020-10-03 23:42:21 +00:00
tanner	6cf2f01b08	Adjust feeds	2020-10-03 23:41:57 +00:00
tanner	607573dd44	Add buttons to convert <pre> to <p>	2020-10-03 23:23:25 +00:00
tanner	c554ecd890	Add a line on UI to make search results obvious	2020-08-14 03:58:11 +00:00
tanner	6576eb1bac	Adjust content-type request timeout	2020-08-14 03:57:43 +00:00
tanner	472af76d1a	Adjust port	2020-08-14 03:57:18 +00:00
tanner	4727d34eb6	Delete displayed-attributes when init search	2020-08-14 03:56:47 +00:00
tanner	0e086b60b8	Remove business subreddit from feed	2020-08-14 03:55:28 +00:00
tanner	b46ce36c63	Update requirements	2020-07-08 05:24:32 +00:00
tanner	9a449bf3ca	Remove extra logging	2020-07-08 02:36:40 +00:00
tanner	0bd9f05250	Fix crash when HN feed fails	2020-07-08 02:36:40 +00:00
tanner	9c116bde4a	Remove document img and ignore r/technology	2020-07-08 02:36:40 +00:00
tanner	ebedaef00b	Tune search rankings and attributes	2020-07-08 02:36:40 +00:00
tanner	d7f0643bd7	Add more logging	2020-07-08 02:36:40 +00:00
tanner	eb1137299d	Remove article numbers	2020-07-08 02:36:40 +00:00
tanner	72d4a68929	Remove pre-fetching image	2020-07-08 02:36:40 +00:00
tanner	f1c846acd0	Remove get first image	2020-07-08 02:36:40 +00:00
tanner	850b30e353	Add requests timeouts and temporary logging	2020-07-08 02:36:40 +00:00
tanner	d614ad0743	Integrate with external MeiliSearch server	2020-07-08 02:36:40 +00:00
tanner	f46cafdc90	Integrate sqlite database with server	2020-07-08 02:36:40 +00:00
tanner	873dc44cb1	Update whoosh migration script	2020-07-08 02:36:40 +00:00
tanner	1fb9db3f4b	Store ref list in database too	2020-07-08 02:36:40 +00:00
tanner	b923908a45	Begin initial sqlite conversion	2020-07-08 02:36:40 +00:00
tanner	dbdcfaa921	Check if cache is broken	2020-07-08 02:36:40 +00:00
tanner	8799b10525	Fall back to ref on manual submission title	2020-07-08 02:36:40 +00:00
tanner	6430fe5e9f	Check content-type	2020-07-08 02:36:40 +00:00
tanner	a4cf719cb8	Remove technology subreddit	2020-07-08 02:36:40 +00:00
tanner	595f469b4a	Update tildes parser group tag	2020-07-08 02:36:40 +00:00
tanner	b252c6a207	Make noscript background white	2020-06-22 20:52:51 +00:00
tanner	02b73a8b14	Fix cache load race condition bug	2020-01-28 04:20:48 +00:00
tanner	72f1043952	Remove preload of news source icons	2020-01-28 04:20:29 +00:00
tanner	7b31fcf690	Remove keys of uncached stories	2020-01-28 04:20:05 +00:00
tanner	b3d2eeb67f	Fix tildes deleted comment parser error	2020-01-28 04:19:26 +00:00
tanner	9078b567f0	Add del tag and sort tags	2020-01-04 23:37:41 +00:00
tanner	ced20390eb	Fix back/forward scroll jump issue	2020-01-04 23:36:24 +00:00
tanner	6cd41f0902	Add forward button, convert icons to font	2020-01-03 03:45:56 +00:00
tanner	746932ab96	Add style changes to prevent horizontal scrolling	2019-12-22 21:43:33 +00:00
tanner	2822974b6e	Stop using archive.is on articles (hits CAPTCHAs)	2019-12-15 22:47:33 +00:00
tanner	8fd7fc158c	Fix search result icons	2019-12-14 07:39:25 +00:00
tanner	17ef7e3a65	Whitelist more html tags	2019-12-14 07:39:10 +00:00
tanner	3363ccd47e	Embed base64 logo directly in source to avoid load	2019-12-02 23:54:02 +00:00
tanner	2d80b19414	Grab comments on manually submitted links	2019-12-02 23:15:51 +00:00
tanner	ebcbf1b624	Sanitize html	2019-12-01 22:18:41 +00:00
tanner	e231cd5c31	Decrease feed cache length to 150	2019-12-01 22:18:14 +00:00
tanner	569e5b16ca	Add logo for manual submissions	2019-11-14 08:38:11 +00:00
tanner	db5097ac57	Drop articles more than two days old	2019-11-08 21:50:33 +00:00
tanner	2edb3ceba7	Allow manual submission of articles	2019-11-08 05:55:30 +00:00
tanner	38b5f2dbeb	Move to gevent production http server	2019-11-08 02:37:57 +00:00
tanner	6826f731c7	Handle hostnames better	2019-11-07 22:10:08 +00:00
tanner	bb693ba434	Add subreddit	2019-11-07 22:09:45 +00:00
tanner	632b0276c4	Abort previous search requests	2019-11-07 22:08:28 +00:00
tanner	4cf97304e4	Get rid of lint warnings	2019-10-22 07:31:59 +00:00
tanner	9e55f6e4ec	Fix Tildes down for maintenance edge case	2019-10-22 05:01:30 +00:00
tanner	edc4c439d7	Prefetch first images	2019-10-19 07:33:06 +00:00
tanner	187c6b8110	Cache articles in memory for speed	2019-10-18 21:26:22 +00:00
tanner	6764bf0d6d	Add serviceworker, render logos directly	2019-10-18 05:09:49 +00:00
tanner	dc588fee91	Fix underlines	2019-10-18 01:20:38 +00:00
tanner	f8998b687e	Fix crash from domain and ext check bug	2019-10-16 08:56:31 +00:00
tanner	e4f81472fc	Fix copy/paste error, switch to info logging	2019-10-16 05:26:47 +00:00
tanner	f293f2b5f9	Begin README and add license	2019-10-15 16:40:55 -06:00
tanner	810e8c5ead	Archive WSJ articles first, catch KeyboardInterrupt	2019-10-15 21:03:47 +00:00
tanner	9c4766a928	Stop using python keyword id for id	2019-10-15 20:36:20 +00:00
tanner	0f5b2a5ff9	Cache all articles in IndexedDB	2019-10-12 23:41:31 +00:00
tanner	7cb87b59fe	Move archive to Whoosh and add search	2019-10-12 05:32:17 +00:00
tanner	45b75b420b	Gitkeep archive directory	2019-10-10 21:55:21 +00:00
tanner	f0721519e1	Serve client through apiserver, adding meta info	2019-10-10 21:54:29 +00:00
tanner	25a671f58e	Set title on article and comment pages, add comment anchors	2019-10-10 21:52:28 +00:00
tanner	5fd4fdb21c	Fix Tildes comments with unknown authors	2019-10-08 08:01:17 +00:00
tanner	19e9a80be1	Archive Bloomberg articles first	2019-10-08 08:00:50 +00:00
tanner	5caa4542d8	Gitkeep apiserver data directory	2019-10-08 07:59:30 +00:00
tanner	1ed2baded6	Add huge margin to bottom of body for better pagescroll	2019-09-24 18:40:22 +00:00
tanner	c7734eb2bc	Add site logos, keep displaying news on error	2019-09-24 08:23:14 +00:00
tanner	0053147226	Ignore certain files and domains, remove refs	2019-09-24 08:22:06 +00:00
tanner	0496fbba45	Ignore new Tildes posts and handle deleted ones	2019-09-24 08:21:26 +00:00
tanner	0a1ebaa8b8	Handle Reddit PRAW exceptions	2019-09-24 08:20:46 +00:00
tanner	2ede5ed6ff	Filter out False comments	2019-08-30 06:23:14 +00:00
tanner	20a9d9d452	Settle on serif font, add scroll to top component	2019-08-30 06:22:26 +00:00
tanner	23cdbc9292	Render reddit markdown, poll tildes better, add utils	2019-08-28 04:13:02 +00:00
tanner	10d4ec863e	Snip deeply nested comments	2019-08-26 01:37:50 +00:00
tanner	fc8ce79e33	Try outline.com for reader mode first	2019-08-25 23:49:08 +00:00
tanner	8eca354a47	Add favicons to webclient	2019-08-25 23:48:24 +00:00
tanner	b1275d9a27	Add a button to toggle between article and comments	2019-08-25 08:50:49 +00:00
tanner	9336760ed3	Add fonts, fix styling issues	2019-08-25 07:46:58 +00:00
tanner	cf9e197e6c	Fix tildes comments parsing bug	2019-08-25 07:46:22 +00:00
tanner	2b1a352917	Clear localstorage cache and add slogan	2019-08-25 01:25:28 +00:00
tanner	1b6c8fc6cb	Add tildes to feeds	2019-08-25 00:36:26 +00:00
tanner	a2509958da	Add reddit to feeds	2019-08-24 21:37:43 +00:00
tanner	4450e93c65	Remove DOMPurify import	2019-08-24 08:49:53 +00:00
tanner	d341d4422f	Abstract api server feeds	2019-08-24 08:49:11 +00:00
tanner	82074eb8aa	Stop running DOMPurify on reader server	2019-08-24 05:09:02 +00:00
tanner	c1a81a4d8c	Write news stories to disk	2019-08-24 05:07:16 +00:00
tanner	dde6ac4566	Finish prototype web client	2019-08-24 05:04:51 +00:00
tanner	62d68da415	Finish prototype api server	2019-08-23 08:23:48 +00:00
tanner	c04b5c27f2	Figure out .gitignores	2019-08-23 08:23:26 +00:00
tanner	771c3987ec	Change reader server useragent and port	2019-08-23 08:21:25 +00:00
tanner	c0607b3fb6	Prototype readability server	2019-08-20 21:49:06 -06:00
tanner	a814411c12	Initial commit	2019-08-20 21:48:55 -06:00