Compare commits

...

109 Commits

Author SHA1 Message Date
Jason Schwarzenberger
2439c113b3 update declutter. 2020-11-24 16:54:21 +13:00
Jason Schwarzenberger
0f5e28136d update declutter. 2020-11-24 11:20:37 +13:00
Jason Schwarzenberger
bb1413b586 sort substack feed by time. 2020-11-24 10:56:38 +13:00
Jason Schwarzenberger
0a27c0da1f update declutter. 2020-11-24 10:42:41 +13:00
Jason Schwarzenberger
fe01ea52e5 get favicons for custom substack publications. 2020-11-24 10:36:31 +13:00
Jason Schwarzenberger
3daae5fa1b change substack time parsing to misc.time 2020-11-23 16:46:54 +13:00
Jason Schwarzenberger
25caee17d6 add related stories to pre-fetch caching. 2020-11-23 16:26:37 +13:00
Jason Schwarzenberger
c1b6349771 namespace the refs for hn and substack. 2020-11-23 16:09:12 +13:00
Jason
54a4c7e55a fix with try-catch 2020-11-23 01:20:40 +00:00
Jason
b12a3570b0 add logging, extend id length 2020-11-21 21:21:31 +00:00
Jason Schwarzenberger
0bfa920654 fix mistake. 2020-11-20 04:29:54 +00:00
Jason Schwarzenberger
9341b4d966 fix mistake. 2020-11-20 04:27:28 +00:00
Jason Schwarzenberger
a2e5faa3b5 fix empty source links. 2020-11-20 17:02:09 +13:00
Jason Schwarzenberger
a86eb98c1a fix hn self posts related discussion. 2020-11-20 13:06:19 +13:00
Jason Schwarzenberger
abf7f0a802 force reader update in update-story.py 2020-11-20 12:21:27 +13:00
Jason Schwarzenberger
d288546d6f update declutter. 2020-11-20 11:51:56 +13:00
Jason Schwarzenberger
cc130942ca update declutter. 2020-11-20 11:48:46 +13:00
Jason Schwarzenberger
f0b14408d4 fix other discussions links. 2020-11-20 09:47:56 +13:00
Jason Schwarzenberger
e1830a589b wip on other discussions ui. 2020-11-19 17:27:00 +13:00
Jason Schwarzenberger
32bc3b906b add update-story.py 2020-11-19 15:06:55 +13:00
Jason Schwarzenberger
f5e65632b8 fix comment date. 2020-11-19 14:27:24 +13:00
Jason Schwarzenberger
1fe524207e stuff comments. 2020-11-19 14:23:01 +13:00
Jason Schwarzenberger
dc3d17b171 update declutter 2020-11-19 12:30:27 +13:00
Jason Schwarzenberger
539350a83d port separation. 2020-11-18 17:21:37 +13:00
Jason Schwarzenberger
2f730c1f52 update declutter. 2020-11-18 15:20:23 +13:00
Jason Schwarzenberger
e0960d59f3 update readme. 2020-11-18 13:26:34 +13:00
Jason Schwarzenberger
f5b38f5c6b remove readerserver, add declutter. 2020-11-18 12:59:35 +13:00
Jason Schwarzenberger
c9da2a078b increase setTimeouts. 2020-11-18 10:06:45 +13:00
Jason Schwarzenberger
78654e0c63 reduce setTimeout. 2020-11-17 16:07:33 +13:00
Jason Schwarzenberger
3b885e4327 renaming things. 2020-11-17 15:54:14 +13:00
Jason Schwarzenberger
55d50a86d8 hmmm 2020-11-17 15:13:38 +13:00
Jason Schwarzenberger
55e7f6bb14 cosmetic filters for newshub. 2020-11-17 15:01:12 +13:00
Jason Schwarzenberger
5668fa5dbc fix mistake. 2020-11-17 12:54:54 +13:00
Jason Schwarzenberger
b771b52501 add regex to get a unique ref from each sitemap/category based article url. 2020-11-17 12:38:28 +13:00
Jason Schwarzenberger
f5c7a658ba cosmetic filters for the spinoff. 2020-11-16 16:49:39 +13:00
Jason Schwarzenberger
f5ccd844da fix import error. 2020-11-16 15:41:09 +13:00
Jason Schwarzenberger
6a91b9402f split categories, sitemap and other crap out of news.py 2020-11-16 15:30:33 +13:00
Jason Schwarzenberger
b80c1a5cb5 extract story list item from Results and Feed. 2020-11-16 13:17:58 +13:00
Jason Schwarzenberger
b23e470317 move reddit thresholds as settings variables. 2020-11-16 10:11:39 +13:00
Jason Schwarzenberger
7420b5ece9 fix microdata multiple authors 2020-11-12 17:33:46 +13:00
Jason Schwarzenberger
64ced635cc fix mistake. 2020-11-12 17:15:29 +13:00
Jason Schwarzenberger
9318627f1b ability to pass in multiple site maps/category urls. 2020-11-12 17:11:51 +13:00
Jason Schwarzenberger
3d0a3f1577 support list based json-ld authors. 2020-11-12 15:08:23 +13:00
Jason Schwarzenberger
587b10c438 recursive sitemaps (sitemap indexes) 2020-11-12 14:56:46 +13:00
Jason
00954c6cac local browser scraper 2020-11-11 09:26:54 +00:00
Jason Schwarzenberger
637bc38476 fix mistake. 2020-11-11 17:21:31 +13:00
Jason Schwarzenberger
164b7e72c4 basically add declutter like capabilities. 2020-11-11 17:16:04 +13:00
Jason Schwarzenberger
3169af3002 hostname from settings. 2020-11-11 09:46:27 +13:00
Jason Schwarzenberger
d588a60930 add source to searchable attributes. 2020-11-11 09:37:54 +13:00
Jason Schwarzenberger
408e2870b2 tzinfo and microdata schema urls. 2020-11-10 16:51:27 +13:00
Jason Schwarzenberger
44b8b36547 add data cast in query. 2020-11-10 15:50:18 +13:00
Jason Schwarzenberger
4f49684194 remove logos from utils.js 2020-11-10 15:38:48 +13:00
Jason Schwarzenberger
1d78b1c592 fix favicon url. 2020-11-10 15:34:21 +13:00
Jason Schwarzenberger
0374794536 Sitemap and Category to get favicon into icon property of story. 2020-11-10 15:22:27 +13:00
Jason Schwarzenberger
943a1cfa4f reader server 2020-11-10 14:56:21 +13:00
Jason Schwarzenberger
9cee370a25 tvnz icon 2020-11-10 14:10:02 +13:00
Jason Schwarzenberger
5efc6ef2d3 add related stories (in api only) 2020-11-10 14:09:56 +13:00
Jason Schwarzenberger
4ec50e20cb feed thread loop. 2020-11-10 10:10:38 +13:00
Jason Schwarzenberger
c1b7877f4b remove limit. 2020-11-09 17:54:50 +13:00
Jason Schwarzenberger
7b8cbfc9b9 try to make feed only determined by the max age. 2020-11-09 17:50:58 +13:00
Jason Schwarzenberger
bfa4108a8e Merge remote-tracking branch 'tanner/master' 2020-11-09 16:08:28 +13:00
Jason Schwarzenberger
0bd0d40a31 use json type in sqlite. 2020-11-09 15:45:10 +13:00
Jason Schwarzenberger
4e04595415 fix search. 2020-11-09 15:44:44 +13:00
Jason
006db2960c change to 3 days 2020-11-09 01:36:51 +00:00
Jason Schwarzenberger
1f063f0dac undo log level change 2020-11-06 11:20:34 +13:00
Jason Schwarzenberger
1658346aa9 fix news.py feed. 2020-11-06 10:37:43 +13:00
Jason Schwarzenberger
2dbc702b40 switch to python-dateutil for parser, reverse sort xml feeds. 2020-11-06 10:02:39 +13:00
Jason Schwarzenberger
1c4764e67d sort sitemap feed by lastmod time. 2020-11-06 09:30:15 +13:00
Jason
ee49d2021e newsroom 2020-11-05 20:28:55 +00:00
Jason
c391c50ab1 use localize 2020-11-05 04:15:31 +00:00
Jason Schwarzenberger
095f0d549a use replace. 2020-11-05 16:57:08 +13:00
Jason Schwarzenberger
c21c71667e fix date issue. 2020-11-05 16:41:15 +13:00
Jason Schwarzenberger
c3a2c91a11 update requirements.txt 2020-11-05 16:33:50 +13:00
Jason Schwarzenberger
0f39446a61 tz aware for use in settings. 2020-11-05 16:30:55 +13:00
Jason Schwarzenberger
351059aab1 fix excludes. 2020-11-05 15:59:13 +13:00
Jason Schwarzenberger
4488e2c292 add an excludes list of substrings for urls in the settings for sitemap/category. 2020-11-05 15:51:59 +13:00
Jason Schwarzenberger
afda5b635c disqus test. 2020-11-05 14:23:51 +13:00
Jason Schwarzenberger
0fc1a44d2b fix issue in substack. 2020-11-04 17:40:29 +13:00
Jason Schwarzenberger
9fff1b9e46 avoid duplicate articles listed on the category page 2020-11-04 17:14:42 +13:00
Jason Schwarzenberger
16b59f6c67 try stop bad pages. 2020-11-04 16:34:31 +13:00
Jason Schwarzenberger
939f4775a7 better settings example. 2020-11-04 15:52:34 +13:00
Jason Schwarzenberger
9bfc6fc6fa scraper settings, ordering and loop. 2020-11-04 15:47:12 +13:00
Jason Schwarzenberger
6ea9844d00 remove useless try blocks. 2020-11-04 15:37:19 +13:00
Jason Schwarzenberger
1318259d3d imply referrer is substack. 2020-11-04 15:21:07 +13:00
Jason Schwarzenberger
98a0c2257c increase declutter timeout. 2020-11-04 15:15:00 +13:00
Jason Schwarzenberger
e6976db25d fix tabs 2020-11-04 15:04:20 +13:00
Jason Schwarzenberger
9edc8b7cca move scraping for article content to files. 2020-11-04 15:00:58 +13:00
Jason Schwarzenberger
33e21e7f30 fix mistake. 2020-11-04 12:45:01 +13:00
Jason Schwarzenberger
892a99eca6 add + expander in place of collapser. 2020-11-04 12:43:15 +13:00
Jason Schwarzenberger
d718d05a04 fix dates for newsroom. 2020-11-04 11:53:16 +13:00
Jason Schwarzenberger
d1795eb1b8 add radionz and newsroom logos. 2020-11-04 11:30:56 +13:00
Jason Schwarzenberger
9f4ff4acf0 remove unnecessary sitemap.xml request. 2020-11-04 11:22:15 +13:00
Jason Schwarzenberger
db6aad84ec fix mistake. 2020-11-04 11:12:01 +13:00
Jason Schwarzenberger
29f8a8b8cc add news site categories feed. 2020-11-04 11:08:50 +13:00
Jason
abf8589e02 fix sitemap 2020-11-03 10:53:40 +00:00
Jason
b759f46582 use extruct for opengraph/json-ld/microdata of articles 2020-11-03 10:31:36 +00:00
Jason Schwarzenberger
736cdc8576 fix mistake. 2020-11-03 17:04:46 +13:00
Jason Schwarzenberger
244d416f6e settings config of sitemap/substack publications. 2020-11-03 17:01:29 +13:00
Jason Schwarzenberger
5f98a2e76a Merge remote-tracking branch 'tanner/master' into master
And adding relevant setings.py.example/etc.
2020-11-03 16:44:02 +13:00
Jason Schwarzenberger
0567cdfd9b move sort to render. 2020-11-03 16:30:22 +13:00
Jason Schwarzenberger
4f90671cec order feed by reverse chronological 2020-11-03 16:21:23 +13:00
Jason Schwarzenberger
e63a1456a5 add logos. 2020-11-03 16:07:07 +13:00
Jason Schwarzenberger
76f1d57702 sitemap based feed. 2020-11-03 16:00:03 +13:00
Jason Schwarzenberger
de80389ed0 add logos. 2020-11-03 12:48:19 +13:00
Jason Schwarzenberger
4e64cf682a add the bulletin. 2020-11-03 12:41:16 +13:00
Jason Schwarzenberger
c5fe5d25a0 add substack.py top sites, replacing webworm.py 2020-11-03 12:28:39 +13:00
Jason
283a2b1545 fix webworm comments 2020-11-02 22:06:43 +00:00
Jason Schwarzenberger
0d6a86ace2 fix webworm dates. 2020-11-03 10:31:14 +13:00
Jason Schwarzenberger
f23bf628e0 add webworm/substack as a feed. 2020-11-02 17:09:59 +13:00
40 changed files with 4303 additions and 3885 deletions

3
.gitmodules vendored Normal file

@@ -0,0 +1,3 @@
[submodule "readerserver"]
path = readerserver
url = https://github.com/master5o1/declutter.git

README.md

@@ -20,7 +20,7 @@ $ sudo apt install yarn
 Clone this repo:
 ```text
-$ git clone https://gogs.tannercollin.com/tanner/qotnews.git
+$ git clone --recurse-submodules https://git.1j.nz/jason/qotnews.git
 $ cd qotnews
 ```
@@ -37,14 +37,14 @@ $ source env/bin/activate
 Configure Praw for your Reddit account (optional):
-* Go to https://www.reddit.com/prefs/apps
-* Click "Create app"
-* Name: whatever
-* App type: script
-* Description: blank
-* About URL: blank
-* Redirect URL: your GitHub profile
-* Submit, copy the client ID and client secret into `settings.py` below
+- Go to https://www.reddit.com/prefs/apps
+- Click "Create app"
+- Name: whatever
+- App type: script
+- Description: blank
+- About URL: blank
+- Redirect URL: your GitHub profile
+- Submit, copy the client ID and client secret into `settings.py` below
 ```text
 (env) $ vim settings.py.example
@@ -109,7 +109,7 @@ stdout_logfile_maxbytes=1MB
 [program:qotnewsreader]
 user=qotnews
 directory=/home/qotnews/qotnews/readerserver
-command=node main.js
+command=node index.js
 autostart=true
 autorestart=true
 stderr_logfile=/var/log/qotnewsreader.log

apiserver/database.py

@@ -1,9 +1,9 @@
-import json
+from datetime import datetime, timedelta
 from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import sessionmaker
 from sqlalchemy.exc import IntegrityError
+from sqlalchemy.types import JSON

 engine = create_engine('sqlite:///data/qotnews.sqlite')
 Session = sessionmaker(bind=engine)
@@ -15,8 +15,8 @@ class Story(Base):
     sid = Column(String(16), primary_key=True)
     ref = Column(String(16), unique=True)
-    meta_json = Column(String)
-    full_json = Column(String)
+    meta = Column(JSON)
+    data = Column(JSON)
     title = Column(String)

 class Reflist(Base):
@@ -24,6 +24,7 @@ class Reflist(Base):
     rid = Column(Integer, primary_key=True)
     ref = Column(String(16), unique=True)
+    urlref = Column(String)
     sid = Column(String, ForeignKey('stories.sid'), unique=True)
     source = Column(String(16))
@@ -36,19 +37,21 @@ def get_story(sid):
 def put_story(story):
     story = story.copy()
-    full_json = json.dumps(story)
-    story.pop('text', None)
-    story.pop('comments', None)
-    meta_json = json.dumps(story)
+    data = {}
+    data.update(story)
+    meta = {}
+    meta.update(story)
+    meta.pop('text', None)
+    meta.pop('comments', None)

     try:
         session = Session()
         s = Story(
             sid=story['id'],
             ref=story['ref'],
-            full_json=full_json,
-            meta_json=meta_json,
+            data=data,
+            meta=meta,
             title=story.get('title', None),
         )
         session.merge(s)
@@ -63,24 +66,39 @@ def get_story_by_ref(ref):
     session = Session()
     return session.query(Story).filter(Story.ref==ref).first()

-def get_reflist(amount):
+def get_stories_by_url(url):
     session = Session()
-    q = session.query(Reflist).order_by(Reflist.rid.desc()).limit(amount)
-    return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]
+    return session.query(Story).\
+        filter(Story.title != None).\
+        filter(Story.meta['url'].as_string() == url).\
+        order_by(Story.meta['date'].desc())

-def get_stories(amount):
+def get_ref_by_sid(sid):
     session = Session()
-    q = session.query(Reflist, Story.meta_json).\
-        order_by(Reflist.rid.desc()).\
+    x = session.query(Reflist).\
+        filter(Reflist.sid == sid).\
+        first()
+    return dict(ref=x.ref, sid=x.sid, source=x.source, urlref=x.urlref)
+
+def get_reflist():
+    session = Session()
+    q = session.query(Reflist).order_by(Reflist.rid.desc())
+    return [dict(ref=x.ref, sid=x.sid, source=x.source, urlref=x.urlref) for x in q.all()]
+
+def get_stories(maxage=60*60*24*2):
+    time = datetime.now().timestamp() - maxage
+    session = Session()
+    q = session.query(Reflist, Story.meta).\
         join(Story).\
         filter(Story.title != None).\
-        limit(amount)
+        filter(Story.meta['date'].as_integer() > time).\
+        order_by(Story.meta['date'].desc())
     return [x[1] for x in q]

-def put_ref(ref, sid, source):
+def put_ref(ref, sid, source, urlref):
     try:
         session = Session()
-        r = Reflist(ref=ref, sid=sid, source=source)
+        r = Reflist(ref=ref, sid=sid, source=source, urlref=urlref)
         session.add(r)
         session.commit()
     except:

apiserver/feed.py

@@ -6,61 +6,84 @@ logging.basicConfig(
 import requests
 import time
 from bs4 import BeautifulSoup
+import itertools

 import settings
-from feeds import hackernews, reddit, tildes, manual
+from feeds import hackernews, reddit, tildes, substack, manual
+from feeds.sitemap import Sitemap
+from feeds.category import Category
+from scrapers import outline, declutter, headless, simple

-OUTLINE_API = 'https://api.outline.com/v3/parse_article'
-READ_API = 'http://127.0.0.1:33843'
 INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
-TWO_DAYS = 60*60*24*2

-def list():
-    feed = []
+substacks = {}
+for key, value in settings.SUBSTACK.items():
+    substacks[key] = substack.Publication(value['url'])
+categories = {}
+for key, value in settings.CATEGORY.items():
+    categories[key] = Category(value)
+sitemaps = {}
+for key, value in settings.SITEMAP.items():
+    sitemaps[key] = Sitemap(value)
+
+def get_list():
+    feeds = {}
     if settings.NUM_HACKERNEWS:
-        feed += [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
+        feeds['hackernews'] = [(x, 'hackernews', x) for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
     if settings.NUM_REDDIT:
-        feed += [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
+        feeds['reddit'] = [(x, 'reddit', x) for x in reddit.feed()[:settings.NUM_REDDIT]]
     if settings.NUM_TILDES:
-        feed += [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
+        feeds['tildes'] = [(x, 'tildes', x) for x in tildes.feed()[:settings.NUM_TILDES]]
+    if settings.NUM_SUBSTACK:
+        feeds['substack'] = [(x, 'substack', x) for x in substack.top.feed()[:settings.NUM_SUBSTACK]]
+    for key, publication in substacks.items():
+        count = settings.SUBSTACK[key]['count']
+        feeds[key] = [(x, key, x) for x in publication.feed()[:count]]
+    for key, sites in categories.items():
+        count = settings.CATEGORY[key].get('count') or 0
+        excludes = settings.CATEGORY[key].get('excludes')
+        tz = settings.CATEGORY[key].get('tz')
+        feeds[key] = [(x, key, u) for x, u in sites.feed(excludes)[:count]]
+    for key, sites in sitemaps.items():
+        count = settings.SITEMAP[key].get('count') or 0
+        excludes = settings.SITEMAP[key].get('excludes')
+        feeds[key] = [(x, key, u) for x, u in sites.feed(excludes)[:count]]
+    values = feeds.values()
+    feed = itertools.chain.from_iterable(itertools.zip_longest(*values, fillvalue=None))
+    feed = list(filter(None, feed))
     return feed

 def get_article(url):
-    try:
-        params = {'source_url': url}
-        headers = {'Referer': 'https://outline.com/'}
-        r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=20)
-        if r.status_code == 429:
-            logging.info('Rate limited by outline, sleeping 30s and skipping...')
-            time.sleep(30)
-            return ''
-        if r.status_code != 200:
-            raise Exception('Bad response code ' + str(r.status_code))
-        html = r.json()['data']['html']
-        if 'URL is not supported by Outline' in html:
-            raise Exception('URL not supported by Outline')
-        return html
-    except KeyboardInterrupt:
-        raise
-    except BaseException as e:
-        logging.error('Problem outlining article: {}'.format(str(e)))
-
-    logging.info('Trying our server instead...')
-
-    try:
-        r = requests.post(READ_API, data=dict(url=url), timeout=20)
-        if r.status_code != 200:
-            raise Exception('Bad response code ' + str(r.status_code))
-        return r.text
-    except KeyboardInterrupt:
-        raise
-    except BaseException as e:
-        logging.error('Problem getting article: {}'.format(str(e)))
-
+    scrapers = {
+        'headless': headless,
+        'simple': simple,
+        'outline': outline,
+        'declutter': declutter,
+    }
+    available = settings.SCRAPERS or ['headless', 'simple']
+    if 'simple' not in available:
+        available += ['simple']
+
+    for scraper in available:
+        if scraper not in scrapers.keys():
+            continue
+        try:
+            html = scrapers[scraper].get_html(url)
+            if html:
+                return html
+        except KeyboardInterrupt:
+            raise
+        except:
+            pass
     return ''

 def get_content_type(url):
     try:
@@ -78,7 +101,7 @@ def get_content_type(url):
     except:
         return ''

-def update_story(story, is_manual=False):
+def update_story(story, is_manual=False, urlref=None):
     res = {}

     if story['source'] == 'hackernews':
@@ -87,6 +110,14 @@ def update_story(story, is_manual=False):
         res = reddit.story(story['ref'])
     elif story['source'] == 'tildes':
         res = tildes.story(story['ref'])
+    elif story['source'] == 'substack':
+        res = substack.top.story(story['ref'])
+    elif story['source'] in categories.keys():
+        res = categories[story['source']].story(story['ref'], urlref)
+    elif story['source'] in sitemaps.keys():
+        res = sitemaps[story['source']].story(story['ref'], urlref)
+    elif story['source'] in substacks.keys():
+        res = substacks[story['source']].story(story['ref'])
     elif story['source'] == 'manual':
         res = manual.story(story['ref'])

@@ -96,7 +127,7 @@ def update_story(story, is_manual=False):
         logging.info('Story not ready yet')
         return False

-    if story['date'] and not is_manual and story['date'] + TWO_DAYS < time.time():
+    if story['date'] and not is_manual and story['date'] + settings.MAX_STORY_AGE < time.time():
         logging.info('Story too old, removing')
         return False

apiserver/feeds/category.py Normal file

@@ -0,0 +1,72 @@
import logging
logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)

if __name__ == '__main__':
    import sys
    sys.path.insert(0,'.')

from bs4 import BeautifulSoup

import settings
from utils import clean
from misc.api import xml
from misc.news import Base

def _filter_links(links, category_url, excludes=None):
    links = list(filter(None, [link if link.startswith(category_url) else None for link in links]))
    links = list(filter(None, [link if link != category_url else None for link in links]))
    links = list(set(links))
    if excludes:
        links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
    return links

def _get_category(category_url, excludes=None):
    base_url = '/'.join(category_url.split('/')[:3])
    markup = xml(lambda x: category_url)
    if not markup: return []
    soup = BeautifulSoup(markup, features='html.parser')
    links = soup.find_all('a', href=True)
    links = [link.get('href') for link in links]
    links = [f"{base_url}{link}" if link.startswith('/') else link for link in links]
    links = _filter_links(links, category_url, excludes)
    return links

class Category(Base):
    def __init__(self, config):
        self.config = config
        self.category_url = config.get('url')
        self.tz = config.get('tz')

    def feed(self, excludes=None):
        links = []
        if isinstance(self.category_url, str):
            links += _get_category(self.category_url, excludes)
        elif isinstance(self.category_url, list):
            for url in self.category_url:
                links += _get_category(url, excludes)
        links = list(set(links))
        return [(self.get_id(link), link) for link in links]

# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
    print("Category: RadioNZ")
    site = Category({ 'url': "https://www.rnz.co.nz/news/" })
    excludes = [
        'rnz.co.nz/news/sport',
        'rnz.co.nz/weather',
        'rnz.co.nz/news/weather',
    ]
    posts = site.feed(excludes)
    print(posts[:5])
    print(site.story(posts[0][0], posts[0][1]))

    print("Category: Newsroom")
    site = Category({ 'url': "https://www.newsroom.co.nz/news/", 'tz': 'Pacific/Auckland'})
    posts = site.feed()
    print(posts[:5])
    print(site.story(posts[0][0], posts[0][1]))

apiserver/feeds/hackernews.py

@@ -40,7 +40,7 @@ def api(route, ref=None):
         return False

 def feed():
-    return [str(x) for x in api(API_TOPSTORIES) or []]
+    return ['hn:'+str(x) for x in api(API_TOPSTORIES) or []]

 def comment(i):
     if 'author' not in i:
@@ -60,6 +60,7 @@ def comment_count(i):
     return sum([comment_count(c) for c in i['comments']]) + alive

 def story(ref):
+    ref = ref.replace('hn:', '')
     r = api(API_ITEM, ref)
     if not r: return False

apiserver/feeds/manual.py

@@ -7,6 +7,8 @@ import requests
 import time
 from bs4 import BeautifulSoup

+import settings
+
 USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'

 def api(route):
@@ -33,7 +35,7 @@ def story(ref):
     s = {}
     s['author'] = 'manual submission'
-    s['author_link'] = 'https://news.t0.vc'
+    s['author_link'] = 'https://{}'.format(settings.HOSTNAME)
     s['score'] = 0
     s['date'] = int(time.time())
     s['title'] = str(soup.title.string) if soup.title else ref

apiserver/feeds/reddit.py

@@ -73,7 +73,7 @@ def story(ref):
     s['comments'] = list(filter(bool, s['comments']))
     s['num_comments'] = r.num_comments

-    if s['score'] < 25 and s['num_comments'] < 10:
+    if s['score'] < settings.REDDIT_SCORE_THRESHOLD and s['num_comments'] < settings.REDDIT_COMMENT_THRESHOLD:
         return False

     if r.selftext:

101
apiserver/feeds/sitemap.py Normal file
@@ -0,0 +1,101 @@
import logging
logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)

if __name__ == '__main__':
    import sys
    sys.path.insert(0,'.')

from datetime import datetime
from bs4 import BeautifulSoup

import settings
from utils import clean
from misc.time import unix
from misc.api import xml
from misc.news import Base

def _get_sitemap_date(a):
    if a.find('lastmod'):
        return a.find('lastmod').text
    if a.find('news:publication_date'):
        return a.find('news:publication_date').text
    if a.find('ns2:publication_date'):
        return a.find('ns2:publication_date').text
    return ''

def _filter_links(links, excludes=None):
    too_old = datetime.now().timestamp() - settings.MAX_STORY_AGE
    links = list(filter(None, [a if _get_sitemap_date(a) else None for a in links]))
    links = list(filter(None, [a if unix(_get_sitemap_date(a)) > too_old else None for a in links]))
    links.sort(key=lambda a: unix(_get_sitemap_date(a)), reverse=True)

    links = [x.find('loc').text for x in links] or []
    links = list(set(links))
    if excludes:
        links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
    return links

def _get_sitemap(feed_url, excludes=None):
    markup = xml(lambda x: feed_url)
    if not markup: return []
    soup = BeautifulSoup(markup, features='lxml')
    links = []
    feed_urls = []
    if soup.find('sitemapindex'):
        sitemap = soup.find('sitemapindex').findAll('sitemap')
        feed_urls = list(filter(None, [a if a.find('loc') else None for a in sitemap]))
    if soup.find('urlset'):
        sitemap = soup.find('urlset').findAll('url')
        links = list(filter(None, [a if a.find('loc') else None for a in sitemap]))

    feed_urls = _filter_links(feed_urls, excludes)
    links = _filter_links(links, excludes)

    for url in feed_urls:
        links += _get_sitemap(url, excludes)
    return list(set(links))

class Sitemap(Base):
    def __init__(self, config):
        self.config = config
        self.sitemap_url = config.get('url')
        self.tz = config.get('tz')

    def feed(self, excludes=None):
        links = []
        if isinstance(self.sitemap_url, str):
            links += _get_sitemap(self.sitemap_url, excludes)
        elif isinstance(self.sitemap_url, list):
            for url in self.sitemap_url:
                links += _get_sitemap(url, excludes)
        links = list(set(links))
        return [(self.get_id(link), link) for link in links]

# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
    print("Sitemap: The Spinoff")
    site = Sitemap({ 'url': "https://thespinoff.co.nz/sitemap.xml" })
    excludes = [
        'thespinoff.co.nz/sitemap-misc.xml',
        'thespinoff.co.nz/sitemap-authors.xml',
        'thespinoff.co.nz/sitemap-tax-category.xml',
    ]
    posts = site.feed(excludes)
    print(posts[:5])
    print(site.story(posts[0][0], posts[0][1]))

    print("Sitemap: Newshub")
    site = Sitemap({
        'url': [
            'https://www.newshub.co.nz/home/politics.gnewssitemap.xml',
            'https://www.newshub.co.nz/home/new-zealand.gnewssitemap.xml',
            'https://www.newshub.co.nz/home/world.gnewssitemap.xml',
            'https://www.newshub.co.nz/home/money.gnewssitemap.xml',
        ],
    })
    posts = site.feed()
    print(posts[:5])
    print(site.story(posts[0][0], posts[0][1]))

174
apiserver/feeds/substack.py Normal file
@@ -0,0 +1,174 @@
import logging
logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)

if __name__ == '__main__':
    import sys
    sys.path.insert(0,'.')

import requests
from datetime import datetime

import settings
from misc.time import unix
from misc.metadata import get_icons
from misc.api import xml, json
from utils import clean

SUBSTACK_REFERER = 'https://substack.com'
SUBSTACK_API_TOP_POSTS = lambda x: "https://substack.com/api/v1/reader/top-posts"

def author_link(author_id, base_url):
    return f"{base_url}/people/{author_id}"
def api_comments(post_id, base_url):
    return f"{base_url}/api/v1/post/{post_id}/comments?all_comments=true&sort=best_first"
def api_stories(x, base_url):
    return f"{base_url}/api/v1/archive?sort=new&search=&offset=0&limit=100"

def comment(i):
    if 'body' not in i:
        return False

    c = {}
    c['date'] = unix(i.get('date'))
    c['author'] = i.get('name', '')
    c['score'] = i.get('reactions').get('')
    c['text'] = clean(i.get('body', '') or '')
    c['comments'] = [comment(j) for j in i['children']]
    c['comments'] = list(filter(bool, c['comments']))
    return c

class Publication:
    def __init__(self, domain):
        self.BASE_DOMAIN = domain

    def ref_prefix(self, ref):
        return f"{self.BASE_DOMAIN}/#id:{ref}"

    def strip_ref_prefix(self, ref):
        return ref.replace(f"{self.BASE_DOMAIN}/#id:", '')

    def feed(self):
        too_old = datetime.now().timestamp() - settings.MAX_STORY_AGE
        stories = json(lambda x: api_stories(x, self.BASE_DOMAIN), headers={'Referer': self.BASE_DOMAIN})
        if not stories: return []
        stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
        stories = list(filter(None, [i if unix(i.get('post_date')) > too_old else None for i in stories]))
        stories.sort(key=lambda a: unix(a.get('post_date')), reverse=True)
        return [self.ref_prefix(str(i.get("id"))) for i in stories or []]

    def story(self, ref):
        ref = self.strip_ref_prefix(ref)
        stories = json(lambda x: api_stories(x, self.BASE_DOMAIN), headers={'Referer': self.BASE_DOMAIN})
        if not stories: return False
        stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
        stories = list(filter(None, [i if str(i.get('id')) == ref else None for i in stories]))

        if len(stories) == 0:
            return False

        r = stories[0]
        if not r:
            return False

        s = {}
        s['author'] = ''
        s['author_link'] = ''
        s['date'] = unix(r.get('post_date'))
        s['score'] = r.get('reactions').get('')
        s['title'] = r.get('title', '')
        s['link'] = r.get('canonical_url', '')
        s['url'] = r.get('canonical_url', '')
        comments = json(lambda x: api_comments(x, self.BASE_DOMAIN), r.get('id'), headers={'Referer': self.BASE_DOMAIN})
        s['comments'] = [comment(i) for i in comments.get('comments')]
        s['comments'] = list(filter(bool, s['comments']))
        s['num_comments'] = r.get('comment_count', 0)

        authors = list(filter(None, [self._bylines(byline) for byline in r.get('publishedBylines')]))
        if len(authors):
            s['author'] = authors[0].get('name')
            s['author_link'] = authors[0].get('link')

        markup = xml(lambda x: s['link'])
        if markup:
            icons = get_icons(markup, url=s['link'])
            if icons:
                s['icon'] = icons[0]

        return s

    def _bylines(self, b):
        if 'id' not in b:
            return None
        a = {}
        a['name'] = b.get('name')
        a['link'] = author_link(b.get('id'), self.BASE_DOMAIN)
        return a

class Top:
    def ref_prefix(self, base_url, ref):
        return f"{base_url}/#id:{ref}"

    def strip_ref_prefix(self, ref):
        if '/#id:' in ref:
            base_url, item = ref.split(f"/#id:")
            return item
        return ref

    def feed(self):
        too_old = datetime.now().timestamp() - settings.MAX_STORY_AGE
        stories = json(SUBSTACK_API_TOP_POSTS, headers={'Referer': SUBSTACK_REFERER})
        if not stories: return []
        stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
        stories = list(filter(None, [i if unix(i.get('post_date')) > too_old else None for i in stories]))
        stories.sort(key=lambda a: unix(a.get('post_date')), reverse=True)
        stories = [self.ref_prefix(str(i.get("pub").get("base_url")), str(i.get("id"))) for i in stories]
        return stories

    def story(self, ref):
        ref = self.strip_ref_prefix(ref)
        stories = json(SUBSTACK_API_TOP_POSTS, headers={'Referer': SUBSTACK_REFERER})
        if not stories: return False
        stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
        stories = list(filter(None, [i if str(i.get('id')) == ref else None for i in stories]))

        if len(stories) == 0:
            return False

        r = stories[0]
        if not r:
            return False

        s = {}
        pub = r.get('pub')
        base_url = pub.get('base_url')
        s['author'] = pub.get('author_name')
        s['author_link'] = author_link(pub.get('author_id'), base_url)
        s['date'] = unix(r.get('post_date'))
        s['score'] = r.get('score')
        s['title'] = r.get('title', '')
        s['link'] = r.get('canonical_url', '')
        s['url'] = r.get('canonical_url', '')
        comments = json(lambda x: api_comments(x, base_url), r.get('id'), headers={'Referer': SUBSTACK_REFERER})
        s['comments'] = [comment(i) for i in comments.get('comments')]
        s['comments'] = list(filter(bool, s['comments']))
        s['num_comments'] = r.get('comment_count', 0)

        return s

top = Top()

# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
    top_posts = top.feed()
    print(top.story(top_posts[0]))

    webworm = Publication("https://www.webworm.co/")
    posts = webworm.feed()
    print(webworm.story(posts[0]))

40
apiserver/misc/api.py Normal file
@@ -0,0 +1,40 @@
import logging
logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)

import requests

GOOGLEBOT_USER_AGENT = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
GOOGLEBOT_IP = '66.249.66.1'
TIMEOUT = 30

def xml(route, ref=None, headers=dict(), use_googlebot=True):
    try:
        if use_googlebot:
            headers['User-Agent'] = GOOGLEBOT_USER_AGENT
            headers['X-Forwarded-For'] = GOOGLEBOT_IP
        r = requests.get(route(ref), headers=headers, timeout=TIMEOUT)
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.text
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem hitting URL: {}'.format(str(e)))
        return False

def json(route, ref=None, headers=dict(), use_googlebot=True):
    try:
        if use_googlebot:
            headers['User-Agent'] = GOOGLEBOT_USER_AGENT
            headers['X-Forwarded-For'] = GOOGLEBOT_IP
        r = requests.get(route(ref), headers=headers, timeout=TIMEOUT)
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem hitting URL: {}'.format(str(e)))
        return False

14
apiserver/misc/icons.py Normal file
@@ -0,0 +1,14 @@
from bs4 import BeautifulSoup

# note: takes the page url so relative icon hrefs can be resolved
def get_icons(markup, url):
    soup = BeautifulSoup(markup, features='html.parser')
    icon32 = soup.find_all('link', rel="icon", href=True, sizes="32x32")
    icon16 = soup.find_all('link', rel="icon", href=True, sizes="16x16")
    favicon = soup.find_all('link', rel="shortcut icon", href=True)
    others = soup.find_all('link', rel="icon", href=True)
    icons = icon32 + icon16 + favicon + others
    base_url = '/'.join(url.split('/')[:3])
    icons = list(set([i.get('href') for i in icons]))
    icons = [i if i.startswith('http') else base_url + i for i in icons]
    return icons

apiserver/misc/metadata.py Normal file

@@ -0,0 +1,84 @@
from bs4 import BeautifulSoup

def get_icons(markup, url):
    soup = BeautifulSoup(markup, features='html.parser')
    icon32 = soup.find_all('link', rel="icon", href=True, sizes="32x32")
    icon16 = soup.find_all('link', rel="icon", href=True, sizes="16x16")
    favicon = soup.find_all('link', rel="shortcut icon", href=True)
    others = soup.find_all('link', rel="icon", href=True)
    icons = icon32 + icon16 + favicon + others
    base_url = '/'.join(url.split('/')[:3])
    icons = list(set([i.get('href') for i in icons]))
    icons = [i if i.startswith('http') else base_url + i for i in icons]
    return icons

def parse_extruct(s, data):
    rdfa_keys = {
        'title': [
            'http://ogp.me/ns#title',
            'https://ogp.me/ns#title',
        ],
        'date': [
            'http://ogp.me/ns/article#modified_time',
            'https://ogp.me/ns/article#modified_time',
            'http://ogp.me/ns/article#published_time',
            'https://ogp.me/ns/article#published_time',
        ]
    }
    for rdfa in data['rdfa']:
        for key, props in rdfa.items():
            for attribute, properties in rdfa_keys.items():
                for prop in properties:
                    if prop in props:
                        for values in props[prop]:
                            s[attribute] = values['@value']

    for og in data['opengraph']:
        titles = list(filter(None, [value if 'og:title' in key else None for key, value in og['properties']]))
        modified = list(filter(None, [value if 'article:modified_time' in key else None for key, value in og['properties']]))
        published = list(filter(None, [value if 'article:published_time' in key else None for key, value in og['properties']]))
        if len(modified):
            s['date'] = modified[0]
        if len(published):
            s['date'] = published[0]
        if len(titles):
            s['title'] = titles[0]

    for md in data['microdata']:
        if md['type'] in ['https://schema.org/NewsArticle', 'http://schema.org/NewsArticle']:
            props = md['properties']
            s['title'] = props['headline']
            if props['dateModified']:
                s['date'] = props['dateModified']
            if props['datePublished']:
                s['date'] = props['datePublished']
            if 'author' in props and props['author']:
                if 'properties' in props['author']:
                    s['author'] = props['author']['properties']['name']
                elif isinstance(props['author'], list):
                    s['author'] = props['author'][0]['properties']['name']

    for ld in data['json-ld']:
        if '@type' in ld and ld['@type'] in ['Article', 'NewsArticle']:
            s['title'] = ld['headline']
            if ld['dateModified']:
                s['date'] = ld['dateModified']
            if ld['datePublished']:
                s['date'] = ld['datePublished']
            if 'author' in ld and ld['author']:
                if 'name' in ld['author']:
                    s['author'] = ld['author']['name']
                elif isinstance(ld['author'], list):
                    s['author'] = ld['author'][0]['name']
        if '@graph' in ld:
            for gld in ld['@graph']:
                if '@type' in gld and gld['@type'] in ['Article', 'NewsArticle']:
                    s['title'] = gld['headline']
                    if gld['dateModified']:
                        s['date'] = gld['dateModified']
                    if gld['datePublished']:
                        s['date'] = gld['datePublished']

    return s

98
apiserver/misc/news.py Normal file
@@ -0,0 +1,98 @@
import logging
logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)

import re
import requests
from bs4 import BeautifulSoup
from scrapers import declutter
import extruct

import settings
from utils import clean
from misc.metadata import parse_extruct, get_icons
from misc.time import unix
from misc.api import xml
import misc.stuff as stuff

def comment(i):
    if 'author' not in i:
        return False

    c = {}
    c['author'] = i.get('author', '')
    c['score'] = i.get('points', 0)
    c['date'] = unix(i.get('date', 0))
    c['text'] = clean(i.get('text', '') or '')
    c['comments'] = [comment(j) for j in i['children']]
    c['comments'] = list(filter(bool, c['comments']))
    return c

def comment_count(i):
    alive = 1 if i['author'] else 0
    return sum([comment_count(c) for c in i['comments']]) + alive

class Base:
    def __init__(self, config):
        self.config = config
        self.url = config.get('url')
        self.tz = config.get('tz')

    def get_id(self, link):
        patterns = self.config.get('patterns')
        if not patterns:
            return link
        patterns = [re.compile(p) for p in patterns]
        patterns = list(filter(None, [p.match(link) for p in patterns]))
        patterns = list(set([':'.join(p.groups()) for p in patterns]))
        if not patterns:
            return link
        return patterns[0]

    def feed(self, excludes=None):
        return []

    def story(self, ref, urlref):
        if urlref is None:
            return False
        markup = xml(lambda x: urlref)
        if not markup:
            return False

        s = {}
        s['author_link'] = ''
        s['score'] = 0
        s['comments'] = []
        s['num_comments'] = 0
        s['link'] = urlref
        s['url'] = urlref
        s['date'] = 0

        icons = get_icons(markup, url=urlref)
        if icons:
            s['icon'] = icons[0]

        data = extruct.extract(markup)
        s = parse_extruct(s, data)
        if s['date']:
            s['date'] = unix(s['date'], tz=self.tz)

        if 'disqus' in markup:
            try:
                s['comments'] = declutter.get_comments(urlref)
                s['comments'] = list(filter(bool, s['comments']))
                s['num_comments'] = comment_count(s['comments'])
            except KeyboardInterrupt:
                raise
            except:
                pass

        if urlref.startswith('https://www.stuff.co.nz'):
            s['comments'] = stuff.get_comments(urlref)
            s['comments'] = list(filter(bool, s['comments']))
            s['num_comments'] = len(s['comments'])

        if not s['date']:
            return False
        return s

64
apiserver/misc/stuff.py Normal file
@@ -0,0 +1,64 @@
import re
import bs4
from bs4 import BeautifulSoup

if __name__ == '__main__':
    import sys
    sys.path.insert(0,'.')

from misc.time import unix
from misc.api import xml

def _soup_get_text(soup):
    if not soup: return None
    if soup.text: return soup.text

    s = soup.find(text=lambda tag: isinstance(tag, bs4.CData))
    if s and s.string: return s.string.strip()
    return None

def _parse_comment(soup):
    c = {
        'author': '',
        'authorLink': '',
        'score': 0,
        'date': 0,
        'text': '',
        'comments': [],
    }

    if soup.find('link'):
        title = _soup_get_text(soup.find('link'))
        if title and 'By:' in title:
            c['author'] = title.strip('By:').strip()
    if soup.find('dc:creator'):
        c['author'] = _soup_get_text(soup.find('dc:creator'))
    if soup.find('link'):
        c['authorLink'] = _soup_get_text(soup.find('link'))
    if soup.find('description'):
        c['text'] = _soup_get_text(soup.find('description'))
    if soup.find('pubdate'):
        c['date'] = unix(soup.find('pubdate').text)
    elif soup.find('pubDate'):
        c['date'] = unix(soup.find('pubDate').text)

    return c

def get_comments(url):
    regex = r"https:\/\/www\.stuff\.co\.nz\/(.*\/\d+)/[^\/]+"
    p = re.compile(regex).match(url)
    path = p.groups()[0]
    comment_url = f'https://comments.us1.gigya.com/comments/rss/6201101/Stuff/stuff/{path}'
    markup = xml(lambda x: comment_url)
    if not markup: return []
    soup = BeautifulSoup(markup, features='html.parser')
    comments = soup.find_all('item')
    if not comments: return []
    comments = [_parse_comment(c) for c in comments]
    return comments

# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
    comments = get_comments('https://www.stuff.co.nz/life-style/homed/houses/123418468/dear-jacinda-we-need-to-talk-about-housing')
    print(len(comments))
    print(comments[:5])

18
apiserver/misc/time.py Normal file
@@ -0,0 +1,18 @@
import pytz
import dateutil.parser

TZINFOS = {
    'NZDT': pytz.timezone('Pacific/Auckland'),
    'NZST': pytz.timezone('Pacific/Auckland')
}

def unix(date_str, tz=None, tzinfos=TZINFOS):
    try:
        dt = dateutil.parser.parse(date_str, tzinfos=tzinfos)
        if tz:
            dt = pytz.timezone(tz).localize(dt)
        return int(dt.timestamp())
    except:
        pass
    return 0

apiserver/requirements.txt

@@ -4,6 +4,7 @@ certifi==2020.6.20
 chardet==3.0.4
 click==7.1.2
 commonmark==0.9.1
+extruct==0.10.0
 Flask==1.1.2
 Flask-Cors==3.0.8
 gevent==20.6.2
@@ -11,11 +12,13 @@ greenlet==0.4.16
 idna==2.10
 itsdangerous==1.1.0
 Jinja2==2.11.2
+lxml==4.6.1
 MarkupSafe==1.1.1
 packaging==20.4
 praw==6.4.0
 prawcore==1.4.0
 pyparsing==2.4.7
+pytz==2020.4
 requests==2.24.0
 six==1.15.0
 soupsieve==2.0.1
@@ -27,3 +30,4 @@ websocket-client==0.57.0
 Werkzeug==1.0.1
 zope.event==4.4
 zope.interface==5.1.0
+python-dateutil==2.8.1

apiserver/scrapers/declutter.py Normal file

@@ -0,0 +1,41 @@
import logging
logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)
import requests

DECLUTTER_API = 'https://declutter.1j.nz/headless/details'
DECLUTTER_COMMENT_API = 'https://declutter.1j.nz/headless/comments'
TIMEOUT = 90

def get_html(url):
    logging.info(f"Declutter Scraper: {url}")
    details = get_details(url)
    if not details:
        return ''
    return details['content']

def get_details(url):
    try:
        r = requests.post(DECLUTTER_API, data=dict(url=url), timeout=TIMEOUT)
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem decluttering article: {}'.format(str(e)))
        return None

def get_comments(url):
    try:
        r = requests.post(DECLUTTER_COMMENT_API, data=dict(url=url), timeout=TIMEOUT)
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem getting comments for article: {}'.format(str(e)))
        return None

apiserver/scrapers/headless.py Normal file

@@ -0,0 +1,41 @@
import logging
logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)
import requests
from settings import HEADLESS_READER_PORT

READ_API = 'http://127.0.0.1:{}/headless/details'.format(HEADLESS_READER_PORT or 33843)
READ_COMMENT_API = 'http://127.0.0.1:{}/headless/comments'.format(HEADLESS_READER_PORT or 33843)
TIMEOUT = 90

def get_html(url):
    logging.info(f"Headless Scraper: {url}")
    details = get_details(url)
    if not details:
        return ''
    return details['content']

def get_details(url):
    try:
        r = requests.post(READ_API, data=dict(url=url), timeout=TIMEOUT)
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem scraping article: {}'.format(str(e)))
        return None

def get_comments(url):
    try:
        r = requests.post(READ_COMMENT_API, data=dict(url=url), timeout=TIMEOUT)
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem getting comments for article: {}'.format(str(e)))
        return None

apiserver/scrapers/outline.py Normal file

@@ -0,0 +1,37 @@
import logging
logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)
import time
import requests

OUTLINE_REFERER = 'https://outline.com/'
OUTLINE_API = 'https://api.outline.com/v3/parse_article'
TIMEOUT = 20

def get_html(url):
    details = get_details(url)
    if not details:
        return ''
    return details['html']

def get_details(url):
    try:
        logging.info(f"Outline Scraper: {url}")
        params = {'source_url': url}
        headers = {'Referer': OUTLINE_REFERER}
        r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=TIMEOUT)
        if r.status_code == 429:
            logging.info('Rate limited by outline, sleeping 30s and skipping...')
            time.sleep(30)
            return None
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        data = r.json()['data']
        if 'URL is not supported by Outline' in data['html']:
            raise Exception('URL not supported by Outline')
        return data
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem outlining article: {}'.format(str(e)))
        return None

apiserver/scrapers/simple.py Normal file

@@ -0,0 +1,28 @@
import logging
logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)
import requests
from settings import SIMPLE_READER_PORT

READ_API = 'http://127.0.0.1:{}/simple/details'.format(SIMPLE_READER_PORT or 33843)
TIMEOUT = 20

def get_html(url):
    logging.info(f"Simple Scraper: {url}")
    details = get_details(url)
    if not details:
        return ''
    return details['content']

def get_details(url):
    try:
        r = requests.post(READ_API, data=dict(url=url), timeout=TIMEOUT)
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem getting article: {}'.format(str(e)))
        return None

apiserver/search.py

@@ -35,14 +35,11 @@ def update_rankings():
 def update_attributes():
     try:
-        json = ['title', 'url', 'author', 'link', 'id']
+        json = ['title', 'url', 'author', 'link', 'id', 'source']
         r = requests.post(MEILI_URL + 'indexes/qotnews/settings/searchable-attributes', json=json, timeout=2)
         if r.status_code != 202:
             raise Exception('Bad response code ' + str(r.status_code))
-        return r.json()
-        r = requests.delete(MEILI_URL + 'indexes/qotnews/settings/displayed-attributes', timeout=2)
-        if r.status_code != 202:
-            raise Exception('Bad response code ' + str(r.status_code))
+        requests.delete(MEILI_URL + 'indexes/qotnews/settings/displayed-attributes', timeout=2)
         return r.json()
     except KeyboardInterrupt:
         raise

apiserver/server.py

@@ -15,6 +15,7 @@ import traceback
 import time
 from urllib.parse import urlparse, parse_qs

+import settings
 import database
 import search
 import feed
@@ -27,9 +28,6 @@ from flask_cors import CORS
 database.init()
 search.init()

-FEED_LENGTH = 75
-news_index = 0
-
 def new_id():
     nid = gen_rand_id()
     while database.get_story(nid):
@@ -42,9 +40,8 @@ cors = CORS(flask_app)

 @flask_app.route('/api')
 def api():
-    stories = database.get_stories(FEED_LENGTH)
-    # hacky nested json
-    res = Response('{"stories":[' + ','.join(stories) + ']}')
+    stories = database.get_stories(settings.MAX_STORY_AGE)
+    res = Response(json.dumps({"stories": stories}))
     res.headers['content-type'] = 'application/json'
     return res

@@ -73,7 +70,7 @@ def submit():
     elif 'reddit.com' in parse.hostname and 'comments' in url:
         source = 'reddit'
         ref = parse.path.split('/')[4]
-    elif 'news.t0.vc' in parse.hostname:
+    elif settings.HOSTNAME in parse.hostname:
         raise Exception('Invalid article')
     else:
         source = 'manual'
@@ -102,8 +99,11 @@ def submit():
 def story(sid):
     story = database.get_story(sid)
     if story:
-        # hacky nested json
-        res = Response('{"story":' + story.full_json + '}')
+        related = []
+        if story.meta['url']:
+            related = database.get_stories_by_url(story.meta['url'])
+            related = [r.meta for r in related]
+        res = Response(json.dumps({"story": story.data, "related": related}))
         res.headers['content-type'] = 'application/json'
         return res
     else:
@@ -114,7 +114,7 @@ def story(sid):
 def index():
     return render_template('index.html',
             title='Feed',
-            url='news.t0.vc',
+            url=settings.HOSTNAME,
             description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')

 @flask_app.route('/<sid>', strict_slashes=False)
@@ -127,7 +127,7 @@ def static_story(sid):
     story = database.get_story(sid)
     if not story: return abort(404)

-    story = json.loads(story.full_json)
+    story = story.data

     score = story['score']
     num_comments = story['num_comments']
@@ -144,54 +144,55 @@ def static_story(sid):
             url=url,
             description=description)

-http_server = WSGIServer(('', 33842), flask_app)
+http_server = WSGIServer(('', settings.API_PORT or 33842), flask_app)

+def _add_new_refs():
+    for ref, source, urlref in feed.get_list():
+        if database.get_story_by_ref(ref):
+            continue
+        try:
+            nid = new_id()
+            database.put_ref(ref, nid, source, urlref)
+            logging.info('Added ref ' + ref)
+        except database.IntegrityError:
+            logging.info('Unable to add ref ' + ref)
+            continue
+
+def _update_current_story(item):
+    try:
+        story = database.get_story(item['sid']).data
+    except AttributeError:
+        story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
+
+    logging.info('Updating story: {}'.format(str(story['ref'])))
+
+    valid = feed.update_story(story, urlref=item['urlref'])
+    if valid:
+        try:
+            database.put_story(story)
+            search.put_story(story)
+        except database.IntegrityError:
+            logging.info('Unable to add story with ref ' + ref)
+    else:
+        database.del_ref(item['ref'])
+        logging.info('Removed ref {}'.format(item['ref']))
+
 def feed_thread():
-    global news_index
+    ref_list = []
     try:
         while True:
             # onboard new stories
-            if news_index == 0:
-                for ref, source in feed.list():
-                    if database.get_story_by_ref(ref):
-                        continue
-                    try:
-                        nid = new_id()
-                        database.put_ref(ref, nid, source)
-                        logging.info('Added ref ' + ref)
-                    except database.IntegrityError:
-                        continue
-
-            ref_list = database.get_reflist(FEED_LENGTH)
+            if not len(ref_list):
+                _add_new_refs()
+                ref_list = database.get_reflist()

             # update current stories
-            if news_index < len(ref_list):
-                item = ref_list[news_index]
-
-                try:
-                    story_json = database.get_story(item['sid']).full_json
-                    story = json.loads(story_json)
-                except AttributeError:
-                    story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
-
-                logging.info('Updating story: ' + str(story['ref']) + ', index: ' + str(news_index))
-
-                valid = feed.update_story(story)
-                if valid:
-                    database.put_story(story)
-                    search.put_story(story)
-                else:
-                    database.del_ref(item['ref'])
-                    logging.info('Removed ref {}'.format(item['ref']))
-            else:
-                logging.info('Skipping index: ' + str(news_index))
+            if len(ref_list):
+                item = ref_list.pop(0)
+                _update_current_story(item)

             gevent.sleep(6)

-            news_index += 1
-            if news_index == FEED_LENGTH: news_index = 0
-
     except KeyboardInterrupt:
         logging.info('Ending feed thread...')
     except ValueError as e:

apiserver/settings.py.example

@@ -1,12 +1,60 @@
 # QotNews settings
 # edit this file and save it as settings.py

+HOSTNAME = 'news.t0.vc'
+MAX_STORY_AGE = 3*24*60*60
+
+SCRAPERS = ['headless', 'outline', 'declutter', 'simple']
+
+API_PORT = 33842
+SIMPLE_READER_PORT = 33843
+HEADLESS_READER_PORT = 33843
+
 # Feed Lengths
 # Number of top items from each site to pull
 # set to 0 to disable that site
 NUM_HACKERNEWS = 15
 NUM_REDDIT = 10
 NUM_TILDES = 5
+NUM_SUBSTACK = 10
+
+SITEMAP = {}
+# SITEMAP['nzherald'] = {
+#     'url': "https://www.nzherald.co.nz/arcio/news-sitemap/",
+#     'count': 20,
+#     'patterns': [
+#         r'^https:\/\/www\.(nzherald\.co\.nz)\/.*\/([^/]+)\/?$',
+#     ],
+#     'excludes': [
+#         'driven.co.nz',
+#         'oneroof.co.nz',
+#         'nzherald.co.nz/sponsored-stories',
+#         'nzherald.co.nz/entertainment/',
+#         'nzherald.co.nz/lifestyle/',
+#         'nzherald.co.nz/travel/',
+#         'nzherald.co.nz/sport/',
+#         'nzherald.co.nz/promotions/',
+#         'nzherald.co.nzhttp',
+#         'herald-afternoon-quiz',
+#         'herald-morning-quiz'
+#     ],
+# }
+
+SUBSTACK = {}
+# SUBSTACK['webworm'] = { 'url': "https://www.webworm.co", 'count': 10},
+# SUBSTACK['the bulletin'] = { 'url': "https://thespinoff.substack.com", 'count': 10},
+
+CATEGORY = {}
+# CATEGORY['radionz'] = {
+#     'url': "https://www.rnz.co.nz/news/",
+#     'count': 20,
+#     'patterns': [
+#         r'https:\/\/www\.(rnz\.co\.nz)\/news\/[^\/]+\/(\d+)\/[^\/]+\/?'
+#     ],
+#     'excludes': [
+#         'rnz.co.nz/news/sport',
+#         'rnz.co.nz/weather',
+#     ],
+# }

 # Reddit account info
 # leave blank if not using Reddit
@@ -14,6 +62,10 @@ REDDIT_CLIENT_ID = ''
 REDDIT_CLIENT_SECRET = ''
 REDDIT_USER_AGENT = ''

+# Minimum points or number of comments before including a thread:
+REDDIT_COMMENT_THRESHOLD = 10
+REDDIT_SCORE_THRESHOLD = 25
+
 SUBREDDITS = [
     'Economics',
     'AcademicPhilosophy',

48
apiserver/update-story.py Normal file
@@ -0,0 +1,48 @@
import logging
logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO)

import sys
import json
import requests

import database
import feed
import search

database.init()
search.init()

def _update_current_story(story, item):
    logging.info('Updating story: {}'.format(str(story['ref'])))

    if story.get('url', ''):
        story['text'] = ''

    valid = feed.update_story(story, urlref=item['urlref'])
    if valid:
        database.put_story(story)
        search.put_story(story)
    else:
        database.del_ref(item['ref'])
        logging.info('Removed ref {}'.format(item['ref']))

if __name__ == '__main__':
    if len(sys.argv) == 2:
        sid = sys.argv[1]
    else:
        print('Usage: python update-story.py [story id]')
        exit(1)

    item = database.get_ref_by_sid(sid)

    if item:
        story = database.get_story(item['sid']).data
        if story:
            print('Updating story:')
            _update_current_story(story, item)
        else:
            print('Story not found. Exiting.')
    else:
        print('Story not found. Exiting.')

apiserver/utils.py

@@ -9,7 +9,7 @@ import string
 from bleach.sanitizer import Cleaner

 def gen_rand_id():
-    return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))
+    return ''.join(random.choice(string.ascii_uppercase) for _ in range(5))

 def render_md(md):
     if md:
1
readerserver Submodule

Submodule readerserver added at d3d5fc74ac

readerserver/.gitignore

@@ -1,92 +0,0 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# TypeScript v1 declaration files
typings/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variables file
.env
.env.test
# parcel-bundler cache (https://parceljs.org/)
.cache
# next.js build output
.next
# nuxt.js build output
.nuxt
# vuepress build output
.vuepress/dist
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# Editor
*.swp
*.swo

readerserver/main.js (deleted)

@@ -1,53 +0,0 @@
const express = require('express');
const app = express();
const port = 33843;

const request = require('request');
const JSDOM = require('jsdom').JSDOM;
const { Readability } = require('readability');

app.use(express.urlencoded({ extended: true }));

app.get('/', (req, res) => {
  res.send('<form method="POST" accept-charset="UTF-8"><input name="url"><button type="submit">SUBMIT</button></form>');
});

const requestCallback = (url, res) => (error, response, body) => {
  if (!error && response.statusCode == 200) {
    console.log('Response OK.');

    const doc = new JSDOM(body, { url: url });
    const reader = new Readability(doc.window.document);
    const article = reader.parse();

    if (article && article.content) {
      res.send(article.content);
    } else {
      res.sendStatus(404);
    }
  } else {
    console.log('Response error:', error ? error.toString() : response.statusCode);
    res.sendStatus(response ? response.statusCode : 404);
  }
};

app.post('/', (req, res) => {
  const url = req.body.url;
  const requestOptions = {
    url: url,
    //headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'},
    //headers: {'User-Agent': 'Twitterbot/1.0'},
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0',
      'X-Forwarded-For': '66.249.66.1',
    },
  };

  console.log('Parse request for:', url);
  request(requestOptions, requestCallback(url, res));
});

app.listen(port, () => {
  console.log(`Example app listening on port ${port}!`);
});

readerserver/package.json (deleted)

@@ -1,13 +0,0 @@
{
  "name": "readerserver",
  "version": "1.0.0",
  "main": "main.js",
  "license": "MIT",
  "dependencies": {
    "dompurify": "^1.0.11",
    "express": "^4.17.1",
    "jsdom": "^15.1.1",
    "readability": "https://github.com/mozilla/readability",
    "request": "^2.88.0"
  }
}

File diff suppressed because it is too large.

webclient/src/App.js

@@ -5,13 +5,14 @@ import './Style-light.css';
import './Style-dark.css';
 import './fonts/Fonts.css';
 import { ForwardDot } from './utils.js';
-import Feed from './Feed.js';
-import Article from './Article.js';
-import Comments from './Comments.js';
 import Search from './Search.js';
 import Submit from './Submit.js';
-import Results from './Results.js';
 import ScrollToTop from './ScrollToTop.js';
+import Feed from './pages/Feed.js';
+import Article from './pages/Article.js';
+import Comments from './pages/Comments.js';
+import Results from './pages/Results.js';

 class App extends React.Component {
   constructor(props) {
@@ -70,7 +71,7 @@ class App extends React.Component {
         <Route path='/search' component={Results} />
         <Route path='/:id' exact render={(props) => <Article {...props} cache={this.cache} />} />
       </Switch>
-      <Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} />} />
+      <Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} key={props.match.params.id} />} />
       <ForwardDot />
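Note the added key={props.match.params.id} on the Comments route: React treats a changed key as a new component instance, so navigating from one story's comments to another's remounts Comments and refetches, rather than reusing the previous story's state.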

webclient/src/Style-light.css

@@ -1,225 +1,231 @@
body {
   text-rendering: optimizeLegibility;
   font: 1rem/1.3 sans-serif;
   color: #000000;
   margin-bottom: 100vh;
   word-break: break-word;
   font-kerning: normal;
 }

 a {
   color: #000000;
   text-decoration: none;
   outline: none;
 }

 input {
   font-size: 1.05rem;
   background-color: transparent;
   border: 1px solid #828282;
   margin: 0.25rem;
   padding: 6px;
   border-radius: 4px;
 }

 pre {
   overflow: auto;
 }

 .container {
   margin: 1rem auto;
   max-width: 64rem;
 }

 .menu {
   font-size: 1.1rem;
   padding: 0 1rem;
 }

 .slogan {
   color: #828282;
 }

 .theme {
   float: right;
 }

 .item {
   display: table;
   color: #828282;
   margin-bottom: 0.7rem;
 }

 .item .source-logo {
   width: 0.9rem;
   height: 0.9rem;
 }

 .item a {
   color: #828282;
 }

 .item a:hover {
   text-decoration: underline;
 }

 .item a.link {
   font-size: 1.1rem;
   color: #000000;
 }

 .item a.link:visited {
   color: #828282;
 }

 .item a.link:hover {
   text-decoration: none;
 }

 span.source {
   margin-left: 0.4rem;
 }

 .item .info a.hot {
   color: #444444;
 }

 .article {
   padding-bottom: 3rem;
 }

 .article-container {
   margin: 1rem auto;
   max-width: 38rem;
 }

 .article a {
   border-bottom: 1px solid #222222;
 }

 .article h1 {
   font-size: 1.6rem;
 }

 .article h2 {
   font-size: 1.4rem;
 }

-.article h3, .article h4 {
+.article h3,
+.article h4 {
   font-size: 1.3rem;
 }

 .article img {
   max-width: 100%;
   height: auto;
 }

-.article figure, .article video {
+.article figure,
+.article video {
   width: 100%;
   height: auto;
   margin: 0;
 }

 .article table {
   width: 100%;
   table-layout: fixed;
 }

 .article iframe {
   display: none;
 }

 .article u {
   border-bottom: 1px solid #222;
   text-decoration: none;
 }

 .article .info {
   color: #828282;
 }

 .article .info a {
   border-bottom: none;
   color: #828282;
 }

 .article .info a:hover {
   text-decoration: underline;
 }

 .story-text {
-  font: 1.2rem/1.5 'Apparatus SIL', sans-serif;
+  font: 1.2rem/1.5 "Apparatus SIL", sans-serif;
   margin-top: 1em;
 }

 .comments {
   margin-left: -1.25rem;
 }

 .comment {
   padding-left: 1.25rem;
 }

 .comment.lined {
   border-left: 1px solid #cccccc;
 }

 .comment .text {
   margin-top: -0.5rem;
 }

 .comment .text.hidden > p {
   white-space: nowrap;
   overflow: hidden;
   text-overflow: ellipsis;
   display: none;
   color: #828282;
 }

 .comment .text.hidden > p:first-child {
   display: block;
 }

 .comment .collapser {
   padding-left: 0.5rem;
   padding-right: 1.5rem;
 }

 .comment .pointer {
   cursor: pointer;
 }

 .toggleDot {
   position: fixed;
   bottom: 1rem;
   left: 1rem;
   height: 3rem;
   width: 3rem;
   background-color: #828282;
   border-radius: 50%;
 }

 .toggleDot .button {
-  font: 2rem/1 'icomoon';
+  font: 2rem/1 "icomoon";
   position: relative;
   top: 0.5rem;
   left: 0.55rem;
 }

 .forwardDot {
   cursor: pointer;
   position: fixed;
   bottom: 1rem;
   right: 1rem;
   height: 3rem;
   width: 3rem;
   background-color: #828282;
   border-radius: 50%;
 }

 .forwardDot .button {
-  font: 2.5rem/1 'icomoon';
+  font: 2.5rem/1 "icomoon";
   position: relative;
   top: 0.25rem;
   left: 0.3rem;
 }

 .search form {
   display: inline;
 }
+
+.indented {
+  padding: 0 0 0 1rem;
+}

webclient/src/components/StoryItem.js (new file)

@@ -0,0 +1,34 @@
import React from "react";
import { Link } from "react-router-dom";

import { sourceLink, infoLine, getLogoUrl } from "../utils.js";

export class StoryItem extends React.Component {
  constructor(props) {
    super(props);
  }

  render() {
    const story = this.props.story;
    const { id, title } = story;

    return (
      <div className="item" key={id}>
        <div className="title">
          <Link className="link" to={"/" + id}>
            <img
              className="source-logo"
              src={getLogoUrl(story)}
              alt="source logo"
            />{" "}
            {title}
          </Link>
          <span className="source">({sourceLink(story)})</span>
        </div>
        {infoLine(story)}
      </div>
    );
  }
}

webclient/src/pages/Article.js

@@ -1,7 +1,7 @@
import React from 'react';
 import { Helmet } from 'react-helmet';
 import localForage from 'localforage';
-import { sourceLink, infoLine, ToggleDot } from './utils.js';
+import { sourceLink, infoLine, otherDiscussions, ToggleDot } from '../utils.js';

 class Article extends React.Component {
   constructor(props) {
@@ -14,29 +14,25 @@ class Article extends React.Component {
     this.state = {
       story: cache[id] || false,
+      related: [],
       error: false,
       pConv: [],
     };
   }

   componentDidMount() {
     const id = this.props.match ? this.props.match.params.id : 'CLOL';

-    localForage.getItem(id)
-      .then(
-        (value) => {
-          if (value) {
-            this.setState({ story: value });
-          }
-        }
-      );
+    localForage.getItem(id).then((value) => value ? this.setState({ story: value }) : null);
+    localForage.getItem(`related-${id}`).then((value) => value ? this.setState({ related: value }) : null);

     fetch('/api/' + id)
       .then(res => res.json())
       .then(
         (result) => {
-          this.setState({ story: result.story });
+          this.setState({ story: result.story, related: result.related });
           localForage.setItem(id, result.story);
+          localForage.setItem(`related-${id}`, result.related);
         },
         (error) => {
           this.setState({ error: true });
@@ -45,12 +41,13 @@ class Article extends React.Component {
   }

   pConvert = (n) => {
-    this.setState({ pConv: [...this.state.pConv, n]});
+    this.setState({ pConv: [...this.state.pConv, n] });
   }

   render() {
     const id = this.props.match ? this.props.match.params.id : 'CLOL';
     const story = this.state.story;
+    const related = this.state.related.filter(r => r.id != id);
     const error = this.state.error;
     const pConv = this.state.pConv;
     let nodes = null;
@@ -77,6 +74,7 @@ class Article extends React.Component {
           </div>
           {infoLine(story)}
+          {otherDiscussions(related)}

           {nodes ?
             <div className='story-text'>
@@ -85,10 +83,10 @@ class Article extends React.Component {
                 v.innerHTML.split('\n\n').map(x =>
                   <p dangerouslySetInnerHTML={{ __html: x }} />
                 )
                 :
                 (v.nodeName === '#text' ?
                   <p>{v.data}</p>
                   :
                   <>
                     <v.localName dangerouslySetInnerHTML={v.innerHTML ? { __html: v.innerHTML } : null} />
                     {v.localName == 'pre' && <button onClick={() => this.pConvert(k)}>Convert Code to Paragraph</button>}
@@ -96,11 +94,11 @@ class Article extends React.Component {
               )
             )}
           </div>
           :
           <p>Problem getting article :(</p>
         }
       </div>
       :
       <p>loading...</p>
     }
     <ToggleDot id={id} article={false} />

webclient/src/pages/Comments.js

@@ -4,9 +4,9 @@ import { HashLink } from 'react-router-hash-link';
import { Helmet } from 'react-helmet';
 import moment from 'moment';
 import localForage from 'localforage';
-import { infoLine, ToggleDot } from './utils.js';
+import { infoLine, otherDiscussions, ToggleDot } from '../utils.js';

-class Article extends React.Component {
+class Comments extends React.Component {
   constructor(props) {
     super(props);
@@ -17,6 +17,7 @@ class Article extends React.Component {
     this.state = {
       story: cache[id] || false,
+      related: [],
       error: false,
       collapsed: [],
       expanded: [],
@@ -26,24 +27,21 @@
   componentDidMount() {
     const id = this.props.match.params.id;

-    localForage.getItem(id)
-      .then(
-        (value) => {
-          this.setState({ story: value });
-        }
-      );
+    localForage.getItem(id).then((value) => this.setState({ story: value }));
+    localForage.getItem(`related-${id}`).then((value) => value ? this.setState({ related: value }) : null);

     fetch('/api/' + id)
       .then(res => res.json())
       .then(
         (result) => {
-          this.setState({ story: result.story }, () => {
+          this.setState({ story: result.story, related: result.related }, () => {
             const hash = window.location.hash.substring(1);
             if (hash) {
               document.getElementById(hash).scrollIntoView();
             }
           });
           localForage.setItem(id, result.story);
+          localForage.setItem(`related-${id}`, result.related);
         },
         (error) => {
           this.setState({ error: true });
@@ -72,7 +70,7 @@
   }

   displayComment(story, c, level) {
-    const cid = c.author+c.date;
+    const cid = c.author + c.date;
     const collapsed = this.state.collapsed.includes(cid);
     const expanded = this.state.expanded.includes(cid);
@@ -85,19 +83,22 @@
         <div className='info'>
           <p>
             {c.author === story.author ? '[OP]' : ''} {c.author || '[Deleted]'}
-            {' '} | <HashLink to={'#'+cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
-            {hidden || hasChildren &&
-              <span className='collapser pointer' onClick={() => this.collapseComment(cid)}></span>
-            }
+            {' '} | <HashLink to={'#' + cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
+            {hasChildren && (
+              hidden ?
+                <span className='collapser expander pointer' onClick={() => this.expandComment(cid)}>+</span>
+                :
+                <span className='collapser pointer' onClick={() => this.collapseComment(cid)}></span>
+            )}
           </p>
         </div>

         <div className={collapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text }} />

         {hidden && hasChildren ?
-          <div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c)-1} more]</div>
+          <div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c) - 1} more]</div>
           :
           c.comments.map(i => this.displayComment(story, i, level + 1))
         }
       </div>
@@ -107,6 +108,7 @@
   render() {
     const id = this.props.match.params.id;
     const story = this.state.story;
+    const related = this.state.related.filter(r => r.id != id);
     const error = this.state.error;

     return (
@@ -125,12 +127,13 @@
           </div>
           {infoLine(story)}
+          {otherDiscussions(related)}

           <div className='comments'>
             {story.comments.map(c => this.displayComment(story, c, 0))}
           </div>
         </div>
         :
         <p>loading...</p>
       }
       <ToggleDot id={id} article={true} />
@@ -139,4 +142,4 @@
   }
 }

-export default Article;
+export default Comments;

webclient/src/pages/Feed.js

@@ -1,8 +1,7 @@
import React from 'react';
-import { Link } from 'react-router-dom';
 import { Helmet } from 'react-helmet';
 import localForage from 'localforage';
-import { sourceLink, infoLine, logos } from './utils.js';
+import { StoryItem } from '../components/StoryItem.js';

 class Feed extends React.Component {
   constructor(props) {
@@ -22,20 +21,24 @@ class Feed extends React.Component {
           const updated = !this.state.stories || this.state.stories[0].id !== result.stories[0].id;
           console.log('updated:', updated);

-          this.setState({ stories: result.stories });
-          localStorage.setItem('stories', JSON.stringify(result.stories));
+          const { stories } = result;
+          this.setState({ stories });
+          localStorage.setItem('stories', JSON.stringify(stories));

           if (updated) {
             localForage.clear();
-            result.stories.forEach((x, i) => {
+            stories.forEach((x, i) => {
               fetch('/api/' + x.id)
                 .then(res => res.json())
-                .then(result => {
-                  localForage.setItem(x.id, result.story)
-                    .then(console.log('preloaded', x.id, x.title));
-                  this.props.updateCache(x.id, result.story);
-                }, error => {}
-                );
+                .then(({ story, related }) => {
+                  Promise.all([
+                    localForage.setItem(x.id, story),
+                    localForage.setItem(`related-${x.id}`, related)
+                  ]).then(console.log('preloaded', x.id, x.title));
+                  this.props.updateCache(x.id, story);
+                  this.props.updateCache(`related-${x.id}`, related);
+                }, error => { }
+                );
             });
           }
         },
@@ -55,27 +58,7 @@ class Feed extends React.Component {
           <title>Feed - QotNews</title>
         </Helmet>
         {error && <p>Connection error?</p>}
-        {stories ?
-          <div>
-            {stories.map(x =>
-              <div className='item' key={x.id}>
-                <div className='title'>
-                  <Link className='link' to={'/' + x.id}>
-                    <img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
-                  </Link>
-                  <span className='source'>
-                    ({sourceLink(x)})
-                  </span>
-                </div>
-                {infoLine(x)}
-              </div>
-            )}
-          </div>
-          :
-          <p>loading...</p>
-        }
+        {stories ? stories.map(story => <StoryItem story={story}></StoryItem>) : <p>loading...</p>}
       </div>
     );
   }
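With this change the feed warm-up caches each story's related-discussions list alongside the story itself, under a related-${id} key in localForage, so the other-discussions links on the article and comments pages can render straight from cache.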

webclient/src/pages/Results.js

@@ -1,8 +1,7 @@
import React from 'react';
-import { Link } from 'react-router-dom';
 import { Helmet } from 'react-helmet';
-import { sourceLink, infoLine, logos } from './utils.js';
 import AbortController from 'abort-controller';
+import { StoryItem } from '../components/StoryItem.js';

 class Results extends React.Component {
   constructor(props) {
@@ -63,28 +62,10 @@ class Results extends React.Component {
           <>
             <p>Search results:</p>
             <div className='comment lined'>
-              {stories.length ?
-                stories.map(x =>
-                  <div className='item' key={x.id}>
-                    <div className='title'>
-                      <Link className='link' to={'/' + x.id}>
-                        <img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
-                      </Link>
-                      <span className='source'>
-                        ({sourceLink(x)})
-                      </span>
-                    </div>
-                    {infoLine(x)}
-                  </div>
-                )
-                :
-                <p>none</p>
-              }
+              {stories ? stories.map(story => <StoryItem story={story}></StoryItem>) : <p>loading...</p>}
             </div>
           </>
           :
           <p>loading...</p>
         }
       </div>

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large.