update declutter.

sort substack feed by time.
2020-11-24 16:54:21 +13:00 · 2020-11-24 11:20:37 +13:00 · 2020-11-24 10:56:38 +13:00 · 2020-11-24 10:42:41 +13:00 · 2020-11-24 10:36:31 +13:00 · 2020-11-23 16:46:54 +13:00
25 changed files with 3360 additions and 2583 deletions
--- a/apiserver/database.py
+++ b/apiserver/database.py
@@ -73,6 +73,13 @@ def get_stories_by_url(url):
            filter(Story.meta['url'].as_string() == url).\
            order_by(Story.meta['date'].desc())

+def get_ref_by_sid(sid):
+    # Return the Reflist record for story id `sid` as a plain dict, or None when no row matches.
+    session = Session()
+    x = session.query(Reflist).filter(Reflist.sid == sid).first()
+    # .first() gives None on no match; reading x.ref then would raise instead of letting callers test the result.
+    return dict(ref=x.ref, sid=x.sid, source=x.source, urlref=x.urlref) if x else None
+
 def get_reflist():
    session = Session()
    q = session.query(Reflist).order_by(Reflist.rid.desc())
--- a/apiserver/feeds/category.py
+++ b/apiserver/feeds/category.py
@@ -53,7 +53,7 @@ class Category(Base):
 # scratchpad so I can quickly develop the parser
 if __name__ == '__main__':
    print("Category: RadioNZ")
-    site = Category("https://www.rnz.co.nz/news/")
+    site = Category({ 'url': "https://www.rnz.co.nz/news/" })
    excludes = [
        'rnz.co.nz/news/sport',
        'rnz.co.nz/weather',
@@ -61,12 +61,12 @@ if __name__ == '__main__':
    ]
    posts = site.feed(excludes)
    print(posts[:5])
-    print(site.story(posts[0]))
+    print(site.story(posts[0][0], posts[0][1]))

    print("Category: Newsroom")
-    site = Category("https://www.newsroom.co.nz/news/", tz='Pacific/Auckland')
+    site = Category({ 'url': "https://www.newsroom.co.nz/news/", 'tz': 'Pacific/Auckland'})
    posts = site.feed()
    print(posts[:5])
-    print(site.story(posts[0]))
+    print(site.story(posts[0][0], posts[0][1]))


--- a/apiserver/feeds/hackernews.py
+++ b/apiserver/feeds/hackernews.py
@@ -40,7 +40,7 @@ def api(route, ref=None):
        return False

 def feed():
-    return [str(x) for x in api(API_TOPSTORIES) or []]
+    return ['hn:'+str(x) for x in api(API_TOPSTORIES) or []]

 def comment(i):
    if 'author' not in i:
@@ -60,6 +60,7 @@ def comment_count(i):
    return sum([comment_count(c) for c in i['comments']]) + alive

 def story(ref):
+    ref = ref.replace('hn:', '')
    r = api(API_ITEM, ref)
    if not r: return False

--- a/apiserver/feeds/sitemap.py
+++ b/apiserver/feeds/sitemap.py
@@ -76,7 +76,7 @@ class Sitemap(Base):
 # scratchpad so I can quickly develop the parser
 if __name__ == '__main__':
    print("Sitemap: The Spinoff")
-    site = Sitemap("https://thespinoff.co.nz/sitemap.xml")
+    site = Sitemap({ 'url': "https://thespinoff.co.nz/sitemap.xml" })
    excludes = [
        'thespinoff.co.nz/sitemap-misc.xml',
        'thespinoff.co.nz/sitemap-authors.xml',
@@ -84,16 +84,18 @@ if __name__ == '__main__':
    ]
    posts = site.feed(excludes)
    print(posts[:5])
-    print(site.story(posts[0]))
+    print(site.story(posts[0][0], posts[0][1]))

    print("Sitemap: Newshub")
-    site = Sitemap([
-        'https://www.newshub.co.nz/home/politics.gnewssitemap.xml',
-        'https://www.newshub.co.nz/home/new-zealand.gnewssitemap.xml',
-        'https://www.newshub.co.nz/home/world.gnewssitemap.xml',
-        'https://www.newshub.co.nz/home/money.gnewssitemap.xml',
-    ])
+    site = Sitemap({
+        'url': [
+            'https://www.newshub.co.nz/home/politics.gnewssitemap.xml',
+            'https://www.newshub.co.nz/home/new-zealand.gnewssitemap.xml',
+            'https://www.newshub.co.nz/home/world.gnewssitemap.xml',
+            'https://www.newshub.co.nz/home/money.gnewssitemap.xml',
+        ],
+    })
    posts = site.feed()
    print(posts[:5])
-    print(site.story(posts[0]))
-    print(site.story(posts[:-1]))
+    print(site.story(posts[0][0], posts[0][1]))
+    
--- a/apiserver/feeds/substack.py
+++ b/apiserver/feeds/substack.py
@@ -10,6 +10,10 @@ if __name__ == '__main__':
 import requests
 from datetime import datetime

+import settings
+from misc.time import unix
+from misc.metadata import get_icons
+from misc.api import xml, json
 from utils import clean

 SUBSTACK_REFERER = 'https://substack.com'
@@ -22,32 +26,6 @@ def api_comments(post_id, base_url):
 def api_stories(x, base_url): 
    return f"{base_url}/api/v1/archive?sort=new&search=&offset=0&limit=100"

-def unix(date_str):
-    return int(datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%fZ').timestamp())
-
-def api(route, ref=None, referer=None):
-    headers = {'Referer': referer} if referer else None
-    try:
-        r = requests.get(route(ref), headers=headers, timeout=10)
-        if r.status_code != 200:
-            raise Exception('Bad response code ' + str(r.status_code))
-        return r.json()
-    except KeyboardInterrupt:
-        raise
-    except BaseException as e:
-        logging.error('Problem hitting Substack API: {}, trying again'.format(str(e)))
-
-    try:
-        r = requests.get(route(ref), headers=headers, timeout=20)
-        if r.status_code != 200:
-            raise Exception('Bad response code ' + str(r.status_code))
-        return r.json()
-    except KeyboardInterrupt:
-        raise
-    except BaseException as e:
-        logging.error('Problem hitting Substack API: {}'.format(str(e)))
-        return False
-
 def comment(i):
    if 'body' not in i:
        return False
@@ -66,14 +44,25 @@ class Publication:
    def __init__(self, domain):
        self.BASE_DOMAIN = domain

+    def ref_prefix(self, ref):
+        return f"{self.BASE_DOMAIN}/#id:{ref}"
+
+    def strip_ref_prefix(self, ref):
+        return ref.replace(f"{self.BASE_DOMAIN}/#id:", '')
+
    def feed(self):
-        stories = api(lambda x: api_stories(x, self.BASE_DOMAIN), referer=self.BASE_DOMAIN)
+        too_old = datetime.now().timestamp() - settings.MAX_STORY_AGE
+        stories = json(lambda x: api_stories(x, self.BASE_DOMAIN), headers={'Referer': self.BASE_DOMAIN})
        if not stories: return []
        stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
-        return [str(i.get("id")) for i in stories or []]
+        stories = list(filter(None, [i if unix(i.get('post_date')) > too_old else None for i in stories]))
+        stories.sort(key=lambda a: unix(a.get('post_date')), reverse=True)
+
+        return [self.ref_prefix(str(i.get("id"))) for i in stories or []]

    def story(self, ref):
-        stories = api(lambda x: api_stories(x, self.BASE_DOMAIN), referer=self.BASE_DOMAIN)
+        ref = self.strip_ref_prefix(ref)
+        stories = json(lambda x: api_stories(x, self.BASE_DOMAIN), headers={'Referer': self.BASE_DOMAIN})
        if not stories: return False
        stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
        stories = list(filter(None, [i if str(i.get('id')) == ref else None for i in stories]))
@@ -94,7 +83,7 @@ class Publication:
        s['title'] = r.get('title', '')
        s['link'] = r.get('canonical_url', '')
        s['url'] = r.get('canonical_url', '')
-        comments = api(lambda x: api_comments(x, self.BASE_DOMAIN), r.get('id'), referer=self.BASE_DOMAIN)
+        comments = json(lambda x: api_comments(x, self.BASE_DOMAIN), r.get('id'), headers={'Referer': self.BASE_DOMAIN})
        s['comments'] = [comment(i) for i in comments.get('comments')]
        s['comments'] = list(filter(bool, s['comments']))
        s['num_comments'] = r.get('comment_count', 0)
@@ -104,6 +93,12 @@ class Publication:
            s['author'] = authors[0].get('name')
            s['author_link'] = authors[0].get('link')

+        markup = xml(lambda x: s['link'])
+        if markup:
+            icons = get_icons(markup, url=s['link'])
+            if icons:
+                s['icon'] = icons[0]
+
        return s

    def _bylines(self, b):
@@ -116,14 +111,28 @@ class Publication:


 class Top:
+    def ref_prefix(self, base_url, ref):
+        return f"{base_url}/#id:{ref}"
+
+    def strip_ref_prefix(self, ref):
+        # Refs have the form "<base_url>/#id:<item>"; return <item>, or `ref` unchanged if unprefixed.
+        # rpartition gives ('', '', ref) when the marker is absent and cannot fail when the
+        # marker occurs more than once (unpacking the result of split() into two names did).
+        return ref.rpartition('/#id:')[2]
+
    def feed(self):
-        stories = api(SUBSTACK_API_TOP_POSTS, referer=SUBSTACK_REFERER)
+        too_old = datetime.now().timestamp() - settings.MAX_STORY_AGE
+        stories = json(SUBSTACK_API_TOP_POSTS, headers={'Referer': SUBSTACK_REFERER})
        if not stories: return []
        stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
-        return [str(i.get("id")) for i in stories or []]
+        stories = list(filter(None, [i if unix(i.get('post_date')) > too_old else None for i in stories]))
+        stories.sort(key=lambda a: unix(a.get('post_date')), reverse=True)
+        stories = [self.ref_prefix(str(i.get("pub").get("base_url")), str(i.get("id"))) for i in stories]
+        return stories

    def story(self, ref):
-        stories = api(SUBSTACK_API_TOP_POSTS, referer=SUBSTACK_REFERER)
+        ref = self.strip_ref_prefix(ref)
+        stories = json(SUBSTACK_API_TOP_POSTS, headers={'Referer': SUBSTACK_REFERER})
        if not stories: return False
        stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
        stories = list(filter(None, [i if str(i.get('id')) == ref else None for i in stories]))
@@ -146,7 +155,7 @@ class Top:
        s['title'] = r.get('title', '')
        s['link'] = r.get('canonical_url', '')
        s['url'] = r.get('canonical_url', '')
-        comments = api(lambda x: api_comments(x, base_url), r.get('id'), referer=SUBSTACK_REFERER)
+        comments = json(lambda x: api_comments(x, base_url), r.get('id'), headers={'Referer': SUBSTACK_REFERER})
        s['comments'] = [comment(i) for i in comments.get('comments')]
        s['comments'] = list(filter(bool, s['comments']))
        s['num_comments'] = r.get('comment_count', 0)
--- a/apiserver/misc/api.py
+++ b/apiserver/misc/api.py
@@ -5,13 +5,16 @@ logging.basicConfig(

 import requests

-USER_AGENT = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
-FORWARD_IP = '66.249.66.1'
+GOOGLEBOT_USER_AGENT = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
+GOOGLEBOT_IP = '66.249.66.1'
+TIMEOUT = 30

-def xml(route, ref=None):
+def xml(route, ref=None, headers=None, use_googlebot=True):
    try:
-        headers = {'User-Agent': USER_AGENT, 'X-Forwarded-For': FORWARD_IP}
-        r = requests.get(route(ref), headers=headers, timeout=5)
+        headers = dict(headers or {})  # private copy: a `dict()` default is shared between calls and was mutated below
+        if use_googlebot:
+            headers.update({'User-Agent': GOOGLEBOT_USER_AGENT, 'X-Forwarded-For': GOOGLEBOT_IP})
+        r = requests.get(route(ref), headers=headers, timeout=TIMEOUT)
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.text
@@ -21,10 +24,12 @@ def xml(route, ref=None):
        logging.error('Problem hitting URL: {}'.format(str(e)))
        return False

-def json(route, ref=None):
+def json(route, ref=None, headers=None, use_googlebot=True):
    try:
-        headers = {'User-Agent': USER_AGENT, 'X-Forwarded-For': FORWARD_IP}
-        r = requests.get(route(ref), headers=headers, timeout=5)
+        headers = dict(headers or {})  # private copy: a `dict()` default is shared between calls and was mutated below
+        if use_googlebot:
+            headers.update({'User-Agent': GOOGLEBOT_USER_AGENT, 'X-Forwarded-For': GOOGLEBOT_IP})
+        r = requests.get(route(ref), headers=headers, timeout=TIMEOUT)
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
--- a/apiserver/misc/icons.py
+++ b/apiserver/misc/icons.py
@@ -0,0 +1,14 @@
+from bs4 import BeautifulSoup
+
+def get_icons(markup, url=''):
+    # Return the favicon URLs found in `markup`, most specific first (32x32, 16x16, shortcut, any).
+    # `url` is the page address; relative hrefs are prefixed with its scheme://host part.
+    soup = BeautifulSoup(markup, features='html.parser')
+    icons = list(soup.find_all('link', rel="icon", href=True, sizes="32x32"))
+    icons += soup.find_all('link', rel="icon", href=True, sizes="16x16")
+    icons += soup.find_all('link', rel="shortcut icon", href=True)
+    icons += soup.find_all('link', rel="icon", href=True)
+    base_url = '/'.join(url.split('/')[:3])  # was `urlref`, a name this module never defines
+    # dict.fromkeys drops duplicates but keeps the priority order; set() returned them in arbitrary order.
+    icons = list(dict.fromkeys(i.get('href') for i in icons))
+    return [i if i.startswith('http') else base_url + i for i in icons]
--- a/apiserver/misc/metadata.py
+++ b/apiserver/misc/metadata.py
@@ -1,4 +1,19 @@

+from bs4 import BeautifulSoup
+
+def get_icons(markup, url):
+    # Return the favicon URLs found in `markup`, most specific first (32x32, 16x16, shortcut, any).
+    # `url` is the page address; relative hrefs are prefixed with its scheme://host part.
+    soup = BeautifulSoup(markup, features='html.parser')
+    icons = list(soup.find_all('link', rel="icon", href=True, sizes="32x32"))
+    icons += soup.find_all('link', rel="icon", href=True, sizes="16x16")
+    icons += soup.find_all('link', rel="shortcut icon", href=True)
+    icons += soup.find_all('link', rel="icon", href=True)
+    base_url = '/'.join(url.split('/')[:3])
+    # dict.fromkeys drops duplicates but keeps the priority order; set() returned them in arbitrary order.
+    icons = list(dict.fromkeys(i.get('href') for i in icons))
+    return [i if i.startswith('http') else base_url + i for i in icons]
+
 def parse_extruct(s, data):
    rdfa_keys = {
        'title': [
--- a/apiserver/misc/news.py
+++ b/apiserver/misc/news.py
@@ -11,9 +11,10 @@ import extruct

 import settings
 from utils import clean
-from misc.metadata import parse_extruct
+from misc.metadata import parse_extruct, get_icons
 from misc.time import unix
 from misc.api import xml
+import misc.stuff as stuff

 def comment(i):
    if 'author' not in i:
@@ -68,16 +69,7 @@ class Base:
        s['url'] = urlref
        s['date'] = 0

-        soup = BeautifulSoup(markup, features='html.parser')
-        icon32 = soup.find_all('link', rel="icon", href=True, sizes="32x32")
-        icon16 = soup.find_all('link', rel="icon", href=True, sizes="16x16")
-        favicon = soup.find_all('link', rel="shortcut icon", href=True)
-        others = soup.find_all('link', rel="icon", href=True)
-        icons = icon32 + icon16 + favicon + others
-        base_url = '/'.join(urlref.split('/')[:3])
-        icons = list(set([i.get('href') for i in icons]))
-        icons = [i if i.startswith('http') else base_url + i for i in icons]
-
+        icons = get_icons(markup, url=urlref)
        if icons:
            s['icon'] = icons[0]

@@ -89,13 +81,18 @@ class Base:
        if 'disqus' in markup:
            try:
                s['comments'] = declutter.get_comments(urlref)
-                c['comments'] = list(filter(bool, c['comments']))
+                s['comments'] = list(filter(bool, s['comments']))
                s['num_comments'] = comment_count(s['comments'])
            except KeyboardInterrupt:
                raise
            except:
                pass

+        if urlref.startswith('https://www.stuff.co.nz'):
+            s['comments'] = stuff.get_comments(urlref)
+            s['comments'] = list(filter(bool, s['comments']))
+            s['num_comments'] = len(s['comments'])
+
        if not s['date']:
            return False
        return s
--- a/apiserver/misc/stuff.py
+++ b/apiserver/misc/stuff.py
@@ -0,0 +1,64 @@
+import re
+from bs4 import BeautifulSoup
+
+if __name__ == '__main__':
+    import sys
+    sys.path.insert(0,'.')
+
+from misc.time import unix
+from misc.api import xml
+
+def _soup_get_text(soup):
+    # Return the element's text, falling back to a CDATA child node; None when neither is present.
+    if not soup: return None
+    if soup.text: return soup.text
+    from bs4 import CData  # module only imports `BeautifulSoup`, so the former `bs4.CData` was a NameError
+    s = soup.find(text=lambda tag: isinstance(tag, CData))
+    return s.string.strip() if s and s.string else None
+
+def _parse_comment(soup):
+    # Convert one parsed RSS <item> element into a comment dict with the keys initialised below.
+    c = {
+        'author': '',
+        'authorLink': '',
+        'score': 0,
+        'date': 0,
+        'text': '',
+        'comments': [],
+    }
+    # NOTE(review): the "By:" author is read from <link>, not <title> - confirm this is intended.
+    title = _soup_get_text(soup.find('link'))
+    if title and 'By:' in title:
+        # str.strip('By:') removes any of the characters B, y, : from BOTH ends
+        # (e.g. "By: Bobby" -> "Bobb"); take the text after the marker instead.
+        c['author'] = title.partition('By:')[2].strip()
+    if soup.find('dc:creator'):
+        c['author'] = _soup_get_text(soup.find('dc:creator'))
+    if soup.find('link'):
+        c['authorLink'] = _soup_get_text(soup.find('link'))
+    if soup.find('description'):
+        c['text'] = _soup_get_text(soup.find('description'))
+    pubdate = soup.find('pubdate') or soup.find('pubDate')
+    if pubdate:
+        c['date'] = unix(pubdate.text)
+    return c
+
+def get_comments(url):
+    # Download the Gigya comment RSS feed for a stuff.co.nz article URL and parse each <item>.
+    # Returns a list of comment dicts; empty when the URL, the fetch or the feed yields nothing.
+    regex = r"https:\/\/www\.stuff\.co\.nz\/(.*\/\d+)/[^\/]+"
+    p = re.compile(regex).match(url)
+    if not p: return []  # URL without the expected ".../<digits>/<slug>" shape: nothing to look up
+    path = p.groups()[0]
+    comment_url = f'https://comments.us1.gigya.com/comments/rss/6201101/Stuff/stuff/{path}'
+    markup = xml(lambda x: comment_url)
+    if not markup: return []
+    soup = BeautifulSoup(markup, features='html.parser')
+    return [_parse_comment(c) for c in soup.find_all('item')]
+
+
+# scratchpad so I can quickly develop the parser
+if __name__ == '__main__':
+    comments = get_comments('https://www.stuff.co.nz/life-style/homed/houses/123418468/dear-jacinda-we-need-to-talk-about-housing')
+    print(len(comments))
+    print(comments[:5])
--- a/apiserver/scrapers/declutter.py
+++ b/apiserver/scrapers/declutter.py
@@ -4,9 +4,9 @@ logging.basicConfig(
        level=logging.DEBUG)
 import requests

-DECLUTTER_API = 'https://declutter.1j.nz/details'
-DECLUTTER_COMMENT_API = 'https://declutter.1j.nz/comments'
-TIMEOUT = 30
+DECLUTTER_API = 'https://declutter.1j.nz/headless/details'
+DECLUTTER_COMMENT_API = 'https://declutter.1j.nz/headless/comments'
+TIMEOUT = 90


 def get_html(url):
--- a/apiserver/scrapers/headless.py
+++ b/apiserver/scrapers/headless.py
@@ -3,15 +3,14 @@ logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)
 import requests
-from settings import READER_PORT
-
-READ_API = 'http://127.0.0.1:{}/headless/details'.format(READER_PORT or 3000)
-READ_COMMENT__API = 'http://127.0.0.1:{}/headless/comments'.format(READER_PORT or 3000)
-TIMEOUT = 60
+from settings import HEADLESS_READER_PORT

+READ_API = 'http://127.0.0.1:{}/headless/details'.format(HEADLESS_READER_PORT or 33843)
+READ_COMMENT__API = 'http://127.0.0.1:{}/headless/comments'.format(HEADLESS_READER_PORT or 33843)
+TIMEOUT = 90

 def get_html(url):
-    logging.info(f"Headless Browser Scraper: {url}")
+    logging.info(f"Headless Scraper: {url}")
    details = get_details(url)
    if not details:
        return ''
--- a/apiserver/scrapers/simple.py
+++ b/apiserver/scrapers/simple.py
@@ -3,9 +3,9 @@ logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.DEBUG)
 import requests
-from settings import READER_PORT
+from settings import SIMPLE_READER_PORT

-READ_API = 'http://127.0.0.1:{}/simple/details'.format(READER_PORT or 3000)
+READ_API = 'http://127.0.0.1:{}/simple/details'.format(SIMPLE_READER_PORT or 33843)
 TIMEOUT = 20

 def get_html(url):
--- a/apiserver/server.py
+++ b/apiserver/server.py
@@ -99,8 +99,10 @@ def submit():
 def story(sid):
    story = database.get_story(sid)
    if story:
-        related = database.get_stories_by_url(story.meta['url'])
-        related = [r.meta for r in related]
+        related = []
+        if story.meta['url']:
+            related = database.get_stories_by_url(story.meta['url'])
+            related = [r.meta for r in related]
        res = Response(json.dumps({"story": story.data, "related": related}))
        res.headers['content-type'] = 'application/json'
        return res
@@ -153,6 +155,7 @@ def _add_new_refs():
            database.put_ref(ref, nid, source, urlref)
            logging.info('Added ref ' + ref)
        except database.IntegrityError:
+            logging.info('Unable to add ref ' + ref)
            continue

 def _update_current_story(item):
@@ -165,8 +168,11 @@ def _update_current_story(item):

    valid = feed.update_story(story, urlref=item['urlref'])
    if valid:
-        database.put_story(story)
-        search.put_story(story)
+        try:
+            database.put_story(story)
+            search.put_story(story)
+        except database.IntegrityError:
+            logging.info('Unable to add story with ref ' + item['ref'])  # was a bare `ref`, which is not bound in the visible code
    else:
        database.del_ref(item['ref'])
        logging.info('Removed ref {}'.format(item['ref']))
--- a/apiserver/settings.py.example
+++ b/apiserver/settings.py.example
@@ -6,7 +6,8 @@ MAX_STORY_AGE = 3*24*60*60

 SCRAPERS = ['headless', 'outline', 'declutter', 'simple']
 API_PORT = 33842
-READER_PORT = 3000
+SIMPLE_READER_PORT = 33843
+HEADLESS_READER_PORT = 33843

 # Feed Lengths
 # Number of top items from each site to pull
--- a/apiserver/update-story.py
+++ b/apiserver/update-story.py
@@ -0,0 +1,48 @@
+import logging
+logging.basicConfig(
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+        level=logging.INFO)
+
+import sys
+import json
+import requests
+
+import database
+import feed
+import search
+
+database.init()
+search.init()
+
+def _update_current_story(story, item):
+    logging.info('Updating story: {}'.format(str(story['ref'])))
+
+    if story.get('url', ''):
+        story['text'] = ''
+
+    valid = feed.update_story(story, urlref=item['urlref'])
+    if valid:
+        database.put_story(story)
+        search.put_story(story)
+    else:
+        database.del_ref(item['ref'])
+        logging.info('Removed ref {}'.format(item['ref']))
+
+if __name__ == '__main__':
+    if len(sys.argv) == 2:
+        sid = sys.argv[1]
+    else:
+        print('Usage: python update-story.py [story id]')
+        sys.exit(1)
+
+    item = database.get_ref_by_sid(sid)
+    if item:
+        # get_story() can come back empty for an unknown sid, so read `.data` defensively.
+        story = getattr(database.get_story(item['sid']), 'data', None)
+        if story:
+            print('Updating story:')
+            _update_current_story(story, item)
+        else:
+            print('Story not found. Exiting.')
+    else:
+        print('Story not found. Exiting.')
--- a/apiserver/utils.py
+++ b/apiserver/utils.py
@@ -9,7 +9,7 @@ import string
 from bleach.sanitizer import Cleaner

 def gen_rand_id():
-    return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))
+    return ''.join(random.choice(string.ascii_uppercase) for _ in range(5))

 def render_md(md):
    if md:
--- a/2
+++ b/2
--- a/webclient/src/App.js
+++ b/webclient/src/App.js
@@ -71,7 +71,7 @@ class App extends React.Component {
 						<Route path='/search' component={Results} />
 						<Route path='/:id' exact render={(props) => <Article {...props} cache={this.cache} />} />
 					</Switch>
-					<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} />} />
+					<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} key={props.match.params.id} />} />

 					<ForwardDot />

--- a/webclient/src/Style-light.css
+++ b/webclient/src/Style-light.css
@@ -1,225 +1,231 @@
 body {
-	text-rendering: optimizeLegibility;
-	font: 1rem/1.3 sans-serif;
-	color: #000000;
-	margin-bottom: 100vh;
-	word-break: break-word;
-	font-kerning: normal;
+  text-rendering: optimizeLegibility;
+  font: 1rem/1.3 sans-serif;
+  color: #000000;
+  margin-bottom: 100vh;
+  word-break: break-word;
+  font-kerning: normal;
 }

 a {
-	color: #000000;
-	text-decoration: none;
-	outline: none;
+  color: #000000;
+  text-decoration: none;
+  outline: none;
 }

 input {
-	font-size: 1.05rem;
-	background-color: transparent;
-	border: 1px solid #828282;
-	margin: 0.25rem;
-	padding: 6px;
-	border-radius: 4px;
+  font-size: 1.05rem;
+  background-color: transparent;
+  border: 1px solid #828282;
+  margin: 0.25rem;
+  padding: 6px;
+  border-radius: 4px;
 }

 pre {
-	overflow: auto;
+  overflow: auto;
 }

 .container {
-	margin: 1rem auto;
-	max-width: 64rem;
+  margin: 1rem auto;
+  max-width: 64rem;
 }

 .menu {
-	font-size: 1.1rem;
-	padding: 0 1rem;
+  font-size: 1.1rem;
+  padding: 0 1rem;
 }

 .slogan {
-	color: #828282;
+  color: #828282;
 }

 .theme {
-	float: right;
+  float: right;
 }

 .item {
-	display: table;
-	color: #828282;
-	margin-bottom: 0.7rem;
+  display: table;
+  color: #828282;
+  margin-bottom: 0.7rem;
 }

 .item .source-logo {
-	width: 0.9rem;
-	height: 0.9rem;
+  width: 0.9rem;
+  height: 0.9rem;
 }

 .item a {
-	color: #828282;
+  color: #828282;
 }
 .item a:hover {
-	text-decoration: underline;
+  text-decoration: underline;
 }

 .item a.link {
-	font-size: 1.1rem;
-	color: #000000;
+  font-size: 1.1rem;
+  color: #000000;
 }
 .item a.link:visited {
-	color: #828282;
+  color: #828282;
 }
 .item a.link:hover {
-	text-decoration: none;
+  text-decoration: none;
 }

 span.source {
-	margin-left: 0.4rem;
+  margin-left: 0.4rem;
 }

 .item .info a.hot {
-	color: #444444;
+  color: #444444;
 }

 .article {
-	padding-bottom: 3rem;
+  padding-bottom: 3rem;
 }

 .article-container {
-	margin: 1rem auto;
-	max-width: 38rem;
+  margin: 1rem auto;
+  max-width: 38rem;
 }

 .article a {
-	border-bottom: 1px solid #222222;
+  border-bottom: 1px solid #222222;
 }

 .article h1 {
-	font-size: 1.6rem;
+  font-size: 1.6rem;
 }

 .article h2 {
-	font-size: 1.4rem;
+  font-size: 1.4rem;
 }

-.article h3, .article h4 {
-	font-size: 1.3rem;
+.article h3,
+.article h4 {
+  font-size: 1.3rem;
 }

 .article img {
-	max-width: 100%;
-	height: auto;
+  max-width: 100%;
+  height: auto;
 }

-.article figure, .article video {
-	width: 100%;
-	height: auto;
-	margin: 0;
+.article figure,
+.article video {
+  width: 100%;
+  height: auto;
+  margin: 0;
 }

 .article table {
-	width: 100%;
-	table-layout: fixed;
+  width: 100%;
+  table-layout: fixed;
 }

 .article iframe {
-	display: none;
+  display: none;
 }

 .article u {
-	border-bottom: 1px solid #222;
-	text-decoration: none;
+  border-bottom: 1px solid #222;
+  text-decoration: none;
 }

 .article .info {
-	color: #828282;
+  color: #828282;
 }

 .article .info a {
-	border-bottom: none;
-	color: #828282;
+  border-bottom: none;
+  color: #828282;
 }
 .article .info a:hover {
-	text-decoration: underline;
+  text-decoration: underline;
 }

 .story-text {
-	font: 1.2rem/1.5 'Apparatus SIL', sans-serif;
-	margin-top: 1em;
+  font: 1.2rem/1.5 "Apparatus SIL", sans-serif;
+  margin-top: 1em;
 }

 .comments {
-	margin-left: -1.25rem;
+  margin-left: -1.25rem;
 }

 .comment {
-	padding-left: 1.25rem;
+  padding-left: 1.25rem;
 }

 .comment.lined {
-	border-left: 1px solid #cccccc;
+  border-left: 1px solid #cccccc;
 }

 .comment .text {
-	margin-top: -0.5rem;
+  margin-top: -0.5rem;
 }

 .comment .text.hidden > p {
-	white-space: nowrap;
-	overflow: hidden;
-	text-overflow: ellipsis;
-	display: none;
-	color: #828282;
+  white-space: nowrap;
+  overflow: hidden;
+  text-overflow: ellipsis;
+  display: none;
+  color: #828282;
 }

 .comment .text.hidden > p:first-child {
-	display: block;
+  display: block;
 }

 .comment .collapser {
-	padding-left: 0.5rem;
-	padding-right: 1.5rem;
+  padding-left: 0.5rem;
+  padding-right: 1.5rem;
 }

 .comment .pointer {
-	cursor: pointer;
+  cursor: pointer;
 }

 .toggleDot {
-	position: fixed;
-	bottom: 1rem;
-	left: 1rem;
-	height: 3rem;
-	width: 3rem;
-	background-color: #828282;
-	border-radius: 50%;
+  position: fixed;
+  bottom: 1rem;
+  left: 1rem;
+  height: 3rem;
+  width: 3rem;
+  background-color: #828282;
+  border-radius: 50%;
 }

 .toggleDot .button {
-	font: 2rem/1 'icomoon';
-	position: relative;
-	top: 0.5rem;
-	left: 0.55rem;
+  font: 2rem/1 "icomoon";
+  position: relative;
+  top: 0.5rem;
+  left: 0.55rem;
 }

 .forwardDot {
-	cursor: pointer;
-	position: fixed;
-	bottom: 1rem;
-	right: 1rem;
-	height: 3rem;
-	width: 3rem;
-	background-color: #828282;
-	border-radius: 50%;
+  cursor: pointer;
+  position: fixed;
+  bottom: 1rem;
+  right: 1rem;
+  height: 3rem;
+  width: 3rem;
+  background-color: #828282;
+  border-radius: 50%;
 }

 .forwardDot .button {
-	font: 2.5rem/1 'icomoon';
-	position: relative;
-	top: 0.25rem;
-	left: 0.3rem;
+  font: 2.5rem/1 "icomoon";
+  position: relative;
+  top: 0.25rem;
+  left: 0.3rem;
 }

 .search form {
-	display: inline;
+  display: inline;
+}
+
+.indented {
+  padding: 0 0 0 1rem;
 }
--- a/webclient/src/pages/Article.js
+++ b/webclient/src/pages/Article.js
@@ -1,7 +1,7 @@
 import React from 'react';
 import { Helmet } from 'react-helmet';
 import localForage from 'localforage';
-import { sourceLink, infoLine, ToggleDot } from '../utils.js';
+import { sourceLink, infoLine, otherDiscussions, ToggleDot } from '../utils.js';

 class Article extends React.Component {
 	constructor(props) {
@@ -14,6 +14,7 @@ class Article extends React.Component {

 		this.state = {
 			story: cache[id] || false,
+			related: [],
 			error: false,
 			pConv: [],
 		};
@@ -22,21 +23,16 @@ class Article extends React.Component {
 	componentDidMount() {
 		const id = this.props.match ? this.props.match.params.id : 'CLOL';

-		localForage.getItem(id)
-			.then(
-				(value) => {
-					if (value) {
-						this.setState({ story: value });
-					}
-				}
-			);
+		localForage.getItem(id).then((value) => value ? this.setState({ story: value }) : null);
+		localForage.getItem(`related-${id}`).then((value) => value ? this.setState({ related: value }) : null);

 		fetch('/api/' + id)
 			.then(res => res.json())
 			.then(
 				(result) => {
-					this.setState({ story: result.story });
+					this.setState({ story: result.story, related: result.related });
 					localForage.setItem(id, result.story);
+					localForage.setItem(`related-${id}`, result.related);
 				},
 				(error) => {
 					this.setState({ error: true });
@@ -51,6 +47,7 @@ class Article extends React.Component {
 	render() {
 		const id = this.props.match ? this.props.match.params.id : 'CLOL';
 		const story = this.state.story;
+		const related = this.state.related.filter(r => r.id != id);
 		const error = this.state.error;
 		const pConv = this.state.pConv;
 		let nodes = null;
@@ -77,6 +74,7 @@ class Article extends React.Component {
 						</div>

 						{infoLine(story)}
+						{otherDiscussions(related)}

 						{nodes ?
 							<div className='story-text'>
--- a/webclient/src/pages/Comments.js
+++ b/webclient/src/pages/Comments.js
@@ -4,9 +4,9 @@ import { HashLink } from 'react-router-hash-link';
 import { Helmet } from 'react-helmet';
 import moment from 'moment';
 import localForage from 'localforage';
-import { infoLine, ToggleDot } from '../utils.js';
+import { infoLine, otherDiscussions, ToggleDot } from '../utils.js';

-class Article extends React.Component {
+class Comments extends React.Component {
 	constructor(props) {
 		super(props);

@@ -17,6 +17,7 @@ class Article extends React.Component {

 		this.state = {
 			story: cache[id] || false,
+			related: [],
 			error: false,
 			collapsed: [],
 			expanded: [],
@@ -26,24 +27,21 @@ class Article extends React.Component {
 	componentDidMount() {
 		const id = this.props.match.params.id;

-		localForage.getItem(id)
-			.then(
-				(value) => {
-					this.setState({ story: value });
-				}
-			);
+		localForage.getItem(id).then((value) => this.setState({ story: value }));
+		localForage.getItem(`related-${id}`).then((value) => value ? this.setState({ related: value }) : null);

 		fetch('/api/' + id)
 			.then(res => res.json())
 			.then(
 				(result) => {
-					this.setState({ story: result.story }, () => {
+					this.setState({ story: result.story, related: result.related }, () => {
 						const hash = window.location.hash.substring(1);
 						if (hash) {
 							document.getElementById(hash).scrollIntoView();
 						}
 					});
 					localForage.setItem(id, result.story);
+					localForage.setItem(`related-${id}`, result.related);
 				},
 				(error) => {
 					this.setState({ error: true });
@@ -110,6 +108,7 @@ class Article extends React.Component {
 	render() {
 		const id = this.props.match.params.id;
 		const story = this.state.story;
+		const related = this.state.related.filter(r => r.id != id);
 		const error = this.state.error;

 		return (
@@ -128,6 +127,7 @@ class Article extends React.Component {
 						</div>

 						{infoLine(story)}
+						{otherDiscussions(related)}

 						<div className='comments'>
 							{story.comments.map(c => this.displayComment(story, c, 0))}
@@ -142,4 +142,4 @@ class Article extends React.Component {
 	}
 }

-export default Article;
+export default Comments;
--- a/webclient/src/pages/Feed.js
+++ b/webclient/src/pages/Feed.js
@@ -30,10 +30,14 @@ class Feed extends React.Component {
 						stories.forEach((x, i) => {
 							fetch('/api/' + x.id)
 								.then(res => res.json())
-								.then(({ story }) => {
-									localForage.setItem(x.id, story)
-										.then(console.log('preloaded', x.id, x.title));
+								.then(({ story, related }) => {
+									// Pass a callback so the log runs only after both writes have completed.
+									Promise.all([
+										localForage.setItem(x.id, story),
+										localForage.setItem(`related-${x.id}`, related)
+									]).then(() => console.log('preloaded', x.id, x.title));
 									this.props.updateCache(x.id, story);
+									this.props.updateCache(`related-${x.id}`, related);
 								}, error => { }
 								);
 						});
--- a/webclient/src/utils.js
+++ b/webclient/src/utils.js
@@ -15,18 +15,41 @@ export const sourceLink = (story) => {

 export const infoLine = (story) => (
 	<div className="info">
-		{story.score} points by <a href={story.author_link}>{story.author}</a>
-    &#8203; {moment.unix(story.date).fromNow()}
-    &#8203; on <a href={story.link}>{story.source}</a> | &#8203;
+		{story.score} points by {story.author_link ? <a href={story.author_link}>{story.author}</a> : story.author}
+		&#8203; {moment.unix(story.date).fromNow()}
+		&#8203; on <a href={story.link}>{story.source}</a> | &#8203;
 		<Link
 			className={story.num_comments > 99 ? "hot" : ""}
-			to={"/" + story.id + "/c"}
-		>
+			to={"/" + story.id + "/c"}>
 			{story.num_comments} comment{story.num_comments !== 1 && "s"}
 		</Link>
 	</div>
 );

+/**
+ * Render an "Other discussions" line linking to the comment pages of related stories.
+ * Entries with no comments are skipped; returns null when none remain.
+ */
+export const otherDiscussions = (related) => {
+	const stories = related.filter(r => r.num_comments > 0);
+	if (!stories.length) {
+		return null;
+	}
+	return (
+		<div className='related indented info'>
+			<span>Other discussions: </span>
+			{stories.map((story, i) =>
+				<span id={story.id} key={story.id}>
+					{i !== 0 ? <> &bull; </> : <></>}
+					<Link className={story.num_comments > 99 ? "hot" : ""} to={"/" + story.id + "/c"}>
+						{story.source} ({story.num_comments} comment{story.num_comments !== 1 && "s"})
+					</Link>
+				</span>
+			)}
+		</div>
+	);
+}
+
 export class ToggleDot extends React.Component {
 	render() {
 		const id = this.props.id;
--- a/webclient/yarn.lock
+++ b/webclient/yarn.lock
Author	SHA1	Message	Date
Jason Schwarzenberger	2439c113b3	update declutter.	2020-11-24 16:54:21 +13:00
Jason Schwarzenberger	0f5e28136d	update declutter.	2020-11-24 11:20:37 +13:00
Jason Schwarzenberger	bb1413b586	sort substack feed by time.	2020-11-24 10:56:38 +13:00
Jason Schwarzenberger	0a27c0da1f	update declutter.	2020-11-24 10:42:41 +13:00
Jason Schwarzenberger	fe01ea52e5	get favicons for custom substack publications.	2020-11-24 10:36:31 +13:00
Jason Schwarzenberger	3daae5fa1b	change substack time parsing to misc.time	2020-11-23 16:46:54 +13:00
Jason Schwarzenberger	25caee17d6	add related stories to pre-fetch caching.	2020-11-23 16:26:37 +13:00
Jason Schwarzenberger	c1b6349771	namespace the refs for hn and substack.	2020-11-23 16:09:12 +13:00
Jason	54a4c7e55a	fix with try-catch	2020-11-23 01:20:40 +00:00
Jason	b12a3570b0	add logging, extend id length	2020-11-21 21:21:31 +00:00
Jason Schwarzenberger	0bfa920654	fix mistake.	2020-11-20 04:29:54 +00:00
Jason Schwarzenberger	9341b4d966	fix mistake.	2020-11-20 04:27:28 +00:00
Jason Schwarzenberger	a2e5faa3b5	fix empty source links.	2020-11-20 17:02:09 +13:00
Jason Schwarzenberger	a86eb98c1a	fix hn self posts related discussion.	2020-11-20 13:06:19 +13:00
Jason Schwarzenberger	abf7f0a802	force reader update in update-story.py	2020-11-20 12:21:27 +13:00
Jason Schwarzenberger	d288546d6f	update declutter.	2020-11-20 11:51:56 +13:00
Jason Schwarzenberger	cc130942ca	update declutter.	2020-11-20 11:48:46 +13:00
Jason Schwarzenberger	f0b14408d4	fix other discussions links.	2020-11-20 09:47:56 +13:00
Jason Schwarzenberger	e1830a589b	wip on other discussions ui.	2020-11-19 17:27:00 +13:00
Jason Schwarzenberger	32bc3b906b	add update-story.py	2020-11-19 15:06:55 +13:00
Jason Schwarzenberger	f5e65632b8	fix comment date.	2020-11-19 14:27:24 +13:00
Jason Schwarzenberger	1fe524207e	stuff comments.	2020-11-19 14:23:01 +13:00
Jason Schwarzenberger	dc3d17b171	update declutter	2020-11-19 12:30:27 +13:00
Jason Schwarzenberger	539350a83d	port separation.	2020-11-18 17:21:37 +13:00