Ignore dead and political stories

Fix Better HN api content extraction
Add Better HN as an API backup
2025-05-27 18:47:17 +00:00 · 2025-02-01 22:39:13 +00:00 · 2025-02-01 21:42:06 +00:00 · 2025-02-01 20:31:35 +00:00 · 2024-03-16 20:41:24 +00:00 · 2024-03-08 03:08:18 +00:00
40 changed files with 1716 additions and 753 deletions
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ $ source env/bin/activate
 (env) $ pip install -r requirements.txt
 ```

-Configure Praw for your Reddit account:
+Configure Praw for your Reddit account (optional):

 * Go to https://www.reddit.com/prefs/apps
 * Click "Create app"
@@ -44,16 +44,14 @@ Configure Praw for your Reddit account:
 * Description: blank
 * About URL: blank
 * Redirect URL: your GitHub profile
-* Submit, copy the client ID and client secret into `praw.ini`:
+* Submit, copy the client ID and client secret into `settings.py` below

 ```text
-(env) $ vim praw.ini
-[bot]
-client_id=paste here
-client_secret=paste here
-user_agent=script by github/your-username-here
+(env) $ vim settings.py.example
 ```

+Edit it and save it as `settings.py`.
+
 Now you can run the server:

 ```text
--- a/apiserver/.gitignore
+++ b/apiserver/.gitignore
@@ -105,7 +105,9 @@ ENV/
 # DB
 db.sqlite3

-praw.ini
+settings.py
 data.db
 data.db.bak
 data/archive/*
+data/backup/*
+qotnews.sqlite
--- a/apiserver/archive.py
+++ b/apiserver/archive.py
@@ -1,52 +0,0 @@
-from whoosh.analysis import StemmingAnalyzer, CharsetFilter, NgramFilter
-from whoosh.index import create_in, open_dir, exists_in
-from whoosh.fields import *
-from whoosh.qparser import QueryParser
-from whoosh.support.charset import accent_map
-
-analyzer = StemmingAnalyzer() | CharsetFilter(accent_map) | NgramFilter(minsize=3)
-
-title_field = TEXT(analyzer=analyzer, stored=True)
-id_field = ID(unique=True, stored=True)
-
-schema = Schema(
-        id=id_field,
-        title=title_field,
-        story=STORED,
-        )
-
-ARCHIVE_LOCATION = 'data/archive'
-
-ix = None
-
-def init():
-    global ix
-
-    if exists_in(ARCHIVE_LOCATION):
-        ix = open_dir(ARCHIVE_LOCATION)
-    else:
-        ix = create_in(ARCHIVE_LOCATION, schema)
-
-def update(story):
-    writer = ix.writer()
-    writer.update_document(
-            id=story['id'],
-            title=story['title'],
-            story=story,
-            )
-    writer.commit()
-
-def get_story(sid):
-    with ix.searcher() as searcher:
-        result = searcher.document(id=sid)
-        return result['story'] if result else None
-
-def search(search):
-    with ix.searcher() as searcher:
-        query = QueryParser('title', ix.schema).parse(search)
-        results = searcher.search(query)
-        stories = [r['story'] for r in results]
-        for s in stories:
-            s.pop('text', '')
-            s.pop('comments', '')
-        return stories
--- a/apiserver/data/archive/.gitkeep
+++ b/apiserver/data/archive/.gitkeep
--- a/apiserver/database.py
+++ b/apiserver/database.py
@@ -0,0 +1,122 @@
+import json
+
+from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker
+from sqlalchemy.exc import IntegrityError
+
+engine = create_engine('sqlite:///data/qotnews.sqlite', connect_args={'timeout': 360})
+Session = sessionmaker(bind=engine)
+
+Base = declarative_base()
+
+class Story(Base):
+    # ORM mapping for the 'stories' table: one row per stored story.
+    __tablename__ = 'stories'
+
+    # Story id; put_story() fills this from story['id'].
+    sid = Column(String(16), primary_key=True)
+    # Reference of the story; put_story() fills this from story['ref']. Unique.
+    ref = Column(String(16), unique=True)
+    # JSON dump of the story dict without its 'text' and 'comments' keys.
+    meta_json = Column(String)
+    # JSON dump of the complete story dict.
+    full_json = Column(String)
+    # Story title; NULL when the story dict has no 'title' key.
+    title = Column(String)
+
+class Reflist(Base):
+    # ORM mapping for the 'reflist' table, pairing a ref with a story id.
+    __tablename__ = 'reflist'
+
+    # Integer primary key; get_reflist() and get_stories() order by it, descending.
+    rid = Column(Integer, primary_key=True)
+    ref = Column(String(16), unique=True)
+    # Foreign key to stories.sid; unique, so at most one reflist row per story.
+    sid = Column(String, ForeignKey('stories.sid'), unique=True)
+    # Name of the feed the ref came from -- presumably e.g. 'hackernews'; verify against callers.
+    source = Column(String(16))
+
+def init():
+    """Create the tables declared on ``Base`` in the configured engine."""
+    Base.metadata.create_all(engine)
+
+def get_story(sid):
+    """Return the Story row whose primary key is ``sid``, or None.
+
+    The session is closed before returning so it is not leaked. The
+    returned object is detached; column values loaded by the query remain
+    readable on it.
+    """
+    session = Session()
+    try:
+        return session.query(Story).get(sid)
+    finally:
+        session.close()
+
+def put_story(story):
+    """Insert or update the stories row for ``story``.
+
+    ``story`` is a dict that must contain 'id' and 'ref'. The full dict is
+    stored as ``full_json``; a copy without 'text' and 'comments' is stored
+    as ``meta_json``. The caller's dict is not modified. Any error is
+    re-raised after the transaction has been rolled back.
+    """
+    story = story.copy()
+    full_json = json.dumps(story)
+
+    story.pop('text', None)
+    story.pop('comments', None)
+    meta_json = json.dumps(story)
+
+    # Create the session before the try block: if Session() failed inside
+    # it, the handlers would touch an unbound name and mask the real error.
+    session = Session()
+    try:
+        s = Story(
+            sid=story['id'],
+            ref=story['ref'],
+            full_json=full_json,
+            meta_json=meta_json,
+            title=story.get('title', None),
+        )
+        # merge() updates the existing row for this primary key, if any.
+        session.merge(s)
+        session.commit()
+    except:
+        session.rollback()
+        raise
+    finally:
+        session.close()
+
+def get_story_by_ref(ref):
+    """Return the first Story whose ``ref`` column equals ``ref``, or None.
+
+    The session is closed before returning so it is not leaked; the
+    returned object is detached.
+    """
+    session = Session()
+    try:
+        return session.query(Story).filter(Story.ref==ref).first()
+    finally:
+        session.close()
+
+def get_reflist(amount):
+    """Return up to ``amount`` reflist rows, highest ``rid`` first.
+
+    Each row is returned as a plain dict with 'ref', 'sid' and 'source', so
+    nothing depends on the session once it has been closed.
+    """
+    session = Session()
+    try:
+        q = session.query(Reflist).order_by(Reflist.rid.desc()).limit(amount)
+        return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]
+    finally:
+        session.close()
+
+def get_stories(amount, skip=0):
+    """Return ``meta_json`` strings of stories that have a title.
+
+    Rows follow reflist order (highest ``rid`` first); ``skip`` rows are
+    skipped and at most ``amount`` are returned. The query is consumed
+    before the session is closed.
+    """
+    session = Session()
+    try:
+        q = session.query(Reflist, Story.meta_json).\
+                order_by(Reflist.rid.desc()).\
+                join(Story).\
+                filter(Story.title != None).\
+                offset(skip).\
+                limit(amount)
+        return [x[1] for x in q]
+    finally:
+        session.close()
+
+def put_ref(ref, sid, source):
+    """Insert a reflist row for (``ref``, ``sid``, ``source``).
+
+    Any error (for example a violated unique constraint) is re-raised after
+    the transaction has been rolled back.
+    """
+    # Created outside the try block so the handlers never see an unbound name.
+    session = Session()
+    try:
+        r = Reflist(ref=ref, sid=sid, source=source)
+        session.add(r)
+        session.commit()
+    except:
+        session.rollback()
+        raise
+    finally:
+        session.close()
+
+def del_ref(ref):
+    """Delete the reflist rows whose ``ref`` column equals ``ref``.
+
+    Any error is re-raised after the transaction has been rolled back.
+    """
+    # Created outside the try block so the handlers never see an unbound name.
+    session = Session()
+    try:
+        session.query(Reflist).filter(Reflist.ref==ref).delete()
+        session.commit()
+    except:
+        session.rollback()
+        raise
+    finally:
+        session.close()
+
+def count_stories():
+    """Return the number of rows in the stories table."""
+    # Created outside the try block so ``finally`` never sees an unbound name.
+    session = Session()
+    try:
+        return session.query(Story).count()
+    finally:
+        session.close()
+
+def get_story_list():
+    """Return every story id as a list of one-element rows ``(sid,)``."""
+    # Created outside the try block so ``finally`` never sees an unbound name.
+    session = Session()
+    try:
+        return session.query(Story.sid).all()
+    finally:
+        session.close()
+
+if __name__ == '__main__':
+    init()
+
+    #print(get_story_by_ref('hgi3sy'))
+    print(len(get_reflist(99999)))
--- a/apiserver/feed.py
+++ b/apiserver/feed.py
@@ -7,47 +7,40 @@ import requests
 import time
 from bs4 import BeautifulSoup

-from feeds import hackernews, reddit, tildes, manual
+import settings
+from feeds import hackernews, reddit, tildes, manual, lobsters
+import utils

-OUTLINE_API = 'https://outlineapi.com/article'
-ARCHIVE_API = 'https://archive.fo/submit/'
-READ_API = 'http://127.0.0.1:33843'
-
-INVALID_FILES = ['.pdf', '.png', '.jpg', '.gif']
-INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
+INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com', 'sec.gov']
 TWO_DAYS = 60*60*24*2

 def list():
    feed = []
-    feed += [(x, 'hackernews') for x in hackernews.feed()[:10]]
-    feed += [(x, 'reddit') for x in reddit.feed()[:10]]
-    feed += [(x, 'tildes') for x in tildes.feed()[:5]]
+    if settings.NUM_HACKERNEWS:
+        feed += [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
+
+    if settings.NUM_LOBSTERS:
+        feed += [(x, 'lobsters') for x in lobsters.feed()[:settings.NUM_LOBSTERS]]
+
+    if settings.NUM_REDDIT:
+        feed += [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
+
+    if settings.NUM_TILDES:
+        feed += [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
+
    return feed

 def get_article(url):
-    try:
-        params = {'source_url': url}
-        headers = {'Referer': 'https://outline.com/'}
-        r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=20)
-        if r.status_code == 429:
-            logging.info('Rate limited by outline, sleeping 30s and skipping...')
-            time.sleep(30)
+    if not settings.READER_URL:
+        logging.info('Readerserver not configured, aborting.')
        return ''
-        if r.status_code != 200:
-            raise Exception('Bad response code ' + str(r.status_code))
-        html = r.json()['data']['html']
-        if 'URL is not supported by Outline' in html:
-            raise Exception('URL not supported by Outline')
-        return html
-    except KeyboardInterrupt:
-        raise
-    except BaseException as e:
-        logging.error('Problem outlining article: {}'.format(str(e)))

-    logging.info('Trying our server instead...')
+    if url.startswith('https://twitter.com'):
+        logging.info('Replacing twitter.com url with nitter.net')
+        url = url.replace('twitter.com', 'nitter.net')

    try:
-        r = requests.post(READ_API, data=dict(url=url), timeout=10)
+        r = requests.post(settings.READER_URL, data=dict(url=url), timeout=20)
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.text
@@ -57,58 +50,72 @@ def get_article(url):
        logging.error('Problem getting article: {}'.format(str(e)))
        return ''

-def get_first_image(text):
-    soup = BeautifulSoup(text, features='html.parser')
-
+def get_content_type(url):
+    # Return the Content-Type header served for ``url``.
+    # A browser User-Agent is tried first, then a Googlebot one; '' is
+    # returned when both attempts fail, so the result is always a string.
    try:
-        first_img = soup.find('img')
-        url = first_img['src']
-        headers = {'User-Agent': 'Twitterbot/1.0'}
-        length = requests.get(url, headers=headers).headers['Content-length']
-        if int(length) > 1000000: raise
-        return url
+        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
+        return requests.get(url, headers=headers, timeout=5).headers['content-type']
    except:
-        return ''
+        # Returning here would make the Googlebot retry below unreachable.
+        pass

+    try:
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
+            'X-Forwarded-For': '66.249.66.1',
+        }
+        return requests.get(url, headers=headers, timeout=10).headers['content-type']
+    except:
+        # update_story() calls .startswith() on the result, so never return None.
+        return ''
+
 def update_story(story, is_manual=False):
    res = {}

-    logging.info('Updating story ' + str(story['ref']))
-
+    try:
        if story['source'] == 'hackernews':
            res = hackernews.story(story['ref'])
+        elif story['source'] == 'lobsters':
+            res = lobsters.story(story['ref'])
        elif story['source'] == 'reddit':
            res = reddit.story(story['ref'])
        elif story['source'] == 'tildes':
            res = tildes.story(story['ref'])
        elif story['source'] == 'manual':
            res = manual.story(story['ref'])
+    except BaseException as e:
+        utils.alert_tanner('Problem updating {} story, ref {}: {}'.format(story['source'], story['ref'], str(e)))
+        logging.exception(e)
+        return False

    if res:
        story.update(res) # join dicts
    else:
-        logging.info('Article not ready yet')
+        logging.info('Story not ready yet')
        return False

    if story['date'] and not is_manual and story['date'] + TWO_DAYS < time.time():
-        logging.info('Article too old, removing')
+        logging.info('Story too old, removing. Date: {}'.format(story['date']))
        return False

    if story.get('url', '') and not story.get('text', ''):
-        if any([story['url'].endswith(ext) for ext in INVALID_FILES]):
-            logging.info('URL invalid file type')
+        if not get_content_type(story['url']).startswith('text/'):
+            logging.info('URL invalid file type / content type:')
+            logging.info(story['url'])
            return False

        if any([domain in story['url'] for domain in INVALID_DOMAINS]):
-            logging.info('URL invalid domain')
+            logging.info('URL invalid domain:')
+            logging.info(story['url'])
            return False

+        if 'trump' in story['title'].lower() or 'musk' in story['title'].lower():
+            logging.info('Trump / Musk story, skipping')
+            logging.info(story['url'])
+            return False
+
+
        logging.info('Getting article ' + story['url'])
        story['text'] = get_article(story['url'])
        if not story['text']: return False

-        story['img'] = get_first_image(story['text'])
-
    return True

 if __name__ == '__main__':
@@ -122,9 +129,7 @@ if __name__ == '__main__':

    #print(get_article('https://www.bloomberg.com/news/articles/2019-09-23/xi-s-communists-under-pressure-as-high-prices-hit-china-workers'))

-    a = get_article('https://blog.joinmastodon.org/2019/10/mastodon-3.0/')
+    a = get_content_type('https://tefkos.comminfo.rutgers.edu/Courses/e530/Readings/Beal%202008%20full%20text%20searching.pdf')
    print(a)
-    u = get_first_image(a)
-    print(u)

    print('done')
--- a/apiserver/feeds/hackernews.py
+++ b/apiserver/feeds/hackernews.py
@@ -12,7 +12,8 @@ import requests
 from utils import clean

 API_TOPSTORIES = lambda x: 'https://hacker-news.firebaseio.com/v0/topstories.json'
-API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
+ALG_API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
+BHN_API_ITEM = lambda x : 'https://api.hnpwa.com/v0/item/{}.json'.format(x)

 SITE_LINK = lambda x : 'https://news.ycombinator.com/item?id={}'.format(x)
 SITE_AUTHOR_LINK = lambda x : 'https://news.ycombinator.com/user?id={}'.format(x)
@@ -25,14 +26,24 @@ def api(route, ref=None):
        return r.json()
    except KeyboardInterrupt:
        raise
+    except BaseException as e:
+        logging.error('Problem hitting hackernews API: {}, trying again'.format(str(e)))
+
+    try:
+        r = requests.get(route(ref), timeout=15)
+        if r.status_code != 200:
+            raise Exception('Bad response code ' + str(r.status_code))
+        return r.json()
+    except KeyboardInterrupt:
+        raise
    except BaseException as e:
        logging.error('Problem hitting hackernews API: {}'.format(str(e)))
        return False

 def feed():
-    return api(API_TOPSTORIES) or []
+    return [str(x) for x in api(API_TOPSTORIES) or []]

-def comment(i):
+def alg_comment(i):
    if 'author' not in i:
        return False

@@ -41,21 +52,25 @@ def comment(i):
    c['score'] = i.get('points', 0)
    c['date'] = i.get('created_at_i', 0)
    c['text'] = clean(i.get('text', '') or '')
-    c['comments'] = [comment(j) for j in i['children']]
+    c['comments'] = [alg_comment(j) for j in i['children']]
    c['comments'] = list(filter(bool, c['comments']))
    return c

-def comment_count(i):
+def alg_comment_count(i):
    alive = 1 if i['author'] else 0
-    return sum([comment_count(c) for c in i['comments']]) + alive
+    return sum([alg_comment_count(c) for c in i['comments']]) + alive

-def story(ref):
-    r = api(API_ITEM, ref)
-    if not r: return False
+def alg_story(ref):
+    r = api(ALG_API_ITEM, ref)
+    if not r:
+        logging.info('Bad Algolia Hackernews API response.')
+        return None

    if 'deleted' in r:
+        logging.info('Story was deleted.')
        return False
    elif r.get('type', '') != 'story':
+        logging.info('Type "{}" is not "story".'.format(r.get('type', '')))
        return False

    s = {}
@@ -66,17 +81,85 @@ def story(ref):
    s['title'] = r.get('title', '')
    s['link'] = SITE_LINK(ref)
    s['url'] = r.get('url', '')
-    s['comments'] = [comment(i) for i in r['children']]
+    s['comments'] = [alg_comment(i) for i in r['children']]
    s['comments'] = list(filter(bool, s['comments']))
-    s['num_comments'] = comment_count(s) - 1
+    s['num_comments'] = alg_comment_count(s) - 1

    if 'text' in r and r['text']:
        s['text'] = clean(r['text'] or '')

    return s

+def bhn_comment(i):
+    # Convert one Better HN comment item (and its replies, recursively) into
+    # our comment dict. Items without a 'user' key yield False and are
+    # filtered out of their parent's reply list.
+    if 'user' not in i:
+        return False
+
+    author = i.get('user', '')
+    posted = i.get('time', 0)
+    body = clean(i.get('content', '') or '')
+    replies = [bhn_comment(child) for child in i['comments']]
+    return {
+        'author': author,
+        'score': 0,   # Not present?
+        'date': posted,
+        'text': body,
+        'comments': [reply for reply in replies if reply],
+    }
+
+def bhn_story(ref):
+    # Fetch story ``ref`` from the Better HN API and convert it to our story
+    # dict. Returns None when the API call failed, False when the item must
+    # be skipped (deleted, dead, or not of type "link"), else the dict.
+    r = api(BHN_API_ITEM, ref)
+    if not r:
+        logging.info('Bad BetterHN Hackernews API response.')
+        return None
+
+    if 'deleted' in r:   # TODO: verify
+        logging.info('Story was deleted.')
+        return False
+    elif r.get('dead', False):
+        # Log dead stories as such; they were reported as "deleted" before.
+        logging.info('Story is dead.')
+        return False
+    elif r.get('type', '') != 'link':
+        logging.info('Type "{}" is not "link".'.format(r.get('type', '')))
+        return False
+
+    s = {}
+    s['author'] = r.get('user', '')
+    s['author_link'] = SITE_AUTHOR_LINK(r.get('user', ''))
+    s['score'] = r.get('points', 0)
+    s['date'] = r.get('time', 0)
+    s['title'] = r.get('title', '')
+    s['link'] = SITE_LINK(ref)
+    # "or ''" guards against a null url, which would crash startswith().
+    s['url'] = r.get('url', '') or ''
+    if s['url'].startswith('item'):
+        # A url starting with "item" is replaced with the story's own page.
+        s['url'] = SITE_LINK(ref)
+    # A missing or null comment list is treated as "no comments".
+    s['comments'] = [bhn_comment(i) for i in r.get('comments') or []]
+    s['comments'] = list(filter(bool, s['comments']))
+    s['num_comments'] = r.get('comments_count', 0)
+
+    if 'content' in r and r['content']:
+        s['text'] = clean(r['content'] or '')
+
+    return s
+
+def story(ref):
+    # Fetch a story via Algolia, falling back to Better HN only when Algolia
+    # gave no usable response (None). Returns False for stories that should
+    # be skipped, including ones below the popularity threshold.
+    result = alg_story(ref)
+    if result is None:
+        result = bhn_story(ref)
+    if not result:
+        return False
+
+    if not (result['score'] >= 25 or result['num_comments'] >= 10):
+        logging.info('Score ({}) or num comments ({}) below threshold.'.format(result['score'], result['num_comments']))
+        return False
+
+    return result
+
 # scratchpad so I can quickly develop the parser
 if __name__ == '__main__':
-    #print(feed())
+    print(feed())
    #print(story(20763961))
-    print(story(20802050))
+    #print(story(20802050))
+
+    #print(story(42899834))   # type "job"
+    #print(story(42900076))   # Ask HN
+    #print(story(42898201))   # Show HN
+    #print(story(42899703))   # normal
+    print(story(42902678))   # bad title?
--- a/apiserver/feeds/lobsters.py
+++ b/apiserver/feeds/lobsters.py
@@ -0,0 +1,120 @@
+import logging
+logging.basicConfig(
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+        level=logging.DEBUG)
+
+if __name__ == '__main__':
+    import sys
+    sys.path.insert(0,'.')
+
+import requests
+from datetime import datetime
+
+from utils import clean
+
+API_HOTTEST = lambda x: 'https://lobste.rs/hottest.json'
+API_ITEM = lambda x : 'https://lobste.rs/s/{}.json'.format(x)
+
+SITE_LINK = lambda x : 'https://lobste.rs/s/{}'.format(x)
+SITE_AUTHOR_LINK = lambda x : 'https://lobste.rs/u/{}'.format(x)
+
+def api(route, ref=None):
+    # GET route(ref) and return the decoded JSON. A failed first attempt
+    # (5 s timeout) is retried once with a 15 s timeout; False is returned
+    # when the retry fails too. KeyboardInterrupt always propagates.
+    for timeout, is_last in ((5, False), (15, True)):
+        try:
+            r = requests.get(route(ref), timeout=timeout)
+            if r.status_code != 200:
+                raise Exception('Bad response code ' + str(r.status_code))
+            return r.json()
+        except KeyboardInterrupt:
+            raise
+        except BaseException as e:
+            if is_last:
+                logging.error('Problem hitting lobsters API: {}'.format(str(e)))
+                return False
+            logging.error('Problem hitting lobsters API: {}, trying again'.format(str(e)))
+
+def feed():
+    # Short ids of the stories on the "hottest" page; empty list on failure.
+    hottest = api(API_HOTTEST)
+    if not hottest:
+        return []
+    return [item['short_id'] for item in hottest]
+
+def unix(date_str):
+    # Convert a timestamp such as '2020-06-25T10:52:43.000-05:00' to integer
+    # Unix seconds. Every colon is removed first, which is why the format
+    # string has no separators in the time or in the UTC offset.
+    compact = ''.join(date_str.split(':'))
+    parsed = datetime.strptime(compact, '%Y-%m-%dT%H%M%S.%f%z')
+    return int(parsed.timestamp())
+
+def make_comment(i):
+    # Build our comment dict from one Lobsters comment item. Missing author
+    # or creation date fall back to '' and 0. 'comments' starts empty;
+    # replies are attached afterwards by the caller.
+    return {
+        'author': i['commenting_user'] if 'commenting_user' in i else '',
+        'score': i.get('score', 0),
+        'date': unix(i['created_at']) if 'created_at' in i else 0,
+        'text': clean(i.get('comment', '') or ''),
+        'comments': [],
+    }
+
+def iter_comments(flat_comments):
+    # Turn the flat, depth-annotated comment list into a nested tree.
+    # ``parent_stack`` holds the chain of ancestors of the comment being
+    # placed: index 0 is the top-level comment, the last entry the parent.
+    nested_comments = []
+    parent_stack = []
+    for comment in flat_comments:
+        c = make_comment(comment)
+        indent = comment['depth']
+
+        # Drop everything at this depth or deeper; what is left are ancestors.
+        parent_stack = parent_stack[:indent]
+        if indent == 0 or not parent_stack:
+            # Top-level comment, or a reply whose parent never appeared:
+            # keep the latter at the top level instead of raising IndexError.
+            nested_comments.append(c)
+            parent_stack = [c]
+        else:
+            parent_stack[-1]['comments'].append(c)
+            parent_stack.append(c)
+    return nested_comments
+
+def story(ref):
+    # Fetch Lobsters story ``ref`` and convert it to our story dict.
+    # Returns False when the API call failed or when the story is below the
+    # popularity threshold (score < 15 and fewer than 10 comments).
+    r = api(API_ITEM, ref)
+    if not r:
+        logging.info('Bad Lobsters API response.')
+        return False
+
+    s = {}
+    try:
+        s['author'] = r['submitter_user']
+        s['author_link'] = SITE_AUTHOR_LINK(s['author'])
+    except KeyError:
+        s['author'] = ''
+        s['author_link'] = ''
+    s['score'] = r.get('score', 0)
+    try:
+        s['date'] = unix(r['created_at'])
+    except KeyError:
+        s['date'] = 0
+    s['title'] = r.get('title', '')
+    s['link'] = SITE_LINK(ref)
+    s['url'] = r.get('url', '')
+    # Read defensively like every other field: a missing or null comment
+    # list or count must not abort the whole story with a KeyError.
+    s['comments'] = iter_comments(r.get('comments') or [])
+    s['num_comments'] = r.get('comment_count', 0)
+
+    if s['score'] < 15 and s['num_comments'] < 10:
+        logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
+        return False
+
+    if 'description' in r and r['description']:
+        s['text'] = clean(r['description'] or '')
+
+    return s
+
+# scratchpad so I can quickly develop the parser
+if __name__ == '__main__':
+    #print(feed())
+    import json
+    print(json.dumps(story('fzvd1v'), indent=4))
+    #print(json.dumps(story('ixyv5u'), indent=4))
--- a/apiserver/feeds/manual.py
+++ b/apiserver/feeds/manual.py
@@ -7,12 +7,15 @@ import requests
 import time
 from bs4 import BeautifulSoup

-USER_AGENT = 'Twitterbot/1.0'
+USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'

 def api(route):
    try:
-        headers = {'User-Agent': USER_AGENT}
-        r = requests.get(route, headers=headers, timeout=5)
+        headers = {
+            'User-Agent': USER_AGENT,
+            'X-Forwarded-For': '66.249.66.1',
+        }
+        r = requests.get(route, headers=headers, timeout=10)
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.text
@@ -24,7 +27,9 @@ def api(route):

 def story(ref):
    html = api(ref)
-    if not html: return False
+    if not html:
+        logging.info('Bad http GET response.')
+        return False

    soup = BeautifulSoup(html, features='html.parser')

@@ -33,7 +38,7 @@ def story(ref):
    s['author_link'] = 'https://news.t0.vc'
    s['score'] = 0
    s['date'] = int(time.time())
-    s['title'] = str(soup.title.string)
+    s['title'] = str(soup.title.string) if soup.title else ref
    s['link'] = ref
    s['url'] = ref
    s['comments'] = []
--- a/apiserver/feeds/reddit.py
+++ b/apiserver/feeds/reddit.py
@@ -12,25 +12,28 @@ from praw.exceptions import PRAWException
 from praw.models import MoreComments
 from prawcore.exceptions import PrawcoreException

+import settings
 from utils import render_md, clean

-SUBREDDITS = 'Economics+Foodforthought+TrueReddit+business+technology+privacy'
-
-SITE_LINK = lambda x : 'https://old.reddit.com/{}'.format(x)
+SITE_LINK = lambda x : 'https://old.reddit.com{}'.format(x)
 SITE_AUTHOR_LINK = lambda x : 'https://old.reddit.com/u/{}'.format(x)

-reddit = praw.Reddit('bot')
+if settings.NUM_REDDIT:
+    reddit = praw.Reddit(
+        client_id=settings.REDDIT_CLIENT_ID,
+        client_secret=settings.REDDIT_CLIENT_SECRET,
+        user_agent=settings.REDDIT_USER_AGENT,
+    )
+
+    subs = '+'.join(settings.SUBREDDITS)

 def feed():
    try:
-        return [x.id for x in reddit.subreddit(SUBREDDITS).hot()]
+        return [x.id for x in reddit.subreddit(subs).hot()]
    except KeyboardInterrupt:
        raise
-    except PRAWException as e:
-        logging.error('Problem hitting reddit API: {}'.format(str(e)))
-        return []
-    except PrawcoreException as e:
-        logging.error('Problem hitting reddit API: {}'.format(str(e)))
+    except BaseException as e:
+        logging.critical('Problem hitting reddit API: {}'.format(str(e)))
        return []

 def comment(i):
@@ -53,7 +56,9 @@ def comment(i):
 def story(ref):
    try:
        r = reddit.submission(ref)
-        if not r: return False
+        if not r:
+            logging.info('Bad Reddit API response.')
+            return False

        s = {}
        s['author'] = r.author.name if r.author else '[Deleted]'
@@ -67,6 +72,10 @@ def story(ref):
        s['comments'] = list(filter(bool, s['comments']))
        s['num_comments'] = r.num_comments

+        if s['score'] < 25 and s['num_comments'] < 10:
+            logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
+            return False
+
        if r.selftext:
            s['text'] = render_md(clean(r.selftext))

@@ -75,10 +84,10 @@ def story(ref):
    except KeyboardInterrupt:
        raise
    except PRAWException as e:
-        logging.error('Problem hitting reddit API: {}'.format(str(e)))
+        logging.critical('Problem hitting reddit API: {}'.format(str(e)))
        return False
    except PrawcoreException as e:
-        logging.error('Problem hitting reddit API: {}'.format(str(e)))
+        logging.critical('Problem hitting reddit API: {}'.format(str(e)))
        return False

 # scratchpad so I can quickly develop the parser
--- a/apiserver/feeds/tildes.py
+++ b/apiserver/feeds/tildes.py
@@ -34,7 +34,7 @@ def api(route):
    except KeyboardInterrupt:
        raise
    except BaseException as e:
-        logging.error('Problem hitting tildes website: {}'.format(str(e)))
+        logging.critical('Problem hitting tildes website: {}'.format(str(e)))
        return False

 def feed():
@@ -71,11 +71,15 @@ def story(ref):
        html = api(SITE_LINK(group_lookup[ref], ref))
    else:
        html = api(API_ITEM(ref))
-    if not html: return False
+    if not html:
+        logging.info('Bad Tildes API response.')
+        return False

    soup = BeautifulSoup(html, features='html.parser')
    a = soup.find('article', class_='topic-full')
-    if a is None: return False
+    if a is None:
+        logging.info('Tildes <article> element not found.')
+        return False

    h = a.find('header')
    lu = h.find('a', class_='link-user')
@@ -83,6 +87,7 @@ def story(ref):
    error = a.find('div', class_='text-error')
    if error:
        if 'deleted' in error.string or 'removed' in error.string:
+            logging.info('Article was deleted or removed.')
            return False

    s = {}
@@ -91,7 +96,7 @@ def story(ref):
    s['score'] = int(h.find('span', class_='topic-voting-votes').string)
    s['date'] = unix(h.find('time')['datetime'])
    s['title'] = str(h.h1.string)
-    s['group'] = str(soup.find('a', class_='site-header-context').string)
+    s['group'] = str(soup.find('div', class_='site-header-context').a.string)
    group_lookup[ref] = s['group']
    s['link'] = SITE_LINK(s['group'], ref)
    ud = a.find('div', class_='topic-full-link')
@@ -102,7 +107,21 @@ def story(ref):
    ch = a.find('header', class_='topic-comments-header')
    s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0

-    if s['score'] < 8 and s['num_comments'] < 6:
+    if s['group'].split('.')[0] not in [
+        '~arts',
+        '~comp',
+        '~creative',
+        '~design',
+        '~engineering',
+        '~finance',
+        '~science',
+        '~tech',
+    ]:
+        logging.info('Group ({}) not in whitelist.'.format(s['group']))
+        return False
+
+    if s['score'] < 15 and s['num_comments'] < 10:
+        logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
        return False

    td = a.find('div', class_='topic-full-text')
@@ -113,7 +132,7 @@ def story(ref):

 # scratchpad so I can quickly develop the parser
 if __name__ == '__main__':
-    #print(feed())
+    print(feed())
    #normal = story('gxt')
    #print(normal)
    #no_comments = story('gxr')
@@ -122,8 +141,8 @@ if __name__ == '__main__':
    #print(self_post)
    #li_comment = story('gqx')
    #print(li_comment)
-    broken = story('l11')
-    print(broken)
+    #broken = story('q4y')
+    #print(broken)

    # make sure there's no self-reference
    #import copy
--- a/apiserver/migrate-shelve-to-whoosh.py
+++ b/apiserver/migrate-shelve-to-whoosh.py
@@ -1,26 +0,0 @@
-import shelve
-
-import archive
-
-archive.init()
-
-#with shelve.open('data/data') as db:
-#    to_delete = []
-#
-#    for s in db.values():
-#        if 'title' in s:
-#            archive.update(s)
-#        if 'id' in s:
-#            to_delete.append(s['id'])
-#
-#    for id in to_delete:
-#        del db[id]
-#
-#    for s in db['news_cache'].values():
-#        if 'title' in s:
-#            archive.update(s)
-
-#with shelve.open('data/whoosh') as db:
-#    for s in db['news_cache'].values():
-#        if 'title' in s and not archive.get_story(s['id']):
-#            archive.update(s)
--- a/apiserver/praw.ini.example
+++ b/apiserver/praw.ini.example
@@ -1,4 +0,0 @@
-[bot]
-client_id=
-client_secret=
-user_agent=
--- a/apiserver/requirements.txt
+++ b/apiserver/requirements.txt
@@ -1,22 +1,29 @@
-beautifulsoup4==4.8.1
-certifi==2019.9.11
+beautifulsoup4==4.9.1
+bleach==3.1.5
+certifi==2020.6.20
 chardet==3.0.4
-Click==7.0
+click==7.1.2
 commonmark==0.9.1
-Flask==1.1.1
+Flask==1.1.2
 Flask-Cors==3.0.8
-idna==2.8
+gevent==20.6.2
+greenlet==0.4.16
+idna==2.10
 itsdangerous==1.1.0
-Jinja2==2.10.3
+Jinja2==2.11.2
 MarkupSafe==1.1.1
-pkg-resources==0.0.0
+packaging==20.4
 praw==6.4.0
-prawcore==1.0.1
-requests==2.22.0
-six==1.12.0
-soupsieve==1.9.4
-update-checker==0.16
-urllib3==1.25.6
-websocket-client==0.56.0
-Werkzeug==0.16.0
-Whoosh==2.7.4
+prawcore==1.4.0
+pyparsing==2.4.7
+requests==2.24.0
+six==1.15.0
+soupsieve==2.0.1
+SQLAlchemy==1.3.18
+update-checker==0.17
+urllib3==1.25.9
+webencodings==0.5.1
+websocket-client==0.57.0
+Werkzeug==1.0.1
+zope.event==4.4
+zope.interface==5.1.0
--- a/apiserver/scripts/delete-story.py
+++ b/apiserver/scripts/delete-story.py
@@ -0,0 +1,52 @@
+import database
+import search
+import sys
+import settings
+import logging
+
+import json
+import requests
+
+database.init()
+search.init()
+
+def database_del_story(sid):
+    """Delete the stories row whose primary key is ``sid``.
+
+    Any error is re-raised after the transaction has been rolled back.
+    """
+    # Created outside the try block so the handlers never see an unbound name.
+    session = database.Session()
+    try:
+        session.query(database.Story).filter(database.Story.sid==sid).delete()
+        session.commit()
+    except:
+        session.rollback()
+        raise
+    finally:
+        session.close()
+
+def search_del_story(sid):
+    # Ask MeiliSearch to delete document ``sid`` from the qotnews index.
+    # Returns the decoded JSON reply on HTTP 202; any other outcome is
+    # logged and reported as False. KeyboardInterrupt propagates.
+    try:
+        endpoint = settings.MEILI_URL + 'indexes/qotnews/documents/'+sid
+        response = requests.delete(endpoint, timeout=2)
+        if response.status_code == 202:
+            return response.json()
+        raise Exception('Bad response code ' + str(response.status_code))
+    except KeyboardInterrupt:
+        raise
+    except BaseException as err:
+        logging.error('Problem deleting MeiliSearch story: {}'.format(str(err)))
+        return False
+
+if __name__ == '__main__':
+    # Usage: python delete-story.py [story id]
+    # Removes the story from the database, the search index and the reflist.
+    if len(sys.argv) == 2:
+        sid = sys.argv[1]
+    else:
+        print('Usage: python delete-story.py [story id]')
+        # sys.exit() instead of exit(): the latter is only injected by the
+        # site module and is not guaranteed to exist.
+        sys.exit(1)
+
+    story = database.get_story(sid)
+
+    if story:
+        print('Deleting story:')
+        print(story.title)
+        database_del_story(sid)
+        search_del_story(sid)
+        database.del_ref(story.ref)
+    else:
+        print('Story not found. Exiting.')
--- a/apiserver/scripts/fix-stories.py
+++ b/apiserver/scripts/fix-stories.py
@@ -0,0 +1,58 @@
+import time
+import json
+import logging
+
+import feed
+import database
+import search
+
+database.init()
+
+def fix_gzip_bug(story_list):
+    """Re-fetch stories whose stored text is full of Unicode replacement characters.
+
+    story_list is an iterable of rows whose first element is a story id. For
+    each story the U+FFFD characters in the 'text' field of its full_json are
+    counted; when there are at least FIX_THRESHOLD of them per 1000 characters
+    the story is re-fetched with feed.update_story() and, if still valid,
+    written back to the database and the search index.
+    """
+    FIX_THRESHOLD = 150
+    # Spelled as an escape so the literal cannot be mangled by editors or
+    # viewers. The '<EFBFBD>' spelling (the UTF-8 bytes of U+FFFD written out
+    # as text) does not occur in decoded article text, so it matched nothing.
+    REPLACEMENT_CHAR = '\ufffd'
+
+    for row in story_list:
+        try:
+            sid = row[0]
+            story = database.get_story(sid)
+            full_json = json.loads(story.full_json)
+            meta_json = json.loads(story.meta_json)
+            # guard against a missing or null 'text' field
+            text = full_json.get('text') or ''
+
+            count = text.count(REPLACEMENT_CHAR)
+            if not count: continue
+
+            ratio = count / len(text) * 1000
+            print('Bad story:', sid, 'Num ?:', count, 'Ratio:', ratio)
+            if ratio < FIX_THRESHOLD: continue
+
+            print('Attempting to fix...')
+
+            valid = feed.update_story(meta_json, is_manual=True)
+            if valid:
+                database.put_story(meta_json)
+                search.put_story(meta_json)
+                print('Success')
+            else:
+                print('Story was not valid')
+
+            # pause between re-fetches
+            time.sleep(3)
+
+        except KeyboardInterrupt:
+            raise
+        except BaseException as e:
+            logging.exception(e)
+            # drop into the debugger so the failing story can be inspected by hand
+            breakpoint()
+
+if __name__ == '__main__':
+    # Ask for confirmation before scanning every stored story.
+    prompt = 'Fix {} stories?'.format(database.count_stories())
+    print(prompt)
+    print('Press ENTER to continue, ctrl-c to cancel')
+    input()
+
+    fix_gzip_bug(database.get_story_list())
+
--- a/apiserver/scripts/reindex.py
+++ b/apiserver/scripts/reindex.py
@@ -0,0 +1,62 @@
+import logging
+logging.basicConfig(
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+        level=logging.INFO)
+
+import database
+from sqlalchemy import select
+import search
+import sys
+
+import time
+import json
+import requests
+
+database.init()
+search.init()
+
+BATCH_SIZE = 5000
+
+def put_stories(stories):
+    """POST a batch of story dicts to the qotnews MeiliSearch index.
+
+    Returns whatever search.meili_api() returns for the request.
+    """
+    route = 'indexes/qotnews/documents'
+    return search.meili_api(requests.post, route, stories)
+
+def get_update(update_id):
+    """Fetch the MeiliSearch task record for update_id via search.meili_api()."""
+    route = 'tasks/{}'.format(update_id)
+    return search.meili_api(requests.get, route)
+
+if __name__ == '__main__':
+    # Rebuild the MeiliSearch index from the database, sending at most
+    # BATCH_SIZE stories per request and waiting for each batch to finish.
+    num_stories = database.count_stories()
+
+    print('Reindex {} stories?'.format(num_stories))
+    print('Press ENTER to continue, ctrl-c to cancel')
+    input()
+
+    story_list = database.get_story_list()
+
+    count = 1
+    while len(story_list):
+        stories = []
+
+        for _ in range(BATCH_SIZE):
+            try:
+                sid = story_list.pop()
+            except IndexError:
+                break
+
+            story = database.get_story(sid)
+            print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
+            story_obj = json.loads(story.meta_json)
+            stories.append(story_obj)
+            count += 1
+
+        res = put_stories(stories)
+        # meili_api() returns False when the request failed; stop with a clear
+        # message instead of crashing on res['uid'].
+        if not res:
+            sys.exit('Failed to submit batch to MeiliSearch, aborting.')
+        update_id = res['uid']
+
+        print('Waiting for processing', end='')
+        while True:
+            update = get_update(update_id)
+            # A falsy reply means the status request itself failed (already
+            # logged by meili_api); keep polling rather than crash.
+            status = update['status'] if update else None
+            if status == 'succeeded':
+                break
+            if status == 'failed':
+                # A failed task never becomes 'succeeded', so waiting for it
+                # would loop forever.
+                print()
+                sys.exit('MeiliSearch task {} failed, aborting.'.format(update_id))
+            time.sleep(0.5)
+            print('.', end='', flush=True)
+
+        print()
+
+    print('Done.')
+
--- a/apiserver/scripts/tests.py
+++ b/apiserver/scripts/tests.py
@@ -0,0 +1,23 @@
+import time
+import requests
+
+def test_search_api():
+    """Time repeated requests to the local search endpoint and print the mean.
+
+    Raises requests.HTTPError (via raise_for_status) if any response is an
+    error status.
+    """
+    num_tests = 100
+    total_time = 0
+
+    for _ in range(num_tests):
+        # perf_counter is monotonic and high-resolution, unlike time.time(),
+        # so a system clock adjustment cannot skew the measured duration.
+        start = time.perf_counter()
+
+        res = requests.get('http://127.0.0.1:33842/api/search?q=iphone')
+        res.raise_for_status()
+
+        total_time += time.perf_counter() - start
+
+    avg_time = total_time / num_tests
+
+    print('Average search time:', avg_time)
+
+
+if __name__ == '__main__':
+    # Run the search benchmark when this file is executed as a script.
+    test_search_api()
--- a/apiserver/search.py
+++ b/apiserver/search.py
@@ -0,0 +1,65 @@
+import logging
+logging.basicConfig(
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+        level=logging.DEBUG)
+
+import requests
+import settings
+
+SEARCH_ENABLED = bool(settings.MEILI_URL)
+
+def meili_api(method, route, json=None, params=None, parse_json=True):
+    """Call a MeiliSearch endpoint and return its response body.
+
+    method is a requests-style callable (e.g. requests.get), route is appended
+    to settings.MEILI_URL, and json / params are passed through to the call.
+    Returns the decoded JSON when parse_json is true, otherwise the body text
+    decoded as UTF-8. Any failure, including a status code above 299, is
+    logged and False is returned; KeyboardInterrupt is re-raised.
+    """
+    try:
+        resp = method(settings.MEILI_URL + route, json=json, params=params, timeout=4)
+        status = resp.status_code
+        if status > 299:
+            raise Exception('Bad response code ' + str(status))
+        if not parse_json:
+            resp.encoding = 'utf-8'
+            return resp.text
+        return resp.json()
+    except KeyboardInterrupt:
+        raise
+    except BaseException as e:
+        logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e))
+        return False
+
+def create_index():
+    """Request creation of the 'qotnews' index with 'id' as its primary key."""
+    payload = {'uid': 'qotnews', 'primaryKey': 'id'}
+    return meili_api(requests.post, 'indexes', json=payload)
+
+def update_rankings():
+    """Send the ranking-rules setting for the qotnews index; returns meili_api()'s result."""
+    rules = ['typo', 'words', 'proximity', 'date:desc', 'exactness']
+    route = 'indexes/qotnews/settings/ranking-rules'
+    return meili_api(requests.post, route, json=rules)
+
+def update_attributes():
+    """Send the searchable and displayed attribute settings for the qotnews index.
+
+    Only the result of the second (displayed-attributes) request is returned;
+    the result of the searchable-attributes request is discarded.
+    """
+    searchable = ['title', 'url', 'author']
+    meili_api(requests.post, 'indexes/qotnews/settings/searchable-attributes', json=searchable)
+    displayed = ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments']
+    return meili_api(requests.post, 'indexes/qotnews/settings/displayed-attributes', json=displayed)
+
+def init():
+    """Create the index and push its settings, unless search is disabled."""
+    if SEARCH_ENABLED:
+        # the create_index() result is printed as-is (False if the call failed)
+        print(create_index())
+        update_rankings()
+        update_attributes()
+    else:
+        logging.info('Search is not enabled, skipping init.')
+
+def put_story(story):
+    """Send a single story document to the qotnews index.
+
+    Returns None when search is disabled, otherwise meili_api()'s result.
+    """
+    if SEARCH_ENABLED:
+        return meili_api(requests.post, 'indexes/qotnews/documents', [story])
+    return None
+
+def search(q):
+    """Return MeiliSearch's raw JSON response text for the query q.
+
+    The text is requested unparsed (parse_json=False) so it can be passed on
+    without re-serialising. When search is disabled an empty result set of
+    the same shape is returned as JSON text rather than a Python list, so the
+    return type does not depend on configuration. Returns False if the API
+    call fails (see meili_api).
+    """
+    if not SEARCH_ENABLED: return '{"hits":[]}'
+    params = dict(q=q, limit=settings.FEED_LENGTH)
+    return meili_api(requests.get, 'indexes/qotnews/search', params=params, parse_json=False)
+    
+if __name__ == '__main__':
+    # Manual smoke test: set up the index, then print the results of
+    # re-sending the ranking rules and of a sample query.
+    init()
+
+    print(update_rankings())
+
+    print(search('facebook'))
--- a/apiserver/server.py
+++ b/apiserver/server.py
@@ -3,13 +3,21 @@ logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO)

+import gevent
+from gevent import monkey
+monkey.patch_all()
+from gevent.pywsgi import WSGIServer
+
 import copy
+import json
 import threading
+import traceback
 import time
-import shelve
 from urllib.parse import urlparse, parse_qs

-import archive
+import settings
+import database
+import search
 import feed
 from utils import gen_rand_id

@@ -17,81 +25,41 @@ from flask import abort, Flask, request, render_template, stream_with_context, R
 from werkzeug.exceptions import NotFound
 from flask_cors import CORS

-import gevent
-from gevent import monkey
-from gevent.pywsgi import WSGIServer
-
-monkey.patch_all()
-
-archive.init()
-
-CACHE_LENGTH = 150
-DATA_FILE = 'data/data'
+database.init()
+search.init()

 news_index = 0

-with shelve.open(DATA_FILE) as db:
-    logging.info('Reading caches from disk...')
-    news_list = db.get('news_list', [])
-    news_ref_to_id = db.get('news_ref_to_id', {})
-    news_cache = db.get('news_cache', {})
-
-    # clean cache if broken
-    try:
-        for ref in news_list:
-            nid = news_ref_to_id[ref]
-            _ = news_cache[nid]
-    except KeyError as e:
-        logging.error('Unable to find key {}. Trying to remove...'.format(str(e)))
-        news_list.remove(str(e))
-
-def get_story(sid):
-    if sid in news_cache:
-        return news_cache[sid]
-    else:
-        return archive.get_story(sid)
-
 def new_id():
    nid = gen_rand_id()
-    while nid in news_cache or archive.get_story(nid):
+    while database.get_story(nid):
        nid = gen_rand_id()
    return nid

-def remove_ref(old_ref):
-    while old_ref in news_list:
-        news_list.remove(old_ref)
-    old_story = news_cache.pop(news_ref_to_id[old_ref])
-    old_id = news_ref_to_id.pop(old_ref)
-    logging.info('Removed ref {} id {}.'.format(old_ref, old_id))
-
 build_folder = '../webclient/build'
 flask_app = Flask(__name__, template_folder=build_folder, static_folder=build_folder, static_url_path='')
 cors = CORS(flask_app)

@flask_app.route('/api')
 def api():
-    try:
-        front_page = [news_cache[news_ref_to_id[ref]] for ref in news_list]
-    except KeyError as e:
-        logging.error('Unable to find key {}. Trying to remove...'.format(str(e)))
-        news_list.remove(str(e))
-
-    front_page = [copy.copy(x) for x in front_page if 'title' in x and x['title']]
-    front_page = front_page[:60]
-    for story in front_page:
-        story.pop('text', None)
-        story.pop('comments', None)
-
-    return {'stories': front_page}
+    skip = request.args.get('skip', 0)
+    limit = request.args.get('limit', settings.FEED_LENGTH)
+    stories = database.get_stories(limit, skip)
+    # hacky nested json
+    res = Response('{"stories":[' + ','.join(stories) + ']}')
+    res.headers['content-type'] = 'application/json'
+    return res

@flask_app.route('/api/search', strict_slashes=False)
-def search():
-    search = request.args.get('q', '')
-    if len(search) >= 3:
-        res = archive.search(search)
+def apisearch():
+    q = request.args.get('q', '')
+    if len(q) >= 3:
+        results = search.search(q)
    else:
-        res = []
-    return {'results': res}
+        results = '[]'
+    res = Response(results)
+    res.headers['content-type'] = 'application/json'
+    return res

@flask_app.route('/api/submit', methods=['POST'], strict_slashes=False)
 def submit():
@@ -99,6 +67,8 @@ def submit():
        url = request.form['url']
        nid = new_id()

+        logging.info('Manual submission: ' + url)
+
        parse = urlparse(url)
        if 'news.ycombinator.com' in parse.hostname:
            source = 'hackernews'
@@ -106,40 +76,57 @@ def submit():
        elif 'tildes.net' in parse.hostname and '~' in url:
            source = 'tildes'
            ref = parse.path.split('/')[2]
+        elif 'lobste.rs' in parse.hostname and '/s/' in url:
+            source = 'lobsters'
+            ref = parse.path.split('/')[2]
        elif 'reddit.com' in parse.hostname and 'comments' in url:
            source = 'reddit'
            ref = parse.path.split('/')[4]
+        elif 'news.t0.vc' in parse.hostname:
+            raise Exception('Invalid article')
        else:
            source = 'manual'
            ref = url

-        news_story = dict(id=nid, ref=ref, source=source)
-        news_cache[nid] = news_story
-        valid = feed.update_story(news_story, is_manual=True)
+        existing = database.get_story_by_ref(ref)
+        if existing:
+            return {'nid': existing.sid}
+        else:
+            story = dict(id=nid, ref=ref, source=source)
+            valid = feed.update_story(story, is_manual=True)
            if valid:
-            archive.update(news_story)
+                database.put_story(story)
+                search.put_story(story)
                return {'nid': nid}
            else:
-            news_cache.pop(nid, '')
                raise Exception('Invalid article')

    except BaseException as e:
        logging.error('Problem with article submission: {} - {}'.format(e.__class__.__name__, str(e)))
+        print(traceback.format_exc())
        abort(400)


@flask_app.route('/api/<sid>')
 def story(sid):
-    story = get_story(sid)
-    return dict(story=story) if story else abort(404)
+    story = database.get_story(sid)
+    if story:
+        # hacky nested json
+        res = Response('{"story":' + story.full_json + '}')
+        res.headers['content-type'] = 'application/json'
+        return res
+    else:
+        return abort(404)

@flask_app.route('/')
@flask_app.route('/search')
 def index():
    return render_template('index.html',
-            title='Feed',
+        title='QotNews',
        url='news.t0.vc',
-            description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')
+        description='Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode',
+        robots='index',
+    )

@flask_app.route('/<sid>', strict_slashes=False)
@flask_app.route('/<sid>/c', strict_slashes=False)
@@ -149,8 +136,9 @@ def static_story(sid):
    except NotFound:
        pass

-    story = get_story(sid)
+    story = database.get_story(sid)
    if not story: return abort(404)
+    story = json.loads(story.full_json)

    score = story['score']
    num_comments = story['num_comments']
@@ -163,9 +151,11 @@ def static_story(sid):
    url = url.replace('www.', '')

    return render_template('index.html',
-            title=story['title'],
+        title=story['title'] + ' | QotNews',
        url=url,
-            description=description)
+        description=description,
+        robots='noindex',
+    )

 http_server = WSGIServer(('', 33842), flask_app)

@@ -176,57 +166,57 @@ def feed_thread():
        while True:
            # onboard new stories
            if news_index == 0:
-                feed_list = feed.list()
-                new_items = [(ref, source) for ref, source in feed_list if ref not in news_list]
-                for ref, source in new_items:
-                    news_list.insert(0, ref)
+                for ref, source in feed.list():
+                    if database.get_story_by_ref(ref):
+                        continue
+                    try:
                        nid = new_id()
-                    news_ref_to_id[ref] = nid
-                    news_cache[nid] = dict(id=nid, ref=ref, source=source)
+                        logging.info('Adding ref: {}, id: {}, source: {}'.format(ref, nid, source))
+                        database.put_ref(ref, nid, source)
+                    except database.IntegrityError:
+                        logging.info('Already have ID / ref, skipping.')
+                        continue

-                if len(new_items):
-                    logging.info('Added {} new refs.'.format(len(new_items)))
-
-                # drop old ones
-                while len(news_list) > CACHE_LENGTH:
-                    old_ref = news_list[-1]
-                    remove_ref(old_ref)
+            ref_list = database.get_reflist(settings.FEED_LENGTH)

            # update current stories
-            if news_index < len(news_list):
-                update_ref = news_list[news_index]
-                update_id = news_ref_to_id[update_ref]
-                news_story = news_cache[update_id]
-                valid = feed.update_story(news_story)
+            if news_index < len(ref_list):
+                item = ref_list[news_index]
+
+                try:
+                    story_json = database.get_story(item['sid']).full_json
+                    story = json.loads(story_json)
+                except AttributeError:
+                    story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
+
+                logging.info('Updating {} story: {}, index: {}'.format(story['source'], story['ref'], news_index))
+
+                valid = feed.update_story(story)
                if valid:
-                    archive.update(news_story)
+                    database.put_story(story)
+                    search.put_story(story)
                else:
-                    remove_ref(update_ref)
+                    database.del_ref(item['ref'])
+                    logging.info('Removed ref {}'.format(item['ref']))
            else:
-                logging.info('Skipping update - no story #' + str(news_index+1))
+                logging.info('Skipping index: ' + str(news_index))

            gevent.sleep(6)

            news_index += 1
-            if news_index == CACHE_LENGTH: news_index = 0
+            if news_index == settings.FEED_LENGTH: news_index = 0

    except KeyboardInterrupt:
        logging.info('Ending feed thread...')
    except ValueError as e:
-        logging.error('feed_thread error: {} {}'.format(e.__class__.__name__, e))
+        logging.critical('feed_thread error: {} {}'.format(e.__class__.__name__, e))
        http_server.stop()

-print('Starting Feed thread...')
+logging.info('Starting Feed thread...')
 gevent.spawn(feed_thread)

-print('Starting HTTP thread...')
+logging.info('Starting HTTP thread...')
 try:
    http_server.serve_forever()
 except KeyboardInterrupt:
    logging.info('Exiting...')
-finally:
-    with shelve.open(DATA_FILE) as db:
-        logging.info('Writing caches to disk...')
-        db['news_list'] = news_list
-        db['news_ref_to_id'] = news_ref_to_id
-        db['news_cache'] = news_cache
--- a/apiserver/settings.py.example
+++ b/apiserver/settings.py.example
@@ -0,0 +1,50 @@
+# QotNews settings
+# edit this file and save it as settings.py
+
+# Feed Lengths
+# FEED_LENGTH: number of stories in the feed and the limit for API / search results
+# NUM_*: number of top items to pull from each site, set to 0 to disable that site
+FEED_LENGTH = 75
+NUM_HACKERNEWS = 15
+NUM_LOBSTERS = 10
+NUM_REDDIT = 15
+NUM_TILDES = 5
+
+# Meilisearch server URL
+# Leave blank if not using search
+#MEILI_URL = 'http://127.0.0.1:7700/'
+MEILI_URL = ''
+
+# Readerserver URL
+# Leave blank if not using, but that defeats the whole point
+READER_URL = 'http://127.0.0.1:33843/'
+
+# Reddit account info
+# leave blank if not using Reddit
+REDDIT_CLIENT_ID = ''
+REDDIT_CLIENT_SECRET = ''
+REDDIT_USER_AGENT = ''
+
+SUBREDDITS = [
+    'Economics',
+    'AcademicPhilosophy',
+    'DepthHub',
+    'Foodforthought',
+    'HistoryofIdeas',
+    'LaymanJournals',
+    'PhilosophyofScience',
+    'StateOfTheUnion',
+    'TheAgora',
+    'TrueReddit',
+    'culturalstudies',
+    'hardscience',
+    'indepthsports',
+    'indepthstories',
+    'ludology',
+    'neurophilosophy',
+    'resilientcommunities',
+    'worldevents',
+    'StallmanWasRight',
+    'EverythingScience',
+    'longevity',
+]
--- a/apiserver/utils.py
+++ b/apiserver/utils.py
@@ -8,6 +8,14 @@ import string

 from bleach.sanitizer import Cleaner

+def alert_tanner(message):
+    """Send message to the tbot alert endpoint; failures are logged, not raised.
+
+    KeyboardInterrupt is re-raised so Ctrl-C still stops the program.
+    """
+    try:
+        logging.info('Alerting Tanner: ' + message)
+        params = dict(qotnews=message)
+        requests.get('https://tbot.tannercollin.com/message', params=params, timeout=4)
+    except KeyboardInterrupt:
+        # don't let the catch-all below swallow Ctrl-C
+        raise
+    except BaseException as e:
+        logging.error('Problem alerting Tanner: ' + str(e))
+
 def gen_rand_id():
    return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))

--- a/readerserver/main.js
+++ b/readerserver/main.js
@@ -4,7 +4,7 @@ const port = 33843;

 const request = require('request');
 const JSDOM = require('jsdom').JSDOM;
-const Readability = require('readability');
+const { Readability } = require('readability');

 app.use(express.urlencoded({ extended: true }));

@@ -35,8 +35,13 @@ app.post('/', (req, res) => {
 	const url = req.body.url;
 	const requestOptions = {
 		url: url,
+		gzip: true,
 		//headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'},
-		headers: {'User-Agent': 'Twitterbot/1.0'},
+		//headers: {'User-Agent': 'Twitterbot/1.0'},
+		headers: {
+			'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0',
+			'X-Forwarded-For': '66.249.66.1',
+		},
 	};

 	console.log('Parse request for:', url);
--- a/readerserver/yarn.lock
+++ b/readerserver/yarn.lock
--- a/searchserver/.gitignore
+++ b/searchserver/.gitignore
@@ -0,0 +1,6 @@
+# Editor
+*.swp
+*.swo
+
+meilisearch-linux-amd64
+data.ms/
--- a/searchserver/README.md
+++ b/searchserver/README.md
@@ -0,0 +1,14 @@
+# Qotnews Search Server
+
+Download MeiliSearch with:
+
+```
+wget https://github.com/meilisearch/meilisearch/releases/download/v0.27.0/meilisearch-linux-amd64
+chmod +x meilisearch-linux-amd64
+```
+
+Run with:
+
+```
+MEILI_NO_ANALYTICS=true ./meilisearch-linux-amd64
+```
--- a/webclient/public/index.html
+++ b/webclient/public/index.html
@@ -8,6 +8,8 @@
 			content="{{ description }}"
 		/>
 		<meta content="{{ url }}" name="og:site_name">
+		<meta name="robots" content="{{ robots }}">
+

 		<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
 		<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
@@ -26,7 +28,7 @@
 			work correctly both with client-side routing and a non-root public URL.
 			Learn how to configure a non-root public URL by running `npm run build`.
 		-->
-	<title>{{ title }} - QotNews</title>
+		<title>{{ title }}</title>

 		<style>
 			html {
@@ -35,10 +37,26 @@
 			body {
 				background: #000;
 			}
+			.nojs {
+				color: white;
+				max-width: 32rem;
+			}
 		</style>
 	</head>
+
 	<body>
-    <noscript style="background: white">You need to enable JavaScript to run this app.</noscript>
+		<div class="nojs">
+			<noscript>
+				You need to enable JavaScript to run this app because it's written in React.
+				I was planning on writing a server-side version, but I've become distracted
+				by other projects -- sorry!
+				<br/>
+				I originally wrote this for myself, and of course I whitelist JavaScript on
+				all my own domains.
+				<br/><br/>
+				Alternatively, try activex.news.t0.vc for an ActiveX™ version.
+			</noscript>
+		</div>
 		<div id="root"></div>
 		<!--
 			This HTML file is a template.
--- a/webclient/src/App.js
+++ b/webclient/src/App.js
@@ -3,8 +3,10 @@ import { BrowserRouter as Router, Route, Link, Switch } from 'react-router-dom';
 import localForage from 'localforage';
 import './Style-light.css';
 import './Style-dark.css';
+import './Style-black.css';
+import './Style-red.css';
 import './fonts/Fonts.css';
-import { ForwardDot } from './utils.js';
+import { BackwardDot, ForwardDot } from './utils.js';
 import Feed from './Feed.js';
 import Article from './Article.js';
 import Comments from './Comments.js';
@@ -38,6 +40,16 @@ class App extends React.Component {
 		localStorage.setItem('theme', 'dark');
 	}

+	black() {
+		this.setState({ theme: 'black' });
+		localStorage.setItem('theme', 'black');
+	}
+
+	red() {
+		this.setState({ theme: 'red' });
+		localStorage.setItem('theme', 'red');
+	}
+
 	componentDidMount() {
 		if (!this.cache.length) {
 			localForage.iterate((value, key) => {
@@ -47,22 +59,61 @@ class App extends React.Component {
 		}
 	}

+	goFullScreen() {
+		if ('wakeLock' in navigator) {
+			navigator.wakeLock.request('screen');
+		}
+
+		document.body.requestFullscreen({ navigationUI: 'hide' }).then(() => {
+			window.addEventListener('resize', () => this.forceUpdate());
+			this.forceUpdate();
+		});
+	};
+
+	exitFullScreen() {
+		document.exitFullscreen().then(() => {
+			this.forceUpdate();
+		});
+	};
+
 	render() {
 		const theme = this.state.theme;
-		document.body.style.backgroundColor = theme === 'dark' ? '#000' : '#eeeeee';
+
+		if (theme === 'dark') {
+			document.body.style.backgroundColor = '#1a1a1a';
+		} else if (theme === 'black') {
+			document.body.style.backgroundColor = '#000';
+		} else if (theme === 'red') {
+			document.body.style.backgroundColor = '#000';
+		} else {
+			document.body.style.backgroundColor = '#eeeeee';
+		}
+
+		const fullScreenAvailable = document.fullscreenEnabled ||
+			document.mozFullscreenEnabled ||
+			document.webkitFullscreenEnabled ||
+			document.msFullscreenEnabled;

 		return (
 			<div className={theme}>
 				<Router>
 					<div className='container menu'>
 						<p>
-							<Link to='/'>QotNews - Feed</Link>
-							<span className='theme'>Theme: <a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a></span>
+							<Link to='/'>QotNews</Link>
+
+							<span className='theme'><a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a> - <a href='#' onClick={() => this.black()}>Black</a> - <a href='#' onClick={() => this.red()}>Red</a></span>
 							<br />
-							<span className='slogan'>Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode.</span>
+							<span className='slogan'>Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.</span>
 						</p>
 						<Route path='/(|search)' component={Search} />
 						<Route path='/(|search)' component={Submit} />
+						{fullScreenAvailable &&
+							<Route path='/(|search)' render={() => !document.fullscreenElement ?
+								<button className='fullscreen' onClick={() => this.goFullScreen()}>Enter Fullscreen</button>
+							:
+								<button className='fullscreen' onClick={() => this.exitFullScreen()}>Exit Fullscreen</button>
+							} />
+						}
 					</div>

 					<Route path='/' exact render={(props) => <Feed {...props} updateCache={this.updateCache} />} />
@@ -72,6 +123,7 @@ class App extends React.Component {
 					</Switch>
 					<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} />} />

+					<BackwardDot />
 					<ForwardDot />

 					<ScrollToTop />
--- a/webclient/src/Article.js
+++ b/webclient/src/Article.js
@@ -15,6 +15,7 @@ class Article extends React.Component {
 		this.state = {
 			story: cache[id] || false,
 			error: false,
+			pConv: [],
 		};
 	}
 	
@@ -43,10 +44,22 @@ class Article extends React.Component {
 			);
 	}

+	pConvert = (n) => {
+		this.setState({ pConv: [...this.state.pConv, n]});
+	}
+
 	render() {
 		const id = this.props.match ? this.props.match.params.id : 'CLOL';
 		const story = this.state.story;
 		const error = this.state.error;
+		const pConv = this.state.pConv;
+		let nodes = null;
+
+		if (story.text) {
+			let div = document.createElement('div');
+			div.innerHTML = story.text;
+			nodes = div.childNodes;
+		}

 		return (
 			<div className='article-container'>
@@ -54,7 +67,8 @@ class Article extends React.Component {
 				{story ?
 					<div className='article'>
 						<Helmet>
-							<title>{story.title} - QotNews</title>
+							<title>{story.title} | QotNews</title>
+							<meta name="robots" content="noindex" />
 						</Helmet>

 						<h1>{story.title}</h1>
@@ -65,8 +79,24 @@ class Article extends React.Component {

 						{infoLine(story)}

-						{story.text ?
-							<div className='story-text' dangerouslySetInnerHTML={{ __html: story.text }} />
+						{nodes ?
+							<div className='story-text'>
+								{Object.entries(nodes).map(([k, v]) =>
+									pConv.includes(k) ?
+										v.innerHTML.split('\n\n').map(x =>
+											<p dangerouslySetInnerHTML={{ __html: x }} />
+										)
+									:
+										(v.nodeName === '#text' ?
+											<p>{v.data}</p>
+										:
+											<>
+												<v.localName dangerouslySetInnerHTML={v.innerHTML ? { __html: v.innerHTML } : null} />
+												{v.localName == 'pre' && <button onClick={() => this.pConvert(k)}>Convert Code to Paragraph</button>}
+											</>
+										)
+								)}
+							</div>
 						:
 							<p>Problem getting article :(</p>
 						}
--- a/webclient/src/Comments.js
+++ b/webclient/src/Comments.js
@@ -18,6 +18,8 @@ class Article extends React.Component {
 		this.state = {
 			story: cache[id] || false,
 			error: false,
+			collapsed: [],
+			expanded: [],
 		};
 	}

@@ -49,22 +51,54 @@ class Article extends React.Component {
 			);
 	}

+	collapseComment(cid) {
+		this.setState(prevState => ({
+			...prevState,
+			collapsed: [...prevState.collapsed, cid],
+			expanded: prevState.expanded.filter(x => x !== cid),
+		}));
+	}
+
+	expandComment(cid) {
+		this.setState(prevState => ({
+			...prevState,
+			collapsed: prevState.collapsed.filter(x => x !== cid),
+			expanded: [...prevState.expanded, cid],
+		}));
+	}
+
+	countComments(c) {
+		return c.comments.reduce((sum, x) => sum + this.countComments(x), 1);
+	}
+
 	displayComment(story, c, level) {
+		const cid = c.author+c.date;
+
+		const collapsed = this.state.collapsed.includes(cid);
+		const expanded = this.state.expanded.includes(cid);
+
+		const hidden = collapsed || (level == 4 && !expanded);
+		const hasChildren = c.comments.length !== 0;
+
 		return (
-			<div className={level ? 'comment lined' : 'comment'} key={c.author+c.date}>
+			<div className={level ? 'comment lined' : 'comment'} key={cid}>
 				<div className='info'>
 					<p>
 						{c.author === story.author ? '[OP]' : ''} {c.author || '[Deleted]'}
-						&#8203; | <HashLink to={'#'+c.author+c.date} id={c.author+c.date}>{moment.unix(c.date).fromNow()}</HashLink>
+						{' '} | <HashLink to={'#'+cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
+
+						{hidden || hasChildren &&
+							<span className='collapser pointer' onClick={() => this.collapseComment(cid)}>–</span>
+						}
 					</p>
 				</div>

-				<div className='text' dangerouslySetInnerHTML={{ __html: c.text }} />
+				<div className={collapsed ? 'text hidden' : 'text'}  dangerouslySetInnerHTML={{ __html: c.text }} />

-				{level < 5 ?
-					c.comments.map(i => this.displayComment(story, i, level + 1))
+				{hidden && hasChildren ?
+					<div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c)-1} more]</div>
 				:
-					<div className='info'><p>[replies snipped]</p></div>
+					c.comments.map(i => this.displayComment(story, i, level + 1))
 				}
 			</div>
 		);
@@ -81,7 +115,8 @@ class Article extends React.Component {
 				{story ?
 					<div className='article'>
 						<Helmet>
-							<title>{story.title} - QotNews Comments</title>
+							<title>{story.title} | QotNews</title>
+							<meta name="robots" content="noindex" />
 						</Helmet>

 						<h1>{story.title}</h1>
--- a/webclient/src/Feed.js
+++ b/webclient/src/Feed.js
@@ -36,12 +36,6 @@ class Feed extends React.Component {
 									this.props.updateCache(x.id, result.story);
 								}, error => {}
 							);
-
-							if (i < 20 && x.img) {
-								const img = new Image();
-								img.src = x.img;
-								console.log('prefetched image', x.img);
-							}
 						});
 					}
 				},
@@ -58,24 +52,21 @@ class Feed extends React.Component {
 		return (
 			<div className='container'>
 				<Helmet>
-					<title>Feed - QotNews</title>
+					<title>QotNews</title>
+					<meta name="robots" content="index" />
 				</Helmet>
 				{error && <p>Connection error?</p>}
 				{stories ?
 					<div>
-						{stories.map((x, i) =>
-							<div className='item' key={i}>
-								<div className='num'>
-									{i+1}.
-								</div>
-
+						{stories.map(x =>
+							<div className='item' key={x.id}>
 								<div className='title'>
 									<Link className='link' to={'/' + x.id}>
 										<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
 									</Link>

 									<span className='source'>
-										&#8203;({sourceLink(x)})
+										({sourceLink(x)})
 									</span>
 								</div>

--- a/webclient/src/Results.js
+++ b/webclient/src/Results.js
@@ -29,7 +29,7 @@ class Results extends React.Component {
 			.then(res => res.json())
 			.then(
 				(result) => {
-					this.setState({ stories: result.results });
+					this.setState({ stories: result.hits });
 				},
 				(error) => {
 					if (error.message !== 'The operation was aborted. ') {
@@ -56,25 +56,23 @@ class Results extends React.Component {
 		return (
 			<div className='container'>
 				<Helmet>
-					<title>Feed - QotNews</title>
+					<title>Search Results | QotNews</title>
 				</Helmet>
 				{error && <p>Connection error?</p>}
 				{stories ?
-					<div>
+					<>
+						<p>Search results:</p>
+						<div className='comment lined'>
 							{stories.length ?
-							stories.map((x, i) =>
-								<div className='item' key={i}>
-									<div className='num'>
-										{i+1}.
-									</div>
-
+								stories.map(x =>
+									<div className='item' key={x.id}>
 										<div className='title'>
 											<Link className='link' to={'/' + x.id}>
 												<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
 											</Link>

 											<span className='source'>
-											&#8203;({sourceLink(x)})
+												({sourceLink(x)})
 											</span>
 										</div>

@@ -82,9 +80,10 @@ class Results extends React.Component {
 									</div>
 								)
 							:
-							<p>no results</p>
+								<p>none</p>
 							}
 						</div>
+					</>
 				:
 					<p>loading...</p>
 				}
--- a/webclient/src/ScrollToTop.js
+++ b/webclient/src/ScrollToTop.js
@@ -15,6 +15,7 @@ class ScrollToTop extends React.Component {
 		}

 		window.scrollTo(0, 0);
+		document.body.scrollTop = 0;
 	}

 	render() {
--- a/webclient/src/Style-black.css
+++ b/webclient/src/Style-black.css
@@ -0,0 +1,68 @@
+.black {
+	color: #ddd;
+}
+
+.black a {
+	color: #ddd;
+}
+
+.black input {
+	color: #ddd;
+	border: 1px solid #828282;
+}
+
+.black button {
+	background-color: #444444;
+	border-color: #bbb;
+	color: #ddd;
+}
+
+.black .item {
+	color: #828282;
+}
+
+.black .item .source-logo {
+	filter: grayscale(1);
+}
+
+.black .item a {
+	color: #828282;
+}
+
+.black .item a.link {
+	color: #ddd;
+}
+.black .item a.link:visited {
+	color: #828282;
+}
+
+.black .item .info a.hot {
+	color: #cccccc;
+}
+
+.black .article a {
+	border-bottom: 1px solid #aaaaaa;
+}
+
+.black .article u {
+	border-bottom: 1px solid #aaaaaa;
+	text-decoration: none;
+}
+
+.black .story-text video,
+.black .story-text img {
+	filter: brightness(50%);
+}
+
+.black .article .info {
+	color: #828282;
+}
+
+.black .article .info a {
+	border-bottom: none;
+	color: #828282;
+}
+
+.black .comment.lined {
+	border-left: 1px solid #444444;
+}
--- a/webclient/src/Style-dark.css
+++ b/webclient/src/Style-dark.css
@@ -11,12 +11,14 @@
 	border: 1px solid #828282;
 }

-.dark .item {
-	color: #828282;
+.dark button {
+	background-color: #444444;
+	border-color: #bbb;
+	color: #ddd;
 }

-.dark .item .source-logo {
-	filter: grayscale(1);
+.dark .item {
+	color: #828282;
 }

 .dark .item a {
@@ -43,6 +45,7 @@
 	text-decoration: none;
 }

+.dark .story-text video,
 .dark .story-text img {
 	filter: brightness(50%);
 }
--- a/webclient/src/Style-light.css
+++ b/webclient/src/Style-light.css
@@ -2,8 +2,30 @@ body {
 	text-rendering: optimizeLegibility;
 	font: 1rem/1.3 sans-serif;
 	color: #000000;
-	margin-bottom: 100vh;
 	word-break: break-word;
+	font-kerning: normal;
+	margin: 0;
+}
+
+::backdrop {
+	background-color: rgba(0,0,0,0);
+}
+
+body:fullscreen {
+	overflow-y: scroll !important;
+}
+body:-ms-fullscreen {
+	overflow-y: scroll !important;
+}
+body:-webkit-full-screen {
+	overflow-y: scroll !important;
+}
+body:-moz-full-screen {
+	overflow-y: scroll !important;
+}
+
+#root {
+	margin: 8px 8px 100vh 8px !important;
 }

 a {
@@ -21,6 +43,12 @@ input {
 	border-radius: 4px;
 }

+.fullscreen {
+	margin: 0.25rem;
+	padding: 0.25rem;
+}
+
+
 pre {
 	overflow: auto;
 }
@@ -46,7 +74,7 @@ pre {
 .item {
 	display: table;
 	color: #828282;
-	margin-bottom: 0.6rem;
+	margin-bottom: 0.7rem;
 }

 .item .source-logo {
@@ -61,11 +89,6 @@ pre {
 	text-decoration: underline;
 }

-.item .num {
-	display: table-cell;
-	width: 2em;
-}
-
 .item a.link {
 	font-size: 1.1rem;
 	color: #000000;
@@ -121,6 +144,11 @@ span.source {
 	margin: 0;
 }

+.article table {
+	width: 100%;
+	table-layout: fixed;
+}
+
 .article iframe {
 	display: none;
 }
@@ -163,16 +191,41 @@ span.source {
 	margin-top: -0.5rem;
 }

-.toggleDot {
+.comment .text.hidden > p {
+	white-space: nowrap;
+	overflow: hidden;
+	text-overflow: ellipsis;
+	display: none;
+	color: #828282;
+}
+
+.comment .text.hidden > p:first-child {
+	display: block;
+}
+
+.comment .collapser {
+	padding-left: 0.5rem;
+	padding-right: 1.5rem;
+}
+
+.comment .pointer {
+	cursor: pointer;
+}
+
+.dot {
+	cursor: pointer;
 	position: fixed;
-	bottom: 1rem;
-	left: 1rem;
 	height: 3rem;
 	width: 3rem;
 	background-color: #828282;
 	border-radius: 50%;
 }

+.toggleDot {
+	bottom: 1rem;
+	left: 1rem;
+}
+
 .toggleDot .button {
 	font: 2rem/1 'icomoon';
 	position: relative;
@@ -181,21 +234,27 @@ span.source {
 }

 .forwardDot {
-	cursor: pointer;
-	position: fixed;
 	bottom: 1rem;
 	right: 1rem;
-	height: 3rem;
-	width: 3rem;
-	background-color: #828282;
-	border-radius: 50%;
 }

 .forwardDot .button {
-	font: 2.5rem/1 'icomoon';
+	font: 2rem/1 'icomoon';
 	position: relative;
-	top: 0.25rem;
-	left: 0.3rem;
+	top: 0.5rem;
+	left: 0.5rem;
+}
+
+.backwardDot {
+	bottom: 1rem;
+	right: 5rem;
+}
+
+.backwardDot .button {
+	font: 2rem/1 'icomoon';
+	position: relative;
+	top: 0.5rem;
+	left: 0.5rem;
 }

 .search form {
--- a/webclient/src/Style-red.css
+++ b/webclient/src/Style-red.css
@@ -0,0 +1,82 @@
+.red {
+	color: #b00;
+	scrollbar-color: #b00 #440000;
+}
+
+.red a {
+	color: #b00;
+}
+
+.red input {
+	color: #b00;
+	border: 1px solid #690000;
+}
+
+.red input::placeholder {
+	color: #690000;
+}
+
+.red hr {
+	background-color: #690000;
+}
+
+.red button {
+	background-color: #440000;
+	border-color: #b00;
+	color: #b00;
+}
+
+.red .item,
+.red .slogan {
+	color: #690000;
+}
+
+.red .item .source-logo {
+	display: none;
+}
+
+.red .item a {
+	color: #690000;
+}
+
+.red .item a.link {
+	color: #b00;
+}
+.red .item a.link:visited {
+	color: #690000;
+}
+
+.red .item .info a.hot {
+	color: #cc0000;
+}
+
+.red .article a {
+	border-bottom: 1px solid #aa0000;
+}
+
+.red .article u {
+	border-bottom: 1px solid #aa0000;
+	text-decoration: none;
+}
+
+.red .story-text video,
+.red .story-text img {
+	filter: grayscale(100%) brightness(20%) sepia(100%) hue-rotate(-50deg) saturate(600%) contrast(0.8);
+}
+
+.red .article .info {
+	color: #690000;
+}
+
+.red .article .info a {
+	border-bottom: none;
+	color: #690000;
+}
+
+.red .comment.lined {
+	border-left: 1px solid #440000;
+}
+
+.red .dot {
+	background-color: #440000;
+}
--- a/webclient/src/Submit.js
+++ b/webclient/src/Submit.js
@@ -41,7 +41,7 @@ class Submit extends Component {
 			<span className='search'>
 				<form onSubmit={this.submitArticle}>
 					<input
-						placeholder='Submit Article'
+						placeholder='Submit URL'
 						ref={this.inputRef}
 					/>
 				</form>
--- a/webclient/src/fonts/icomoon.ttf
+++ b/webclient/src/fonts/icomoon.ttf
--- a/webclient/src/utils.js
+++ b/webclient/src/utils.js
Author	SHA1	Message	Date
Tanner Collin	9ec61ea5bc	Ignore dead and political stories	2025-05-27 18:47:17 +00:00
Tanner Collin	bdc7a6c10d	Fix Better HN api content extraction	2025-02-01 22:39:13 +00:00
Tanner Collin	4858516b01	Add Better HN as an API backup	2025-02-01 21:42:06 +00:00
Tanner Collin	f10e6063fc	Bug fixes	2025-02-01 20:31:35 +00:00
Tanner Collin	249a616531	Alert on story update error	2024-03-16 20:41:24 +00:00
Tanner Collin	ab92bd5441	Adjust score and comment thresholds	2024-03-08 03:08:18 +00:00
Tanner Collin	6b16a768a7	Fix deletion script	2024-03-08 03:08:03 +00:00
Tanner Collin	57de076fec	Increase database timeout	2024-02-27 18:48:56 +00:00
Tanner Collin	074b898508	Fix lobsters comment parsing	2024-02-27 18:47:00 +00:00
Tanner Collin	f049d194ab	Move scripts into own folder	2024-02-27 18:32:29 +00:00
Tanner Collin	c2b9a1cb7a	Update readability	2024-02-27 18:32:19 +00:00
Tanner Collin	4435f49e17	Make "dark" theme grey, add "black" theme	2023-09-13 01:19:47 +00:00
Tanner Collin	494d89ac30	Disable lobsters	2023-09-13 01:02:15 +00:00
Tanner Collin	e79fca6ecc	Replace "indent_level" with "depth" in lobsters API. See: `fe09e5aa31`	2023-08-31 07:35:44 +00:00
Tanner Collin	c65fb69092	Handle Lobsters comment parsing TypeErrors Too lazy to debug this: 2023-08-29 12:56:35,111 - root - INFO - Updating lobsters story: yktkwr, index: 55 Traceback (most recent call last): File "src/gevent/greenlet.py", line 854, in gevent._gevent_cgreenlet.Greenlet.run File "/home/tanner/qotnews/apiserver/server.py", line 194, in feed_thread valid = feed.update_story(story) File "/home/tanner/qotnews/apiserver/feed.py", line 74, in update_story res = lobsters.story(story['ref']) File "/home/tanner/qotnews/apiserver/feeds/lobsters.py", line 103, in story s['comments'] = iter_comments(r['comments']) File "/home/tanner/qotnews/apiserver/feeds/lobsters.py", line 76, in iter_comments parent_stack = parent_stack[:indent-1] TypeError: unsupported operand type(s) for -: 'NoneType' and 'int' 2023-08-29T12:56:35Z <Greenlet at 0x7f92ad840ae0: feed_thread> failed with TypeError	2023-08-31 07:30:39 +00:00
Tanner Collin	632d028e4c	Add Tildes group whitelist	2023-07-13 22:54:36 +00:00
Tanner Collin	ea8e9e5a23	Increase again	2023-06-13 17:11:50 +00:00
Tanner Collin	2838ea9b41	Increase Tildes story score requirement	2023-06-11 01:01:31 +00:00
Tanner Collin	f15d108971	Catch all possible Reddit API exceptions	2023-03-15 21:16:37 +00:00
Tanner Collin	f777348af8	Fix darkmode fullscreen button color	2022-08-11 19:36:36 +00:00
Tanner Collin	486404a413	Fix fix-stories bug	2022-08-10 04:06:39 +00:00
Tanner Collin	7c9c07a4cf	Hide fullscreen button if it's not available	2022-08-10 04:05:25 +00:00
Tanner Collin	08d02f6013	Add fullscreen mode	2022-08-08 23:21:49 +00:00
Tanner Collin	1b54342702	Add red theme	2022-08-08 20:14:57 +00:00
Tanner Collin	9e9571a3c0	Write fixed stories to database	2022-07-05 00:57:56 +00:00
Tanner Collin	dc83a70887	Begin script to fix bad gzip text	2022-07-04 20:32:01 +00:00
Tanner Collin	2e2c9ae837	Move FEED_LENGTH to settings.py, use for search results	2022-07-04 19:08:24 +00:00
Tanner Collin	61021d8f91	Small UI changes	2022-07-04 19:08:24 +00:00
Tanner Collin	e65047fead	Add accept gzip header to readability server	2022-07-04 19:07:31 +00:00
Tanner Collin	8e775c189f	Add test file	2022-07-04 05:56:06 +00:00
Tanner Collin	3d9274309a	Fix requests text encoding slowness	2022-07-04 05:55:52 +00:00
Tanner Collin	7bdbbf10b2	Return search results directly from the server	2022-07-04 04:33:01 +00:00
Tanner Collin	6aa0f78536	Remove Article / Comments, etc thing after name	2022-07-04 04:33:01 +00:00
Tanner Collin	bf3663bbec	Remove hard-coded title	2022-06-30 00:12:22 +00:00
Tanner Collin	e6589dc61c	Adjust title	2022-06-30 00:05:15 +00:00
Tanner Collin	307e8349f3	Change header based on page	2022-06-30 00:00:30 +00:00
Tanner Collin	04cd56daa8	Add index / noindex to client	2022-06-29 23:30:39 +00:00
Tanner Collin	c80769def6	Add noindex meta tag to stories	2022-06-29 23:20:53 +00:00
Tanner Collin	ebd1ad2140	Increase database timeout	2022-06-24 20:50:27 +00:00
Tanner Collin	2cc7dd0d6d	Update software	2022-05-31 04:24:12 +00:00
Tanner Collin	6e7cb86d2e	Explain no javascript	2022-05-31 04:23:52 +00:00
Tanner Collin	a25457254f	Improve logging, sends tweets to nitter.net	2022-03-05 23:48:46 +00:00
Tanner Collin	a693ea5342	Remove outline API	2022-03-05 22:05:29 +00:00
Tanner Collin	7386e1d8b0	Include option to disable readerserver	2022-03-05 22:04:25 +00:00
Tanner Collin	f8e8597e3a	Include option to disable search	2022-03-05 21:58:35 +00:00
Tanner Collin	55c282ee69	Fix search to work with low-RAM server	2022-03-05 21:33:07 +00:00
Tanner Collin	3f774a9e38	Improve logging	2021-09-06 00:21:05 +00:00
Tanner Collin	dcedd4caa1	Add script to reindex search, abstract search API	2021-09-06 00:20:21 +00:00
Tanner Collin	7a131ebd03	Change the order by which content-type is grabbed	2021-01-30 06:36:02 +00:00
Tanner Collin	6f64401785	Add optional skip and limit to API route	2021-01-18 03:59:33 +00:00
Tanner Collin	3ff917e806	Remove colons from date string so Python 3.5 can parse	2020-12-15 23:19:50 +00:00
Tanner Collin	c9fb9bd5df	Add Lobsters to feed	2020-12-12 05:26:33 +00:00
Tanner Collin	fd9c9c888d	Update gitignore	2020-12-11 23:49:45 +00:00
Tanner Collin	42dcf15374	Increase sqlite lock timeout	2020-11-19 21:38:18 +00:00
Tanner Collin	d8a0b77765	Blacklist sec.gov website	2020-11-19 21:37:59 +00:00
Tanner Collin	9a279d44b1	Add header to get content type	2020-11-03 20:27:43 +00:00
Tanner Collin	e506804666	Clean code up	2020-11-03 03:45:56 +00:00
Tanner Collin	ca78a6d7a9	Move feed and Praw config to settings.py	2020-11-02 02:26:54 +00:00
Tanner Collin	7acce407e9	Fix index.html indentation	2020-11-02 00:38:34 +00:00
Tanner Collin	5281672000	Fix noscript font color	2020-11-02 00:36:11 +00:00
Tanner Collin	e59acefda9	Remove Whoosh	2020-11-02 00:22:40 +00:00
Tanner Collin	cbc802b7e9	Try Hackernews API twice	2020-11-02 00:17:22 +00:00
Tanner Collin	4579dfce00	Improve logging	2020-11-02 00:13:43 +00:00
Tanner Collin	0d16bec6f6	Fix table width CSS	2020-11-01 00:47:18 +00:00
Tanner Collin	feba8b7aa0	Make qotnews work with WaPo	2020-10-29 04:55:34 +00:00
Tanner Collin	ee5105743d	Upgrade readability	2020-10-29 01:24:13 +00:00
Tanner Collin	72802a6fcf	Show excerpt of hidden comments	2020-10-27 00:41:36 +00:00
Tanner Collin	99d3a234f4	Fix bug with rendering text nodes	2020-10-26 21:58:36 +00:00
Tanner Collin	f95df227f1	Add instructions to download search server	2020-10-26 21:58:36 +00:00
Tanner Collin	b82095ca7a	Add buttons to collapse / expand comments	2020-10-26 21:57:10 +00:00
Tanner Collin	992c1c1233	Monkeypatch earlier	2020-10-24 22:30:00 +00:00
Tanner Collin	88d2216627	Add a script to delete a story	2020-10-03 23:42:21 +00:00
Tanner Collin	6cf2f01b08	Adjust feeds	2020-10-03 23:41:57 +00:00
Tanner Collin	607573dd44	Add buttons to convert <pre> to <p>	2020-10-03 23:23:25 +00:00
Tanner Collin	c554ecd890	Add a line on UI to make search results obvious	2020-08-14 03:58:11 +00:00
Tanner Collin	6576eb1bac	Adjust content-type request timeout	2020-08-14 03:57:43 +00:00
Tanner Collin	472af76d1a	Adjust port	2020-08-14 03:57:18 +00:00
Tanner Collin	4727d34eb6	Delete displayed-attributes when init search	2020-08-14 03:56:47 +00:00
Tanner Collin	0e086b60b8	Remove business subreddit from feed	2020-08-14 03:55:28 +00:00
Tanner Collin	b46ce36c63	Update requirements	2020-07-08 05:24:32 +00:00
Tanner Collin	9a449bf3ca	Remove extra logging	2020-07-08 02:36:40 +00:00
Tanner Collin	0bd9f05250	Fix crash when HN feed fails	2020-07-08 02:36:40 +00:00
Tanner Collin	9c116bde4a	Remove document img and ignore r/technology	2020-07-08 02:36:40 +00:00
Tanner Collin	ebedaef00b	Tune search rankings and attributes	2020-07-08 02:36:40 +00:00
Tanner Collin	d7f0643bd7	Add more logging	2020-07-08 02:36:40 +00:00
Tanner Collin	eb1137299d	Remove article numbers	2020-07-08 02:36:40 +00:00
Tanner Collin	72d4a68929	Remove pre-fetching image	2020-07-08 02:36:40 +00:00
Tanner Collin	f1c846acd0	Remove get first image	2020-07-08 02:36:40 +00:00
Tanner Collin	850b30e353	Add requests timeouts and temporary logging	2020-07-08 02:36:40 +00:00
Tanner Collin	d614ad0743	Integrate with external MeiliSearch server	2020-07-08 02:36:40 +00:00
Tanner Collin	f46cafdc90	Integrate sqlite database with server	2020-07-08 02:36:40 +00:00
Tanner Collin	873dc44cb1	Update whoosh migration script	2020-07-08 02:36:40 +00:00
Tanner Collin	1fb9db3f4b	Store ref list in database too	2020-07-08 02:36:40 +00:00
Tanner Collin	b923908a45	Begin initial sqlite conversion	2020-07-08 02:36:40 +00:00
Tanner Collin	dbdcfaa921	Check if cache is broken	2020-07-08 02:36:40 +00:00
Tanner Collin	8799b10525	Fall back to ref on manual submission title	2020-07-08 02:36:40 +00:00
Tanner Collin	6430fe5e9f	Check content-type	2020-07-08 02:36:40 +00:00
Tanner Collin	a4cf719cb8	Remove technology subreddit	2020-07-08 02:36:40 +00:00
Tanner Collin	595f469b4a	Update tildes parser group tag	2020-07-08 02:36:40 +00:00