From d8a0b777651d33b20a0d192a07030ece40e88e58 Mon Sep 17 00:00:00 2001
From: Tanner Collin
Date: Thu, 19 Nov 2020 21:37:59 +0000
Subject: [PATCH 1/4] Blacklist sec.gov website
---
apiserver/feed.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/apiserver/feed.py b/apiserver/feed.py
index cbf36ed..5a884a9 100644
--- a/apiserver/feed.py
+++ b/apiserver/feed.py
@@ -13,7 +13,7 @@ from feeds import hackernews, reddit, tildes, manual
OUTLINE_API = 'https://api.outline.com/v3/parse_article'
READ_API = 'http://127.0.0.1:33843'
-INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
+INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com', 'sec.gov']
TWO_DAYS = 60*60*24*2
def list():
From 42dcf15374893ab32c24a276e2b4caa83f5b5bfc Mon Sep 17 00:00:00 2001
From: Tanner Collin
Date: Thu, 19 Nov 2020 21:38:18 +0000
Subject: [PATCH 2/4] Increase sqlite lock timeout
---
apiserver/database.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/apiserver/database.py b/apiserver/database.py
index 24582c6..9b84b47 100644
--- a/apiserver/database.py
+++ b/apiserver/database.py
@@ -5,7 +5,7 @@ from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import IntegrityError
-engine = create_engine('sqlite:///data/qotnews.sqlite')
+engine = create_engine('sqlite:///data/qotnews.sqlite', connect_args={'timeout': 120})
Session = sessionmaker(bind=engine)
Base = declarative_base()
From fd9c9c888dfce2f6a7afdd80527212f335c4ef54 Mon Sep 17 00:00:00 2001
From: Tanner Collin
Date: Fri, 11 Dec 2020 23:49:45 +0000
Subject: [PATCH 3/4] Update gitignore
---
apiserver/.gitignore | 1 +
1 file changed, 1 insertion(+)
diff --git a/apiserver/.gitignore b/apiserver/.gitignore
index 23abd96..fe157dd 100644
--- a/apiserver/.gitignore
+++ b/apiserver/.gitignore
@@ -109,4 +109,5 @@ settings.py
data.db
data.db.bak
data/archive/*
+data/backup/*
qotnews.sqlite
From c9fb9bd5df486e654c014ac5f40c697174ef080c Mon Sep 17 00:00:00 2001
From: Tanner Collin
Date: Sat, 12 Dec 2020 05:26:33 +0000
Subject: [PATCH 4/4] Add Lobsters to feed
---
apiserver/feed.py | 7 ++-
apiserver/feeds/lobsters.py | 113 ++++++++++++++++++++++++++++++++++
apiserver/server.py | 5 +-
apiserver/settings.py.example | 6 +-
webclient/src/App.js | 2 +-
webclient/src/utils.js | 1 +
6 files changed, 129 insertions(+), 5 deletions(-)
create mode 100644 apiserver/feeds/lobsters.py
diff --git a/apiserver/feed.py b/apiserver/feed.py
index 5a884a9..ad6753d 100644
--- a/apiserver/feed.py
+++ b/apiserver/feed.py
@@ -8,7 +8,7 @@ import time
from bs4 import BeautifulSoup
import settings
-from feeds import hackernews, reddit, tildes, manual
+from feeds import hackernews, reddit, tildes, manual, lobsters
OUTLINE_API = 'https://api.outline.com/v3/parse_article'
READ_API = 'http://127.0.0.1:33843'
@@ -21,6 +21,9 @@ def list():
if settings.NUM_HACKERNEWS:
feed += [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
+ if settings.NUM_LOBSTERS:
+ feed += [(x, 'lobsters') for x in lobsters.feed()[:settings.NUM_LOBSTERS]]
+
if settings.NUM_REDDIT:
feed += [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
@@ -83,6 +86,8 @@ def update_story(story, is_manual=False):
if story['source'] == 'hackernews':
res = hackernews.story(story['ref'])
+ elif story['source'] == 'lobsters':
+ res = lobsters.story(story['ref'])
elif story['source'] == 'reddit':
res = reddit.story(story['ref'])
elif story['source'] == 'tildes':
diff --git a/apiserver/feeds/lobsters.py b/apiserver/feeds/lobsters.py
new file mode 100644
index 0000000..c3fea2e
--- /dev/null
+++ b/apiserver/feeds/lobsters.py
@@ -0,0 +1,113 @@
+import logging
+# Root logger setup; this runs as soon as the module is imported.
+logging.basicConfig(
+    level=logging.DEBUG,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+)
+
+# When executed as a script, put the current directory first on sys.path
+# before the remaining imports run.
+if __name__ == '__main__':
+    import sys
+    sys.path.insert(0, '.')
+
+import requests
+from datetime import datetime
+
+from utils import clean
+
+def API_HOTTEST(x):
+    """Return the hottest-stories JSON endpoint; x is accepted but unused."""
+    return 'https://lobste.rs/hottest.json'
+
+def API_ITEM(x):
+    """Return the JSON endpoint for the story referenced by x."""
+    return 'https://lobste.rs/s/{}.json'.format(x)
+
+def SITE_LINK(x):
+    """Return the web page URL for the story referenced by x."""
+    return 'https://lobste.rs/s/{}'.format(x)
+
+def SITE_AUTHOR_LINK(x):
+    """Return the profile page URL for the user named x."""
+    return 'https://lobste.rs/u/{}'.format(x)
+
+def api(route, ref=None):
+    """Fetch a lobste.rs endpoint and return its decoded JSON.
+
+    route is a callable that builds the URL from ref (see API_HOTTEST and
+    API_ITEM). The request is made with a 5 second timeout and, if that
+    fails, once more with a 15 second timeout. A non-200 response or any
+    request/decoding error counts as a failure and is logged.
+
+    Returns the decoded JSON, or False when both attempts fail.
+    """
+    timeouts = (5, 15)
+    for attempt, timeout in enumerate(timeouts, start=1):
+        try:
+            r = requests.get(route(ref), timeout=timeout)
+            if r.status_code != 200:
+                raise Exception('Bad response code ' + str(r.status_code))
+            return r.json()
+        except Exception as e:
+            # Exception rather than BaseException, so SystemExit and
+            # KeyboardInterrupt propagate instead of being logged and retried.
+            retry_note = ', trying again' if attempt < len(timeouts) else ''
+            logging.error('Problem hitting lobsters API: {}{}'.format(str(e), retry_note))
+    return False
+
+def feed():
+    """Return the 'short_id' of every story in the hottest listing.
+
+    Returns an empty list when the API request fails.
+    """
+    stories = api(API_HOTTEST) or []
+    return [entry['short_id'] for entry in stories]
+
+def unix(date_str):
+    """Convert a timestamp string to integer Unix seconds.
+
+    date_str must match '%Y-%m-%dT%H:%M:%S.%f%z', for example
+    '2020-12-12T05:26:33.000-06:00'; anything else raises ValueError.
+    """
+    parsed = datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%f%z')
+    return int(parsed.timestamp())
+
+def make_comment(i):
+    """Convert one comment object from the story JSON into a comment dict.
+
+    The result has the keys author, score, date, text and comments. Missing
+    or malformed fields fall back to defaults ('' for author and text, 0 for
+    score and date) so a single bad comment does not abort the whole story.
+    'comments' always starts empty; iter_comments() fills in the replies.
+    """
+    c = {}
+    try:
+        c['author'] = i['commenting_user']['username']
+    except (KeyError, TypeError):
+        # TypeError covers a null or non-dict 'commenting_user'.
+        c['author'] = ''
+    c['score'] = i.get('score', 0)
+    try:
+        c['date'] = unix(i['created_at'])
+    except (KeyError, TypeError, ValueError):
+        # unix() raises ValueError/TypeError on an unparseable timestamp.
+        c['date'] = 0
+    c['text'] = clean(i.get('comment', '') or '')
+    c['comments'] = []
+    return c
+
+def iter_comments(flat_comments):
+    """Rebuild the nested comment tree from a flat comment list.
+
+    Each entry's 'indent_level' gives its depth, with 1 meaning top-level.
+    The list is assumed to be ordered so that every reply follows its
+    parent. Returns the top-level comment dicts, with replies nested under
+    each dict's 'comments' key.
+    """
+    nested_comments = []
+    parent_stack = []  # chain of ancestors of the most recent comment
+    for comment in flat_comments:
+        c = make_comment(comment)
+        indent = comment.get('indent_level', 1)
+
+        # Keep only the comments that can be ancestors of this one.
+        parent_stack = parent_stack[:max(indent - 1, 0)]
+        if parent_stack:
+            parent_stack[-1]['comments'].append(c)
+        else:
+            # Top-level comment, or a reply whose parent is not in the list:
+            # keep it at the top level instead of raising IndexError.
+            nested_comments.append(c)
+        parent_stack.append(c)
+    return nested_comments
+
+def story(ref):
+    """Fetch one lobste.rs story and convert it into a story dict.
+
+    ref is the story reference passed to API_ITEM and SITE_LINK. Returns
+    False when the API request fails. Otherwise returns a dict with the keys
+    author, author_link, score, date, title, link, url, comments and
+    num_comments, plus text when the story has a non-empty description.
+    Missing or malformed fields fall back to defaults instead of raising.
+    """
+    r = api(API_ITEM, ref)
+    if not r:
+        return False
+
+    s = {}
+    try:
+        s['author'] = r['submitter_user']['username']
+        s['author_link'] = SITE_AUTHOR_LINK(s['author'])
+    except (KeyError, TypeError):
+        # TypeError covers a null or non-dict 'submitter_user'.
+        s['author'] = ''
+        s['author_link'] = ''
+    s['score'] = r.get('score', 0)
+    try:
+        s['date'] = unix(r['created_at'])
+    except (KeyError, TypeError, ValueError):
+        # unix() raises ValueError/TypeError on an unparseable timestamp.
+        s['date'] = 0
+    s['title'] = r.get('title', '')
+    s['link'] = SITE_LINK(ref)
+    s['url'] = r.get('url', '')
+    # Treat a missing comment list like an empty one, as with other fields.
+    flat_comments = r.get('comments') or []
+    s['comments'] = iter_comments(flat_comments)
+    s['num_comments'] = r.get('comment_count', len(flat_comments))
+
+    description = r.get('description')
+    if description:
+        s['text'] = clean(description)
+
+    return s
+
+# Scratchpad for developing the parser: run this file directly to fetch one
+# story and print it as JSON.
+if __name__ == '__main__':
+    import json
+    #print(feed())
+    sample = story('fzvd1v')
+    print(json.dumps(sample))
diff --git a/apiserver/server.py b/apiserver/server.py
index cdf740f..c0756d3 100644
--- a/apiserver/server.py
+++ b/apiserver/server.py
@@ -70,6 +70,9 @@ def submit():
elif 'tildes.net' in parse.hostname and '~' in url:
source = 'tildes'
ref = parse.path.split('/')[2]
+ elif 'lobste.rs' in parse.hostname and '/s/' in url:
+ source = 'lobsters'
+ ref = parse.path.split('/')[2]
elif 'reddit.com' in parse.hostname and 'comments' in url:
source = 'reddit'
ref = parse.path.split('/')[4]
@@ -115,7 +118,7 @@ def index():
return render_template('index.html',
title='Feed',
url='news.t0.vc',
- description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')
+ description='Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode')
@flask_app.route('/', strict_slashes=False)
@flask_app.route('//c', strict_slashes=False)
diff --git a/apiserver/settings.py.example b/apiserver/settings.py.example
index 26f4fd6..9b99d2a 100644
--- a/apiserver/settings.py.example
+++ b/apiserver/settings.py.example
@@ -5,6 +5,7 @@
# Number of top items from each site to pull
# set to 0 to disable that site
NUM_HACKERNEWS = 15
+NUM_LOBSTERS = 10
NUM_REDDIT = 10
NUM_TILDES = 5
@@ -22,8 +23,6 @@ SUBREDDITS = [
'HistoryofIdeas',
'LaymanJournals',
'PhilosophyofScience',
- 'PoliticsPDFs',
- 'Scholar',
'StateOfTheUnion',
'TheAgora',
'TrueFilm',
@@ -37,4 +36,7 @@ SUBREDDITS = [
'neurophilosophy',
'resilientcommunities',
'worldevents',
+ 'StallmanWasRight',
+ 'DarkFuturology',
+ 'EverythingScience',
]
diff --git a/webclient/src/App.js b/webclient/src/App.js
index 85cb474..d5ebac2 100644
--- a/webclient/src/App.js
+++ b/webclient/src/App.js
@@ -59,7 +59,7 @@ class App extends React.Component {
QotNews - Feed
Theme: this.light()}>Light - this.dark()}>Dark
- Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode.
+ Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.
diff --git a/webclient/src/utils.js b/webclient/src/utils.js
index a81aa29..f592dd6 100644
--- a/webclient/src/utils.js
+++ b/webclient/src/utils.js
@@ -61,5 +61,6 @@ export const logos = {
hackernews: '',
reddit: '',
tildes: '',
+ lobsters: '',
manual: '',
};