From c9fb9bd5df486e654c014ac5f40c697174ef080c Mon Sep 17 00:00:00 2001
From: Tanner Collin
Date: Sat, 12 Dec 2020 05:26:33 +0000
Subject: [PATCH] Add Lobsters to feed
---
apiserver/feed.py | 7 ++-
apiserver/feeds/lobsters.py | 113 ++++++++++++++++++++++++++++++++++
apiserver/server.py | 5 +-
apiserver/settings.py.example | 6 +-
webclient/src/App.js | 2 +-
webclient/src/utils.js | 1 +
6 files changed, 129 insertions(+), 5 deletions(-)
create mode 100644 apiserver/feeds/lobsters.py
diff --git a/apiserver/feed.py b/apiserver/feed.py
index 5a884a9..ad6753d 100644
--- a/apiserver/feed.py
+++ b/apiserver/feed.py
@@ -8,7 +8,7 @@ import time
from bs4 import BeautifulSoup
import settings
-from feeds import hackernews, reddit, tildes, manual
+from feeds import hackernews, reddit, tildes, manual, lobsters
OUTLINE_API = 'https://api.outline.com/v3/parse_article'
READ_API = 'http://127.0.0.1:33843'
@@ -21,6 +21,9 @@ def list():
if settings.NUM_HACKERNEWS:
feed += [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
+ if settings.NUM_LOBSTERS:
+ feed += [(x, 'lobsters') for x in lobsters.feed()[:settings.NUM_LOBSTERS]]
+
if settings.NUM_REDDIT:
feed += [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
@@ -83,6 +86,8 @@ def update_story(story, is_manual=False):
if story['source'] == 'hackernews':
res = hackernews.story(story['ref'])
+ elif story['source'] == 'lobsters':
+ res = lobsters.story(story['ref'])
elif story['source'] == 'reddit':
res = reddit.story(story['ref'])
elif story['source'] == 'tildes':
diff --git a/apiserver/feeds/lobsters.py b/apiserver/feeds/lobsters.py
new file mode 100644
index 0000000..c3fea2e
--- /dev/null
+++ b/apiserver/feeds/lobsters.py
@@ -0,0 +1,113 @@
+import logging
+logging.basicConfig(
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+ level=logging.DEBUG)
+
+if __name__ == '__main__':
+ import sys
+ sys.path.insert(0,'.')
+
+import requests
+from datetime import datetime
+
+from utils import clean
+
+# URL builders. The upper-case names are kept because the rest of the module
+# passes them around as `route` callables and calls them directly.
+def API_HOTTEST(x):
+    """Return the URL of the hottest-stories JSON listing; x is ignored."""
+    return 'https://lobste.rs/hottest.json'
+
+
+def API_ITEM(x):
+    """Return the JSON API URL for the story identified by x."""
+    return 'https://lobste.rs/s/{}.json'.format(x)
+
+
+def SITE_LINK(x):
+    """Return the public page URL for the story identified by x."""
+    return 'https://lobste.rs/s/{}'.format(x)
+
+
+def SITE_AUTHOR_LINK(x):
+    """Return the profile page URL for the user named x."""
+    return 'https://lobste.rs/u/{}'.format(x)
+
+def api(route, ref=None):
+    """Fetch and decode a lobste.rs JSON endpoint, retrying once on failure.
+
+    route is a callable that builds the request URL from ref. The first
+    attempt uses a 5 second timeout and the second a 15 second timeout.
+
+    Returns the decoded JSON body, or False if both attempts fail.
+    """
+    timeouts = (5, 15)
+    for attempt, timeout in enumerate(timeouts, start=1):
+        try:
+            r = requests.get(route(ref), timeout=timeout)
+            if r.status_code != 200:
+                raise Exception('Bad response code ' + str(r.status_code))
+            return r.json()
+        except Exception as e:
+            # Exception (not BaseException) lets KeyboardInterrupt, SystemExit
+            # and GeneratorExit propagate instead of being logged and swallowed.
+            suffix = ', trying again' if attempt < len(timeouts) else ''
+            logging.error('Problem hitting lobsters API: {}{}'.format(str(e), suffix))
+    return False
+
+def feed():
+    """Return the short ids of the stories on the hottest listing.
+
+    Returns an empty list when api() reports failure with a falsy value.
+    """
+    stories = api(API_HOTTEST) or []
+    return [entry['short_id'] for entry in stories]
+
+def unix(date_str):
+    """Convert a timestamp like '2020-12-12T05:26:33.000-06:00' to whole Unix seconds."""
+    fmt = '%Y-%m-%dT%H:%M:%S.%f%z'
+    parsed = datetime.strptime(date_str, fmt)
+    return int(parsed.timestamp())
+
+def make_comment(i):
+    """Convert one lobste.rs comment object into the feed's comment dict.
+
+    i is a comment entry from the story JSON. Missing or malformed fields
+    fall back to defaults ('' for author, 0 for score and date) so a single
+    bad comment cannot abort parsing of the whole story.
+
+    Returns a dict with author, score, date, text and an empty comments
+    list for the caller to fill with replies.
+    """
+    c = {}
+    try:
+        c['author'] = i['commenting_user']['username']
+    except (KeyError, TypeError):
+        # TypeError: commenting_user is present but not a mapping (e.g. null).
+        c['author'] = ''
+    c['score'] = i.get('score', 0)
+    try:
+        c['date'] = unix(i['created_at'])
+    except (KeyError, TypeError, ValueError):
+        # strptime raises TypeError for non-strings and ValueError for
+        # timestamps that do not match the expected format.
+        c['date'] = 0
+    c['text'] = clean(i.get('comment', '') or '')
+    c['comments'] = []
+    return c
+
+def iter_comments(flat_comments):
+    """Rebuild the nested comment tree from a flat, depth-annotated list.
+
+    flat_comments is the story's comment list, assumed to be in thread
+    order (parents before their replies). Each entry's indent_level gives
+    its depth, with 1 meaning a top-level comment; a missing or zero level
+    is treated as 1.
+
+    Returns the list of top-level comment dicts, with replies nested under
+    each one's 'comments' key.
+    """
+    nested_comments = []
+    parent_stack = []  # ancestors of the comment being placed, outermost first
+    for comment in flat_comments:
+        c = make_comment(comment)
+        indent = comment.get('indent_level') or 1
+        # Keep only the ancestors that sit above this comment's depth.
+        parent_stack = parent_stack[:max(indent - 1, 0)]
+        if parent_stack:
+            parent_stack[-1]['comments'].append(c)
+        else:
+            # Depth 1, or a reply whose parent is absent from the list:
+            # keep it as top-level instead of raising IndexError.
+            nested_comments.append(c)
+        parent_stack.append(c)
+    return nested_comments
+
+def story(ref):
+    """Fetch one lobste.rs story and convert it to the feed's story dict.
+
+    ref is the story id used in the /s/<ref> URLs.
+
+    Returns False when the API request fails; otherwise a dict with author,
+    author_link, score, date, title, link, url, comments and num_comments,
+    plus text when the story has a non-empty description.
+    """
+    r = api(API_ITEM, ref)
+    if not r:
+        return False
+
+    s = {}
+    try:
+        s['author'] = r['submitter_user']['username']
+        s['author_link'] = SITE_AUTHOR_LINK(s['author'])
+    except (KeyError, TypeError):
+        # TypeError: submitter_user is present but not a mapping (e.g. null).
+        s['author'] = ''
+        s['author_link'] = ''
+    s['score'] = r.get('score', 0)
+    try:
+        s['date'] = unix(r['created_at'])
+    except (KeyError, TypeError, ValueError):
+        # Also covers a null or unparseable timestamp, not just a missing key.
+        s['date'] = 0
+    s['title'] = r.get('title', '')
+    s['link'] = SITE_LINK(ref)
+    s['url'] = r.get('url', '')
+    # .get() keeps a response lacking these keys from raising KeyError,
+    # in line with how the other fields are read.
+    flat_comments = r.get('comments') or []
+    s['comments'] = iter_comments(flat_comments)
+    s['num_comments'] = r.get('comment_count', len(flat_comments))
+
+    if r.get('description'):
+        s['text'] = clean(r['description'])
+
+    return s
+
+# Development scratchpad: run this module directly to dump one parsed story
+# as JSON. Swap in print(feed()) to inspect the hottest listing instead.
+if __name__ == '__main__':
+    import json
+    parsed = story('fzvd1v')
+    print(json.dumps(parsed))
diff --git a/apiserver/server.py b/apiserver/server.py
index cdf740f..c0756d3 100644
--- a/apiserver/server.py
+++ b/apiserver/server.py
@@ -70,6 +70,9 @@ def submit():
elif 'tildes.net' in parse.hostname and '~' in url:
source = 'tildes'
ref = parse.path.split('/')[2]
+ elif 'lobste.rs' in parse.hostname and '/s/' in url:
+ source = 'lobsters'
+ ref = parse.path.split('/')[2]
elif 'reddit.com' in parse.hostname and 'comments' in url:
source = 'reddit'
ref = parse.path.split('/')[4]
@@ -115,7 +118,7 @@ def index():
return render_template('index.html',
title='Feed',
url='news.t0.vc',
- description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')
+ description='Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode')
@flask_app.route('/', strict_slashes=False)
@flask_app.route('//c', strict_slashes=False)
diff --git a/apiserver/settings.py.example b/apiserver/settings.py.example
index 26f4fd6..9b99d2a 100644
--- a/apiserver/settings.py.example
+++ b/apiserver/settings.py.example
@@ -5,6 +5,7 @@
# Number of top items from each site to pull
# set to 0 to disable that site
NUM_HACKERNEWS = 15
+NUM_LOBSTERS = 10
NUM_REDDIT = 10
NUM_TILDES = 5
@@ -22,8 +23,6 @@ SUBREDDITS = [
'HistoryofIdeas',
'LaymanJournals',
'PhilosophyofScience',
- 'PoliticsPDFs',
- 'Scholar',
'StateOfTheUnion',
'TheAgora',
'TrueFilm',
@@ -37,4 +36,7 @@ SUBREDDITS = [
'neurophilosophy',
'resilientcommunities',
'worldevents',
+ 'StallmanWasRight',
+ 'DarkFuturology',
+ 'EverythingScience',
]
diff --git a/webclient/src/App.js b/webclient/src/App.js
index 85cb474..d5ebac2 100644
--- a/webclient/src/App.js
+++ b/webclient/src/App.js
@@ -59,7 +59,7 @@ class App extends React.Component {
QotNews - Feed
Theme: this.light()}>Light - this.dark()}>Dark
- Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode.
+ Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.
diff --git a/webclient/src/utils.js b/webclient/src/utils.js
index a81aa29..f592dd6 100644
--- a/webclient/src/utils.js
+++ b/webclient/src/utils.js
@@ -61,5 +61,6 @@ export const logos = {
hackernews: '',
reddit: '',
tildes: '',
+ lobsters: '',
manual: '',
};