diff --git a/apiserver/.gitignore b/apiserver/.gitignore
index 23abd96..fe157dd 100644
--- a/apiserver/.gitignore
+++ b/apiserver/.gitignore
@@ -109,4 +109,5 @@ settings.py
data.db
data.db.bak
data/archive/*
+data/backup/*
qotnews.sqlite
diff --git a/apiserver/database.py b/apiserver/database.py
index e8b6a8a..a291685 100644
--- a/apiserver/database.py
+++ b/apiserver/database.py
@@ -5,7 +5,7 @@ from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import IntegrityError
from sqlalchemy.types import JSON
-engine = create_engine('sqlite:///data/qotnews.sqlite')
+engine = create_engine('sqlite:///data/qotnews.sqlite', connect_args={'timeout': 120})
Session = sessionmaker(bind=engine)
Base = declarative_base()
diff --git a/apiserver/feed.py b/apiserver/feed.py
index bc0f097..2ab79a2 100644
--- a/apiserver/feed.py
+++ b/apiserver/feed.py
@@ -9,13 +9,13 @@ from bs4 import BeautifulSoup
import itertools
import settings
-from feeds import hackernews, reddit, tildes, substack, manual
+from feeds import hackernews, reddit, tildes, substack, manual, lobsters
from feeds.sitemap import Sitemap
from feeds.category import Category
from scrapers import outline
from scrapers.declutter import declutter, headless, simple
-INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
+INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com', 'sec.gov']
substacks = {}
for key, value in settings.SUBSTACK.items():
@@ -33,6 +33,9 @@ def get_list():
if settings.NUM_HACKERNEWS:
feeds['hackernews'] = [(x, 'hackernews', x) for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
+ if settings.NUM_LOBSTERS:
+ # Stored under its own key in `feeds`, like the other sources in this function.
+ feeds['lobsters'] = [(x, 'lobsters', x) for x in lobsters.feed()[:settings.NUM_LOBSTERS]]
+
if settings.NUM_REDDIT:
feeds['reddit'] = [(x, 'reddit', x) for x in reddit.feed()[:settings.NUM_REDDIT]]
@@ -107,6 +110,8 @@ def update_story(story, is_manual=False, urlref=None):
if story['source'] == 'hackernews':
res = hackernews.story(story['ref'])
+ elif story['source'] == 'lobsters':
+ res = lobsters.story(story['ref'])
elif story['source'] == 'reddit':
res = reddit.story(story['ref'])
elif story['source'] == 'tildes':
diff --git a/apiserver/feeds/lobsters.py b/apiserver/feeds/lobsters.py
new file mode 100644
index 0000000..c3fea2e
--- /dev/null
+++ b/apiserver/feeds/lobsters.py
@@ -0,0 +1,113 @@
+# Configure the root logger as soon as this module is imported: timestamped
+# records at DEBUG level and above.
+import logging
+logging.basicConfig(
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+ level=logging.DEBUG)
+
+# When run directly as a script, put the current directory first on the
+# import path (presumably so the `utils` import below resolves when started
+# from the apiserver directory -- confirm).
+if __name__ == '__main__':
+ import sys
+ sys.path.insert(0,'.')
+
+import requests
+from datetime import datetime
+
+from utils import clean
+
+# URL builders. Each one takes a single argument so that api() can call any
+# of them uniformly as route(ref).
+
+def API_HOTTEST(x):
+    """Return the URL of the hottest-stories JSON listing; ``x`` is ignored."""
+    return 'https://lobste.rs/hottest.json'
+
+
+def API_ITEM(x):
+    """Return the JSON API URL for the story with id ``x``."""
+    return 'https://lobste.rs/s/{}.json'.format(x)
+
+
+def SITE_LINK(x):
+    """Return the site page URL for the story with id ``x``."""
+    return 'https://lobste.rs/s/{}'.format(x)
+
+
+def SITE_AUTHOR_LINK(x):
+    """Return the profile page URL for the user named ``x``."""
+    return 'https://lobste.rs/u/{}'.format(x)
+
+def api(route, ref=None):
+    """Fetch a lobste.rs JSON endpoint, retrying once with a longer timeout.
+
+    route -- callable that takes ``ref`` and returns the URL to request.
+    ref   -- value handed to ``route``; defaults to None.
+
+    The first attempt uses a 5 second timeout and the retry 15 seconds.
+    Any non-200 status counts as a failure.
+
+    Returns the decoded JSON body, or False when both attempts fail.
+    Only ``Exception`` subclasses are logged and swallowed, so
+    KeyboardInterrupt and SystemExit propagate to the caller.
+    """
+    timeouts = (5, 15)
+    for attempt, timeout in enumerate(timeouts, start=1):
+        try:
+            r = requests.get(route(ref), timeout=timeout)
+            if r.status_code != 200:
+                raise Exception('Bad response code ' + str(r.status_code))
+            return r.json()
+        except Exception as e:
+            # Only a failure that will be retried announces the retry.
+            suffix = ', trying again' if attempt < len(timeouts) else ''
+            logging.error('Problem hitting lobsters API: {}{}'.format(str(e), suffix))
+    return False
+
+def feed():
+    """Return the ``short_id`` of every story in the hottest listing.
+
+    Yields an empty list when the API call fails or returns nothing.
+    """
+    stories = api(API_HOTTEST)
+    if not stories:
+        return []
+    return [item['short_id'] for item in stories]
+
+def unix(date_str):
+    """Convert a timestamp string to whole seconds since the Unix epoch.
+
+    ``date_str`` must match '%Y-%m-%dT%H:%M:%S.%f%z', i.e. it needs both
+    fractional seconds and a UTC offset. The fractional part is truncated.
+    """
+    parsed = datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%f%z')
+    seconds = parsed.timestamp()
+    return int(seconds)
+
+def make_comment(i):
+    """Build the internal comment dict from one lobste.rs comment object.
+
+    Missing author or creation date fall back to '' and 0 respectively.
+    The returned dict has the keys author, score, date, text and an empty
+    ``comments`` list for replies to be attached to later.
+    """
+    try:
+        author = i['commenting_user']['username']
+    except KeyError:
+        author = ''
+
+    try:
+        date = unix(i['created_at'])
+    except KeyError:
+        date = 0
+
+    return {
+        'author': author,
+        'score': i.get('score', 0),
+        'date': date,
+        # A null/absent comment body is cleaned as an empty string.
+        'text': clean(i.get('comment', '') or ''),
+        'comments': [],
+    }
+
+def iter_comments(flat_comments):
+    """Rebuild a nested comment tree from a flat comment list.
+
+    Each entry's 'indent_level' gives its depth, with 1 meaning top level.
+    Assumes every reply appears after its parent in the list.
+
+    Returns the list of top-level comment dicts, each carrying its replies
+    in its own 'comments' list.
+    """
+    roots = []
+    ancestors = []  # chain of comments from the current root down
+    for raw in flat_comments:
+        node = make_comment(raw)
+        depth = raw['indent_level']
+
+        if depth != 1:
+            # Drop back to the parent's depth, attach, then descend.
+            ancestors = ancestors[:depth-1]
+            ancestors[-1]['comments'].append(node)
+            ancestors.append(node)
+            continue
+
+        roots.append(node)
+        ancestors = [node]
+    return roots
+
+def story(ref):
+    """Fetch one lobste.rs story and convert it to the internal story dict.
+
+    ref -- story identifier, passed to both API_ITEM and SITE_LINK.
+
+    Returns False when the API call fails or yields an empty response.
+    Otherwise returns a dict with author, author_link, score, date, title,
+    link, url, comments (nested) and num_comments, plus text when the story
+    has a non-empty description. Missing fields fall back to empty defaults.
+    """
+    r = api(API_ITEM, ref)
+    if not r:
+        return False
+
+    s = {}
+    try:
+        s['author'] = r['submitter_user']['username']
+        s['author_link'] = SITE_AUTHOR_LINK(s['author'])
+    except KeyError:
+        s['author'] = ''
+        s['author_link'] = ''
+    s['score'] = r.get('score', 0)
+    try:
+        s['date'] = unix(r['created_at'])
+    except KeyError:
+        s['date'] = 0
+    s['title'] = r.get('title', '')
+    s['link'] = SITE_LINK(ref)
+    s['url'] = r.get('url', '')
+
+    # Tolerate a missing comment list or count, as the other fields do,
+    # instead of raising KeyError on an incomplete payload.
+    flat_comments = r.get('comments') or []
+    s['comments'] = iter_comments(flat_comments)
+    s['num_comments'] = r.get('comment_count', len(flat_comments))
+
+    description = r.get('description')
+    if description:
+        s['text'] = clean(description)
+
+    return s
+
+# Developer scratchpad: when this module is run directly, fetch one story
+# and print it as JSON so the parser output can be inspected by hand.
+if __name__ == '__main__':
+ #print(feed())
+ import json
+ print(json.dumps(story('fzvd1v')))
+ #print(story(20802050))
diff --git a/apiserver/server.py b/apiserver/server.py
index a7e7dad..355017b 100644
--- a/apiserver/server.py
+++ b/apiserver/server.py
@@ -72,6 +72,9 @@ def submit():
elif 'tildes.net' in parse.hostname and '~' in url:
source = 'tildes'
ref = parse.path.split('/')[2]
+ elif 'lobste.rs' in parse.hostname and '/s/' in url:
+ source = 'lobsters'
+ ref = parse.path.split('/')[2]
elif 'reddit.com' in parse.hostname and 'comments' in url:
source = 'reddit'
ref = parse.path.split('/')[4]
@@ -120,7 +123,7 @@ def index():
return render_template('index.html',
title='Feed',
url=settings.HOSTNAME,
- description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')
+ description='Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode')
@flask_app.route('/', strict_slashes=False)
@flask_app.route('//c', strict_slashes=False)
diff --git a/apiserver/settings.py.example b/apiserver/settings.py.example
index e00f10a..2d741c9 100644
--- a/apiserver/settings.py.example
+++ b/apiserver/settings.py.example
@@ -13,6 +13,7 @@ HEADLESS_READER_PORT = 33843
# Number of top items from each site to pull
# set to 0 to disable that site
NUM_HACKERNEWS = 15
+NUM_LOBSTERS = 10
NUM_REDDIT = 10
NUM_TILDES = 5
NUM_SUBSTACK = 10
@@ -74,8 +75,6 @@ SUBREDDITS = [
'HistoryofIdeas',
'LaymanJournals',
'PhilosophyofScience',
- 'PoliticsPDFs',
- 'Scholar',
'StateOfTheUnion',
'TheAgora',
'TrueFilm',
@@ -89,4 +88,7 @@ SUBREDDITS = [
'neurophilosophy',
'resilientcommunities',
'worldevents',
+ 'StallmanWasRight',
+ 'DarkFuturology',
+ 'EverythingScience',
]
diff --git a/webclient/src/App.js b/webclient/src/App.js
index 617bfe9..e6bad10 100644
--- a/webclient/src/App.js
+++ b/webclient/src/App.js
@@ -66,7 +66,7 @@ class App extends React.Component {
QotNews - Feed
Theme: this.light()}>Light - this.dark()}>Dark
- Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode.
+ Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.
diff --git a/webclient/src/utils.js b/webclient/src/utils.js
index 311ce6e..71b30ef 100644
--- a/webclient/src/utils.js
+++ b/webclient/src/utils.js
@@ -111,10 +111,11 @@ export class ForwardDot extends React.Component {
}
export const logos = {
- hackernews: "",
- reddit: "",
- tildes: "",
- manual: "",
+ hackernews: '',
+ reddit: '',
+ tildes: '',
+ lobsters: '',
+ manual: '',
substack: "",
"the bulletin": "",
webworm: "",