From d8a0b777651d33b20a0d192a07030ece40e88e58 Mon Sep 17 00:00:00 2001 From: Tanner Collin Date: Thu, 19 Nov 2020 21:37:59 +0000 Subject: [PATCH 1/4] Blacklist sec.gov website --- apiserver/feed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apiserver/feed.py b/apiserver/feed.py index cbf36ed..5a884a9 100644 --- a/apiserver/feed.py +++ b/apiserver/feed.py @@ -13,7 +13,7 @@ from feeds import hackernews, reddit, tildes, manual OUTLINE_API = 'https://api.outline.com/v3/parse_article' READ_API = 'http://127.0.0.1:33843' -INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com'] +INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com', 'sec.gov'] TWO_DAYS = 60*60*24*2 def list(): From 42dcf15374893ab32c24a276e2b4caa83f5b5bfc Mon Sep 17 00:00:00 2001 From: Tanner Collin Date: Thu, 19 Nov 2020 21:38:18 +0000 Subject: [PATCH 2/4] Increase sqlite lock timeout --- apiserver/database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apiserver/database.py b/apiserver/database.py index 24582c6..9b84b47 100644 --- a/apiserver/database.py +++ b/apiserver/database.py @@ -5,7 +5,7 @@ from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker from sqlalchemy.exc import IntegrityError -engine = create_engine('sqlite:///data/qotnews.sqlite') +engine = create_engine('sqlite:///data/qotnews.sqlite', connect_args={'timeout': 120}) Session = sessionmaker(bind=engine) Base = declarative_base() From fd9c9c888dfce2f6a7afdd80527212f335c4ef54 Mon Sep 17 00:00:00 2001 From: Tanner Collin Date: Fri, 11 Dec 2020 23:49:45 +0000 Subject: [PATCH 3/4] Update gitignore --- apiserver/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/apiserver/.gitignore b/apiserver/.gitignore index 23abd96..fe157dd 100644 --- a/apiserver/.gitignore +++ b/apiserver/.gitignore @@ -109,4 +109,5 @@ settings.py data.db data.db.bak data/archive/* +data/backup/* qotnews.sqlite From c9fb9bd5df486e654c014ac5f40c697174ef080c Mon Sep 17 00:00:00 2001 From: Tanner Collin Date: Sat, 12 Dec 2020 05:26:33 +0000 Subject: [PATCH 4/4] Add Lobsters to feed --- apiserver/feed.py | 7 ++- apiserver/feeds/lobsters.py | 113 ++++++++++++++++++++++++++++++++++ apiserver/server.py | 5 +- apiserver/settings.py.example | 6 +- webclient/src/App.js | 2 +- webclient/src/utils.js | 1 + 6 files changed, 129 insertions(+), 5 deletions(-) create mode 100644 apiserver/feeds/lobsters.py diff --git a/apiserver/feed.py b/apiserver/feed.py index 5a884a9..ad6753d 100644 --- a/apiserver/feed.py +++ b/apiserver/feed.py @@ -8,7 +8,7 @@ import time from bs4 import BeautifulSoup import settings -from feeds import hackernews, reddit, tildes, manual +from feeds import hackernews, reddit, tildes, manual, lobsters OUTLINE_API = 'https://api.outline.com/v3/parse_article' READ_API = 'http://127.0.0.1:33843' @@ -21,6 +21,9 @@ def list(): if settings.NUM_HACKERNEWS: feed += [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]] + if settings.NUM_LOBSTERS: + feed += [(x, 'lobsters') for x in lobsters.feed()[:settings.NUM_LOBSTERS]] + if settings.NUM_REDDIT: feed += [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]] @@ -83,6 +86,8 @@ def update_story(story, is_manual=False): if story['source'] == 'hackernews': res = hackernews.story(story['ref']) + elif story['source'] == 'lobsters': + res = lobsters.story(story['ref']) elif story['source'] == 'reddit': res = reddit.story(story['ref']) elif story['source'] == 'tildes': diff --git a/apiserver/feeds/lobsters.py b/apiserver/feeds/lobsters.py new file mode 100644 index 0000000..c3fea2e --- /dev/null +++ b/apiserver/feeds/lobsters.py @@ -0,0 +1,113 @@ +import logging +logging.basicConfig( + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + level=logging.DEBUG) + +if __name__ == '__main__': + import sys + sys.path.insert(0,'.') + +import requests +from datetime import datetime + +from utils import clean + +API_HOTTEST = lambda x: 'https://lobste.rs/hottest.json' +API_ITEM = lambda x : 'https://lobste.rs/s/{}.json'.format(x) + +SITE_LINK = lambda x : 'https://lobste.rs/s/{}'.format(x) +SITE_AUTHOR_LINK = lambda x : 'https://lobste.rs/u/{}'.format(x) + +def api(route, ref=None): + try: + r = requests.get(route(ref), timeout=5) + if r.status_code != 200: + raise Exception('Bad response code ' + str(r.status_code)) + return r.json() + except KeyboardInterrupt: + raise + except BaseException as e: + logging.error('Problem hitting lobsters API: {}, trying again'.format(str(e))) + + try: + r = requests.get(route(ref), timeout=15) + if r.status_code != 200: + raise Exception('Bad response code ' + str(r.status_code)) + return r.json() + except KeyboardInterrupt: + raise + except BaseException as e: + logging.error('Problem hitting lobsters API: {}'.format(str(e))) + return False + +def feed(): + return [x['short_id'] for x in api(API_HOTTEST) or []] + +def unix(date_str): + return int(datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%f%z').timestamp()) + +def make_comment(i): + c = {} + try: + c['author'] = i['commenting_user']['username'] + except KeyError: + c['author'] = '' + c['score'] = i.get('score', 0) + try: + c['date'] = unix(i['created_at']) + except KeyError: + c['date'] = 0 + c['text'] = clean(i.get('comment', '') or '') + c['comments'] = [] + return c + +def iter_comments(flat_comments): + nested_comments = [] + parent_stack = [] + for comment in flat_comments: + c = make_comment(comment) + indent = comment['indent_level'] + + if indent == 1: + nested_comments.append(c) + parent_stack = [c] + else: + parent_stack = parent_stack[:indent-1] + p = parent_stack[-1] + p['comments'].append(c) + parent_stack.append(c) + return nested_comments + +def story(ref): + r = api(API_ITEM, ref) + if not r: return False + + s = {} + try: + s['author'] = r['submitter_user']['username'] + s['author_link'] = SITE_AUTHOR_LINK(s['author']) + except KeyError: + s['author'] = '' + s['author_link'] = '' + s['score'] = r.get('score', 0) + try: + s['date'] = unix(r['created_at']) + except KeyError: + s['date'] = 0 + s['title'] = r.get('title', '') + s['link'] = SITE_LINK(ref) + s['url'] = r.get('url', '') + s['comments'] = iter_comments(r['comments']) + s['num_comments'] = r['comment_count'] + + if 'description' in r and r['description']: + s['text'] = clean(r['description'] or '') + + return s + +# scratchpad so I can quickly develop the parser +if __name__ == '__main__': + #print(feed()) + import json + print(json.dumps(story('fzvd1v'))) + #print(story(20802050)) diff --git a/apiserver/server.py b/apiserver/server.py index cdf740f..c0756d3 100644 --- a/apiserver/server.py +++ b/apiserver/server.py @@ -70,6 +70,9 @@ def submit(): elif 'tildes.net' in parse.hostname and '~' in url: source = 'tildes' ref = parse.path.split('/')[2] + elif 'lobste.rs' in parse.hostname and '/s/' in url: + source = 'lobsters' + ref = parse.path.split('/')[2] elif 'reddit.com' in parse.hostname and 'comments' in url: source = 'reddit' ref = parse.path.split('/')[4] @@ -115,7 +118,7 @@ def index(): return render_template('index.html', title='Feed', url='news.t0.vc', - description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode') + description='Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode') @flask_app.route('/', strict_slashes=False) @flask_app.route('//c', strict_slashes=False) diff --git a/apiserver/settings.py.example b/apiserver/settings.py.example index 26f4fd6..9b99d2a 100644 --- a/apiserver/settings.py.example +++ b/apiserver/settings.py.example @@ -5,6 +5,7 @@ # Number of top items from each site to pull # set to 0 to disable that site NUM_HACKERNEWS = 15 +NUM_LOBSTERS = 10 NUM_REDDIT = 10 NUM_TILDES = 5 @@ -22,8 +23,6 @@ SUBREDDITS = [ 'HistoryofIdeas', 'LaymanJournals', 'PhilosophyofScience', - 'PoliticsPDFs', - 'Scholar', 'StateOfTheUnion', 'TheAgora', 'TrueFilm', @@ -37,4 +36,7 @@ SUBREDDITS = [ 'neurophilosophy', 'resilientcommunities', 'worldevents', + 'StallmanWasRight', + 'DarkFuturology', + 'EverythingScience', ] diff --git a/webclient/src/App.js b/webclient/src/App.js index 85cb474..d5ebac2 100644 --- a/webclient/src/App.js +++ b/webclient/src/App.js @@ -59,7 +59,7 @@ class App extends React.Component { QotNews - Feed Theme: this.light()}>Light - this.dark()}>Dark
- Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode. + Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.

diff --git a/webclient/src/utils.js b/webclient/src/utils.js index a81aa29..f592dd6 100644 --- a/webclient/src/utils.js +++ b/webclient/src/utils.js @@ -61,5 +61,6 @@ export const logos = { hackernews: '', reddit: '', tildes: '', + lobsters: '', manual: '', };