Merge remote-tracking branch 'tanner/master'

master
Jason Schwarzenberger 3 years ago
commit a6e1644ddf
  1. 1
      apiserver/.gitignore
  2. 2
      apiserver/database.py
  3. 9
      apiserver/feed.py
  4. 113
      apiserver/feeds/lobsters.py
  5. 5
      apiserver/server.py
  6. 6
      apiserver/settings.py.example
  7. 2
      webclient/src/App.js
  8. 9
      webclient/src/utils.js

@ -109,4 +109,5 @@ settings.py
data.db
data.db.bak
data/archive/*
data/backup/*
qotnews.sqlite

@ -5,7 +5,7 @@ from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import IntegrityError
from sqlalchemy.types import JSON
engine = create_engine('sqlite:///data/qotnews.sqlite')
engine = create_engine('sqlite:///data/qotnews.sqlite', connect_args={'timeout': 120})
Session = sessionmaker(bind=engine)
Base = declarative_base()

@ -9,13 +9,13 @@ from bs4 import BeautifulSoup
import itertools
import settings
from feeds import hackernews, reddit, tildes, substack, manual
from feeds import hackernews, reddit, tildes, substack, manual, lobsters
from feeds.sitemap import Sitemap
from feeds.category import Category
from scrapers import outline
from scrapers.declutter import declutter, headless, simple
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com', 'sec.gov']
substacks = {}
for key, value in settings.SUBSTACK.items():
@ -33,6 +33,9 @@ def get_list():
if settings.NUM_HACKERNEWS:
feeds['hackernews'] = [(x, 'hackernews', x) for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
if settings.NUM_LOBSTERS:
feed += [(x, 'lobsters', x) for x in lobsters.feed()[:settings.NUM_LOBSTERS]]
if settings.NUM_REDDIT:
feeds['reddit'] = [(x, 'reddit', x) for x in reddit.feed()[:settings.NUM_REDDIT]]
@ -107,6 +110,8 @@ def update_story(story, is_manual=False, urlref=None):
if story['source'] == 'hackernews':
res = hackernews.story(story['ref'])
elif story['source'] == 'lobsters':
res = lobsters.story(story['ref'])
elif story['source'] == 'reddit':
res = reddit.story(story['ref'])
elif story['source'] == 'tildes':

@ -0,0 +1,113 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
if __name__ == '__main__':
import sys
sys.path.insert(0,'.')
import requests
from datetime import datetime
from utils import clean
API_HOTTEST = lambda x: 'https://lobste.rs/hottest.json'
API_ITEM = lambda x : 'https://lobste.rs/s/{}.json'.format(x)
SITE_LINK = lambda x : 'https://lobste.rs/s/{}'.format(x)
SITE_AUTHOR_LINK = lambda x : 'https://lobste.rs/u/{}'.format(x)
def api(route, ref=None):
try:
r = requests.get(route(ref), timeout=5)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting lobsters API: {}, trying again'.format(str(e)))
try:
r = requests.get(route(ref), timeout=15)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting lobsters API: {}'.format(str(e)))
return False
def feed():
return [x['short_id'] for x in api(API_HOTTEST) or []]
def unix(date_str):
return int(datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%f%z').timestamp())
def make_comment(i):
c = {}
try:
c['author'] = i['commenting_user']['username']
except KeyError:
c['author'] = ''
c['score'] = i.get('score', 0)
try:
c['date'] = unix(i['created_at'])
except KeyError:
c['date'] = 0
c['text'] = clean(i.get('comment', '') or '')
c['comments'] = []
return c
def iter_comments(flat_comments):
nested_comments = []
parent_stack = []
for comment in flat_comments:
c = make_comment(comment)
indent = comment['indent_level']
if indent == 1:
nested_comments.append(c)
parent_stack = [c]
else:
parent_stack = parent_stack[:indent-1]
p = parent_stack[-1]
p['comments'].append(c)
parent_stack.append(c)
return nested_comments
def story(ref):
r = api(API_ITEM, ref)
if not r: return False
s = {}
try:
s['author'] = r['submitter_user']['username']
s['author_link'] = SITE_AUTHOR_LINK(s['author'])
except KeyError:
s['author'] = ''
s['author_link'] = ''
s['score'] = r.get('score', 0)
try:
s['date'] = unix(r['created_at'])
except KeyError:
s['date'] = 0
s['title'] = r.get('title', '')
s['link'] = SITE_LINK(ref)
s['url'] = r.get('url', '')
s['comments'] = iter_comments(r['comments'])
s['num_comments'] = r['comment_count']
if 'description' in r and r['description']:
s['text'] = clean(r['description'] or '')
return s
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
#print(feed())
import json
print(json.dumps(story('fzvd1v')))
#print(story(20802050))

@ -72,6 +72,9 @@ def submit():
elif 'tildes.net' in parse.hostname and '~' in url:
source = 'tildes'
ref = parse.path.split('/')[2]
elif 'lobste.rs' in parse.hostname and '/s/' in url:
source = 'lobsters'
ref = parse.path.split('/')[2]
elif 'reddit.com' in parse.hostname and 'comments' in url:
source = 'reddit'
ref = parse.path.split('/')[4]
@ -120,7 +123,7 @@ def index():
return render_template('index.html',
title='Feed',
url=settings.HOSTNAME,
description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')
description='Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode')
@flask_app.route('/<sid>', strict_slashes=False)
@flask_app.route('/<sid>/c', strict_slashes=False)

@ -13,6 +13,7 @@ HEADLESS_READER_PORT = 33843
# Number of top items from each site to pull
# set to 0 to disable that site
NUM_HACKERNEWS = 15
NUM_LOBSTERS = 10
NUM_REDDIT = 10
NUM_TILDES = 5
NUM_SUBSTACK = 10
@ -74,8 +75,6 @@ SUBREDDITS = [
'HistoryofIdeas',
'LaymanJournals',
'PhilosophyofScience',
'PoliticsPDFs',
'Scholar',
'StateOfTheUnion',
'TheAgora',
'TrueFilm',
@ -89,4 +88,7 @@ SUBREDDITS = [
'neurophilosophy',
'resilientcommunities',
'worldevents',
'StallmanWasRight',
'DarkFuturology',
'EverythingScience',
]

@ -66,7 +66,7 @@ class App extends React.Component {
<Link to='/'>QotNews - Feed</Link>
<span className='theme'>Theme: <a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a></span>
<br />
<span className='slogan'>Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode.</span>
<span className='slogan'>Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.</span>
</p>
<Route path='/(|search)' component={Search} />
<Route path='/(|search)' component={Submit} />

File diff suppressed because one or more lines are too long
Loading…
Cancel
Save