Abstract api server feeds

Tanner Collin 2019-08-24 08:49:11 +00:00
parent 82074eb8aa
commit d341d4422f
5 changed files with 55 additions and 31 deletions

@@ -104,3 +104,6 @@ ENV/
 # DB
 db.sqlite3
+praw.ini
+data.db

Binary file not shown.

apiserver/feed.py Normal file

@@ -0,0 +1,41 @@
import logging
logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO)

import requests

from feeds import hackernews

READ_API = 'http://127.0.0.1:33843'

def list():
    feed = []
    feed += [(x, 'hackernews') for x in hackernews.feed()]
    return feed

def get_article(url):
    try:
        r = requests.post(READ_API, data=dict(url=url), timeout=10)
        if r.status_code != 200:
            raise
        return r.text
    except BaseException as e:
        logging.error('Problem getting article: {}'.format(str(e)))
        return ''

def update_story(story):
    res = {}
    if story['source'] == 'hackernews':
        res = hackernews.story(story['ref'])
    else:
        return

    if res:
        story.update(res)

    if story.get('url', '') and not story.get('text', ''):
        if not story['url'].endswith('.pdf'):
            story['text'] = get_article(story['url'])
        else:
            story['text'] = '<p>Unsupported article type.</p>'
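
For orientation, a minimal sketch of how this new module is meant to be driven (not part of the commit; the stories dict below is a stand-in for the news_cache that server.py actually maintains):

# Illustrative sketch only -- not part of this commit.
# feed.list() yields (ref, source) tuples; feed.update_story() mutates the
# story dict in place, dispatching on story['source'].
import time
import feed

stories = {}  # stand-in for server.py's news_cache

for ref, source in feed.list():
    stories[ref] = dict(ref=ref, source=source)

for story in stories.values():
    feed.update_story(story)  # fetches the item and, if needed, the article text
    time.sleep(1)             # crude rate limiting, mirroring the server loop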

@@ -54,7 +54,6 @@ def story(ref):
     s['date'] = r.get('created_at_i', 0)
     s['title'] = r.get('title', '')
     s['link'] = SITE_LINK(ref)
-    s['source'] = 'hackernews'
     s['url'] = r.get('url', '')
     s['comments'] = [comment(i) for i in r['children']]
     s['num_comments'] = comment_count(s) - 1
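
The source tag is not lost by this removal; it moves to the caller. hackernews.story() now returns only the story fields, and feed.list() plus the server cache attach the source up front. A small illustration, with a made-up ref:

# Illustrative only -- not part of this commit.
from feeds import hackernews

ref = '20782087'  # hypothetical Hacker News item id
story = dict(id='abc123', ref=ref, source='hackernews')  # source set by the caller now
res = hackernews.story(ref)
if res:
    story.update(res)  # adds date/title/link/url/comments, but no 'source' key anymore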

@@ -7,17 +7,16 @@ import copy
 import threading
 import time
 import random
-import requests
 import shelve
 import string

-from feeds import hackernews
+import feed
 from flask import abort, Flask, request
 from flask_cors import CORS

 CACHE_LENGTH = 300
 DATA_FILE = 'data/data'
-READ_API = 'http://127.0.0.1:33843'

 news_index = 0
@@ -65,51 +64,33 @@ def new_id():
         nid = gen_rand_id()
     return nid

-def get_article(url):
-    try:
-        r = requests.post(READ_API, data=dict(url=url), timeout=10)
-        if r.status_code != 200:
-            raise
-        return r.text
-    except BaseException as e:
-        logging.error('Problem getting article: {}'.format(str(e)))
-        return ''

 try:
     while True:
         if news_index == 0:
-            feed = hackernews.feed()
-            new_refs = [ref for ref in feed if ref not in news_list]
-            for ref in new_refs:
+            feed_list = feed.list()
+            new_items = [(ref, source) for ref, source in feed_list if ref not in news_list]
+            for ref, source in new_items:
                 news_list.insert(0, ref)
                 nid = new_id()
                 news_ref_to_id[ref] = nid
-                news_cache[nid] = dict(id=nid, ref=ref)
+                news_cache[nid] = dict(id=nid, ref=ref, source=source)

-            if len(new_refs):
-                logging.info('Added {} new refs.'.format(len(new_refs)))
+            if len(new_items):
+                logging.info('Added {} new refs.'.format(len(new_items)))

             while len(news_list) > CACHE_LENGTH:
                 old_ref = news_list.pop()
                 old_story = news_cache.pop(news_ref_to_id[old_ref])
                 old_id = news_ref_to_id.pop(old_ref)
                 logging.info('Removed ref {} id {}.'.format(old_ref, old_id))
-                if old_story and old_id:
-                    with shelve.open(DATA_FILE) as db:
-                        db[old_id] = old_story
+                with shelve.open(DATA_FILE) as db:
+                    db[old_id] = old_story

         if news_index < len(news_list):
             update_ref = news_list[news_index]
             update_id = news_ref_to_id[update_ref]
             news_story = news_cache[update_id]
-            story = hackernews.story(update_ref)
-            if story:
-                news_story.update(story)
-                if news_story.get('url', '') and not news_story.get('text', ''):
-                    if not news_story['url'].endswith('.pdf'):
-                        news_story['text'] = get_article(news_story['url'])
-                    else:
-                        news_story['text'] = '<p>Unsupported article type.</p>'
+            feed.update_story(news_story)

         time.sleep(1)
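
With the server loop reduced to feed.list() and feed.update_story(), a new source should only require touching feed.py. A hypothetical sketch of that extension point (no reddit module exists in this commit; the praw.ini entry added to .gitignore merely hints at one):

# Hypothetical future shape of apiserver/feed.py -- not part of this commit.
from feeds import hackernews
# from feeds import reddit  # hypothetical second source

def list():
    feed = []
    feed += [(x, 'hackernews') for x in hackernews.feed()]
    # feed += [(x, 'reddit') for x in reddit.feed()]
    return feed

def update_story(story):
    res = {}
    if story['source'] == 'hackernews':
        res = hackernews.story(story['ref'])
    # elif story['source'] == 'reddit':
    #     res = reddit.story(story['ref'])
    else:
        return
    if res:
        story.update(res)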