Abstract api server feeds

This commit is contained in:
Tanner Collin 2019-08-24 08:49:11 +00:00
parent 82074eb8aa
commit d341d4422f
5 changed files with 55 additions and 31 deletions

View File

@ -104,3 +104,6 @@ ENV/
# DB
db.sqlite3
praw.ini
data.db

Binary file not shown.

41
apiserver/feed.py Normal file
View File

@ -0,0 +1,41 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.INFO)
import requests
from feeds import hackernews
READ_API = 'http://127.0.0.1:33843'
def list():
    """Return every available story as (ref, source) tuples.

    Currently only the Hacker News feed is polled; each ref is tagged
    with the 'hackernews' source label so callers know which feed
    module can resolve it later.
    """
    # NOTE: shadows the builtin `list`, but the name is this module's
    # public API (callers use feed.list()), so it must stay.
    return [(ref, 'hackernews') for ref in hackernews.feed()]
def get_article(url):
    """Fetch the readable text of *url* from the local reader service.

    POSTs the url to READ_API and returns the response body on success,
    or '' on any failure (timeout, connection error, non-200 status).
    Failures are logged, never raised to the caller.
    """
    try:
        r = requests.post(READ_API, data=dict(url=url), timeout=10)
        if r.status_code != 200:
            # A bare `raise` here had no active exception, so it produced a
            # confusing "No active exception to re-raise" RuntimeError in the
            # log; raise a descriptive error carrying the status code instead.
            raise Exception('Received bad response code: {}'.format(r.status_code))
        return r.text
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt/SystemExit must
        # still propagate so the process can be stopped cleanly.
        logging.error('Problem getting article: {}'.format(str(e)))
        return ''
def update_story(story):
    """Fill in a story record in place from its source feed.

    *story* must have 'source' and 'ref' keys. Stories from an unknown
    source are left untouched. When the updated story has a url but no
    text yet, the article body is fetched via get_article(), except for
    PDF links which the reader service cannot handle.

    Returns None; the caller's dict is mutated.
    """
    res = {}
    if story['source'] == 'hackernews':
        res = hackernews.story(story['ref'])
    else:
        # unknown source: nothing we know how to fetch
        return

    if res:
        story.update(res)

    if story.get('url', '') and not story.get('text', ''):
        # case-insensitive so '.PDF' links are also skipped, not sent
        # to the reader service
        if not story['url'].lower().endswith('.pdf'):
            story['text'] = get_article(story['url'])
        else:
            story['text'] = '<p>Unsupported article type.</p>'

View File

@ -54,7 +54,6 @@ def story(ref):
s['date'] = r.get('created_at_i', 0)
s['title'] = r.get('title', '')
s['link'] = SITE_LINK(ref)
s['source'] = 'hackernews'
s['url'] = r.get('url', '')
s['comments'] = [comment(i) for i in r['children']]
s['num_comments'] = comment_count(s) - 1

View File

@ -7,17 +7,16 @@ import copy
import threading
import time
import random
import requests
import shelve
import string
from feeds import hackernews
import feed
from flask import abort, Flask, request
from flask_cors import CORS
CACHE_LENGTH = 300
DATA_FILE = 'data/data'
READ_API = 'http://127.0.0.1:33843'
news_index = 0
@ -65,36 +64,25 @@ def new_id():
nid = gen_rand_id()
return nid
# NOTE(review): this helper is removed from server.py in this commit — an
# identical copy now lives in apiserver/feed.py so article fetching is
# shared by the feed abstraction.
def get_article(url):
    """Fetch readable article text for *url* via the local READ_API service.

    Returns the response body on HTTP 200, or '' on any failure
    (the error is logged, never raised).
    """
    try:
        r = requests.post(READ_API, data=dict(url=url), timeout=10)
        if r.status_code != 200:
            # bare `raise` has no active exception here, so this actually
            # raises RuntimeError, which the handler below logs
            raise
        return r.text
    except BaseException as e:
        # NOTE(review): BaseException also swallows KeyboardInterrupt/SystemExit
        logging.error('Problem getting article: {}'.format(str(e)))
        return ''
try:
while True:
if news_index == 0:
feed = hackernews.feed()
new_refs = [ref for ref in feed if ref not in news_list]
for ref in new_refs:
feed_list = feed.list()
new_items = [(ref, source) for ref, source in feed_list if ref not in news_list]
for ref, source in new_items:
news_list.insert(0, ref)
nid = new_id()
news_ref_to_id[ref] = nid
news_cache[nid] = dict(id=nid, ref=ref)
news_cache[nid] = dict(id=nid, ref=ref, source=source)
if len(new_refs):
logging.info('Added {} new refs.'.format(len(new_refs)))
if len(new_items):
logging.info('Added {} new refs.'.format(len(new_items)))
while len(news_list) > CACHE_LENGTH:
old_ref = news_list.pop()
old_story = news_cache.pop(news_ref_to_id[old_ref])
old_id = news_ref_to_id.pop(old_ref)
logging.info('Removed ref {} id {}.'.format(old_ref, old_id))
if old_story and old_id:
with shelve.open(DATA_FILE) as db:
db[old_id] = old_story
@ -102,14 +90,7 @@ try:
update_ref = news_list[news_index]
update_id = news_ref_to_id[update_ref]
news_story = news_cache[update_id]
story = hackernews.story(update_ref)
if story:
news_story.update(story)
if news_story.get('url', '') and not news_story.get('text', ''):
if not news_story['url'].endswith('.pdf'):
news_story['text'] = get_article(news_story['url'])
else:
news_story['text'] = '<p>Unsupported article type.</p>'
feed.update_story(news_story)
time.sleep(1)