diff --git a/apiserver/.gitignore b/apiserver/.gitignore
index 028b0f4..f4e05cc 100644
--- a/apiserver/.gitignore
+++ b/apiserver/.gitignore
@@ -104,3 +104,6 @@ ENV/
 
 # DB
 db.sqlite3
+
+praw.ini
+data.db
diff --git a/apiserver/data/data.db b/apiserver/data/data.db
deleted file mode 100644
index 2199644..0000000
Binary files a/apiserver/data/data.db and /dev/null differ
diff --git a/apiserver/feed.py b/apiserver/feed.py
new file mode 100644
index 0000000..fa8dc38
--- /dev/null
+++ b/apiserver/feed.py
@@ -0,0 +1,41 @@
+import logging
+logging.basicConfig(
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+        level=logging.INFO)
+
+import requests
+
+from feeds import hackernews
+
+READ_API = 'http://127.0.0.1:33843'
+
+def list():
+    feed = []
+    feed += [(x, 'hackernews') for x in hackernews.feed()]
+    return feed
+
+def get_article(url):
+    try:
+        r = requests.post(READ_API, data=dict(url=url), timeout=10)
+        if r.status_code != 200:
+            raise
+        return r.text
+    except BaseException as e:
+        logging.error('Problem getting article: {}'.format(str(e)))
+        return ''
+
+def update_story(story):
+    res = {}
+
+    if story['source'] == 'hackernews':
+        res = hackernews.story(story['ref'])
+    else:
+        return
+
+    if res:
+        story.update(res)
+        if story.get('url', '') and not story.get('text', ''):
+            if not story['url'].endswith('.pdf'):
+                story['text'] = get_article(story['url'])
+            else:
+                story['text'] = '<p>Unsupported article type.</p>'
diff --git a/apiserver/feeds/hackernews.py b/apiserver/feeds/hackernews.py
index 691533a..e1898b0 100644
--- a/apiserver/feeds/hackernews.py
+++ b/apiserver/feeds/hackernews.py
@@ -54,7 +54,6 @@ def story(ref):
     s['date'] = r.get('created_at_i', 0)
     s['title'] = r.get('title', '')
     s['link'] = SITE_LINK(ref)
-    s['source'] = 'hackernews'
     s['url'] = r.get('url', '')
     s['comments'] = [comment(i) for i in r['children']]
     s['num_comments'] = comment_count(s) - 1
diff --git a/apiserver/server.py b/apiserver/server.py
index cd25afe..2af6b74 100644
--- a/apiserver/server.py
+++ b/apiserver/server.py
@@ -7,17 +7,16 @@ import copy
 import threading
 import time
 import random
-import requests
 import shelve
 import string
 
-from feeds import hackernews
+import feed
+
 from flask import abort, Flask, request
 from flask_cors import CORS
 
 CACHE_LENGTH = 300
 DATA_FILE = 'data/data'
-READ_API = 'http://127.0.0.1:33843'
 
 news_index = 0
 
@@ -65,51 +64,33 @@ def new_id():
         nid = gen_rand_id()
     return nid
 
-def get_article(url):
-    try:
-        r = requests.post(READ_API, data=dict(url=url), timeout=10)
-        if r.status_code != 200:
-            raise
-        return r.text
-    except BaseException as e:
-        logging.error('Problem getting article: {}'.format(str(e)))
-        return ''
-
 try:
     while True:
         if news_index == 0:
-            feed = hackernews.feed()
-            new_refs = [ref for ref in feed if ref not in news_list]
-            for ref in new_refs:
+            feed_list = feed.list()
+            new_items = [(ref, source) for ref, source in feed_list if ref not in news_list]
+            for ref, source in new_items:
                 news_list.insert(0, ref)
                 nid = new_id()
                 news_ref_to_id[ref] = nid
-                news_cache[nid] = dict(id=nid, ref=ref)
+                news_cache[nid] = dict(id=nid, ref=ref, source=source)
 
-            if len(new_refs):
-                logging.info('Added {} new refs.'.format(len(new_refs)))
+            if len(new_items):
+                logging.info('Added {} new refs.'.format(len(new_items)))
 
             while len(news_list) > CACHE_LENGTH:
                 old_ref = news_list.pop()
                 old_story = news_cache.pop(news_ref_to_id[old_ref])
                 old_id = news_ref_to_id.pop(old_ref)
                 logging.info('Removed ref {} id {}.'.format(old_ref, old_id))
-                if old_story and old_id:
-                    with shelve.open(DATA_FILE) as db:
-                        db[old_id] = old_story
+                with shelve.open(DATA_FILE) as db:
+                    db[old_id] = old_story
 
         if news_index < len(news_list):
             update_ref = news_list[news_index]
             update_id = news_ref_to_id[update_ref]
             news_story = news_cache[update_id]
-            story = hackernews.story(update_ref)
-            if story:
-                news_story.update(story)
-                if news_story.get('url', '') and not news_story.get('text', ''):
-                    if not news_story['url'].endswith('.pdf'):
-                        news_story['text'] = get_article(news_story['url'])
-                    else:
-                        news_story['text'] = '<p>Unsupported article type.</p>'
+            feed.update_story(news_story)
 
         time.sleep(1)
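
For context, a minimal sketch of how the new feed module is meant to be driven, mirroring the polling loop in server.py with the id bookkeeping collapsed into a single dict. poll_once(), the seen dict, and the str(len(seen)) stand-in for new_id() are illustrative assumptions, not code from this patch.

    import time

    import feed

    seen = {}  # ref -> story dict; stands in for news_list/news_ref_to_id/news_cache

    def poll_once():
        # feed.list() now yields (ref, source) tuples, so every story carries
        # its source tag before feed.update_story() is ever called on it.
        for ref, source in feed.list():
            if ref not in seen:
                story = dict(id=str(len(seen)), ref=ref, source=source)
                seen[ref] = story
                # update_story() dispatches on story['source'], pulls the
                # story's metadata from that source's API, and fills in
                # story['text'] via the local readability server (READ_API),
                # except for PDFs, which it marks unsupported.
                feed.update_story(story)

    if __name__ == '__main__':
        while True:
            poll_once()
            time.sleep(1)

Storing the source tag on each cached story up front is what lets server.py stay source-agnostic: its loop only ever calls feed.update_story(), and additional feeds can later be merged into feed.list() without touching the server.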