From 62d68da415704425b1b1c623d93acfbbe7326e16 Mon Sep 17 00:00:00 2001
From: Tanner Collin
Date: Fri, 23 Aug 2019 08:23:48 +0000
Subject: [PATCH] Finish prototype api server

---
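Note (not part of the commit): the feed module below can be smoke-tested on
its own from the apiserver/ directory. This sketch assumes the firebaseio
and Algolia endpoints are reachable; IDs and output will vary:

    from feeds import hackernews

    refs = hackernews.feed()           # first 30 IDs from topstories.json
    story = hackernews.story(refs[0])  # story dict, or False if not a story
    if story:
        print(story['title'], story['num_comments'])
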
 apiserver/feeds/hackernews.py | 58 ++++++++++++++++++++++++
 apiserver/server.py           | 99 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 157 insertions(+)
 create mode 100644 apiserver/feeds/hackernews.py
 create mode 100644 apiserver/server.py

diff --git a/apiserver/feeds/hackernews.py b/apiserver/feeds/hackernews.py
new file mode 100644
index 0000000..b8fc8a7
--- /dev/null
+++ b/apiserver/feeds/hackernews.py
@@ -0,0 +1,58 @@
+import requests
+
+API_TOPSTORIES = lambda x: 'https://hacker-news.firebaseio.com/v0/topstories.json'
+API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
+
+SITE_LINK = lambda x : 'https://news.ycombinator.com/item?id={}'.format(x)
+SITE_AUTHOR_LINK = lambda x : 'https://news.ycombinator.com/user?id={}'.format(x)
+
+def api(route, ref=None):
+    r = requests.get(route(ref), timeout=5)
+    return r.json()
+
+def feed():
+    return api(API_TOPSTORIES)[:30]
+
+def comment(i):
+    c = {}
+    c['author'] = i.get('author', '')
+    c['score'] = i.get('points', 0)
+    c['date'] = i.get('created_at_i', 0)
+    c['text'] = i.get('text', '')
+    c['link'] = SITE_LINK(i['id'])
+    c['comments'] = [comment(j) for j in i['children']]
+    return c
+
+def comment_count(i):
+    # Only count comments that still have an author ("alive" comments).
+    alive = 1 if i['author'] else 0
+    return sum([comment_count(c) for c in i['comments']]) + alive
+
+def story(ref):
+    r = api(API_ITEM, ref)
+
+    if 'deleted' in r:
+        return False
+    elif r.get('type', '') != 'story':
+        return False
+
+    s = {}
+    s['author'] = r.get('author', '')
+    s['author_link'] = SITE_AUTHOR_LINK(r.get('author', ''))
+    s['score'] = r.get('points', 0)
+    s['date'] = r.get('created_at_i', 0)
+    s['title'] = r.get('title', '')
+    s['link'] = SITE_LINK(ref)
+    s['source'] = 'hackernews'
+    s['url'] = r.get('url', '')
+    s['comments'] = [comment(i) for i in r['children']]
+    # comment_count() counts the story itself too, so subtract one.
+    s['num_comments'] = comment_count(s) - 1
+
+    if 'text' in r and r['text']:
+        s['text'] = r['text']
+
+    return s
+
+if __name__ == '__main__':
+    print(story(20763961))
diff --git a/apiserver/server.py b/apiserver/server.py
new file mode 100644
index 0000000..1f43635
--- /dev/null
+++ b/apiserver/server.py
@@ -0,0 +1,99 @@
+import logging
+logging.basicConfig(
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    level=logging.INFO)
+
+import copy
+import threading
+import time
+import random
+import requests
+import string
+
+from feeds import hackernews
+from flask import abort, Flask, request
+from flask_cors import CORS
+
+CACHE_LENGTH = 300
+# Local article-parsing service: POST a url, get back text (see get_article).
+READ_API = 'http://127.0.0.1:33843'
+
+news_index = 0
+news_list = []
+news_ref_to_id = {}
+news_cache = {}
+
+flask_app = Flask(__name__)
+cors = CORS(flask_app)
+
+@flask_app.route('/')
+def index():
+    front_page = [news_cache[news_ref_to_id[ref]] for ref in news_list]
+    front_page = [copy.deepcopy(x) for x in front_page if 'title' in x]
+    for story in front_page:
+        if 'comments' in story: story.pop('comments')
+        if 'text' in story: story.pop('text')
+    return {'stories': front_page}
+
+@flask_app.route('/<id>')
+def comments(id):
+    if id in news_cache:
+        return {'story': news_cache[id]}
+    else:
+        abort(404)
+
+print('Starting Flask...')
+web_thread = threading.Thread(target=flask_app.run, kwargs={'port': 33842})
+web_thread.setDaemon(True)
+web_thread.start()
+
+def new_id():
+    return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))
+
+def get_article(url):
+    try:
+        r = requests.post(READ_API, data=dict(url=url), timeout=10)
+
+        if r.status_code != 200:
+            raise Exception('Bad response code ' + str(r.status_code))
+
+        return r.text
+    except:
+        return '<p>Problem parsing article :(</p>'
+
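+# Update loop: refresh the feed list whenever the index wraps to zero,
+# then re-fetch one cached story per pass (one pass per second).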
+while True:
+    if news_index == 0:
+        feed = hackernews.feed()
+        new_refs = [ref for ref in feed if ref not in news_list]
+        for ref in new_refs:
+            news_list.insert(0, ref)
+            nid = new_id()
+            news_ref_to_id[ref] = nid
+            news_cache[nid] = dict(id=nid, ref=ref)
+
+        if len(new_refs):
+            logging.info('Added {} new refs.'.format(len(new_refs)))
+
+        while len(news_list) > CACHE_LENGTH:
+            old_ref = news_list.pop()
+            del news_cache[news_ref_to_id[old_ref]]
+            del news_ref_to_id[old_ref]
+            logging.info('Removed ref {}.'.format(old_ref))
+
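+    # Re-scrape the story at the current index and fill in article text.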
+    if news_index < len(news_list):
+        update_ref = news_list[news_index]
+        update_id = news_ref_to_id[update_ref]
+        news_story = news_cache[update_id]
+        story = hackernews.story(update_ref)
+        if story:
+            news_story.update(story)
+            if news_story.get('url', '') and not news_story.get('text', ''):
+                news_story['text'] = get_article(news_story['url'])
+
+    time.sleep(1)
+
+    news_index += 1
+    if news_index == CACHE_LENGTH: news_index = 0
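
Post-patch note (not part of the commit): with server.py running, and ideally
the article-parsing service listening on port 33843 (otherwise link posts just
get the fallback error text), the two routes can be exercised as below. The
four-letter story IDs are whatever new_id() assigned on this particular run:

    import requests

    front = requests.get('http://127.0.0.1:33842/').json()
    print(len(front['stories']))    # summaries, comments/text stripped

    sid = front['stories'][0]['id']
    full = requests.get('http://127.0.0.1:33842/' + sid).json()
    print(full['story']['title'], full['story']['num_comments'])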