Write news stories to disk

Persist the news caches with shelve: read them back at startup, write
stories to the shelf as they age past CACHE_LENGTH, fall back to the
on-disk copy when the comments endpoint misses the in-memory cache, and
dump all caches on shutdown. Also adds basic logging, error handling
around the HN API and the article reader, an id collision check against
the shelf, and skips .pdf links instead of trying to parse them.
apiserver/data/data.db (BIN, new file; binary file not shown)
@@ -1,3 +1,8 @@
+import logging
+logging.basicConfig(
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+        level=logging.INFO)
+
 import requests
 
 API_TOPSTORIES = lambda x: 'https://hacker-news.firebaseio.com/v0/topstories.json'
@@ -7,11 +12,17 @@ SITE_LINK = lambda x : 'https://news.ycombinator.com/item?id={}'.format(x)
 SITE_AUTHOR_LINK = lambda x : 'https://news.ycombinator.com/user?id={}'.format(x)
 
 def api(route, ref=None):
-    r = requests.get(route(ref), timeout=5)
-    return r.json()
+    try:
+        r = requests.get(route(ref), timeout=5)
+        if r.status_code != 200:
+            raise
+        return r.json()
+    except BaseException as e:
+        logging.error('Problem hitting hackernews API: {}'.format(str(e)))
+        return False
 
 def feed():
-    return api(API_TOPSTORIES)[:30]
+    return (api(API_TOPSTORIES) or [])[:30]
 
 def comment(i):
     c = {}
@@ -29,6 +40,7 @@ def comment_count(i):
 
 def story(ref):
     r = api(API_ITEM, ref)
+    if not r: return False
 
     if 'deleted' in r:
         return False
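The hunks above change the feed module (imported below as feeds.hackernews); the hunks that follow change the API server itself. Note the module's new failure mode: api() now catches request errors and returns False instead of raising, so callers must treat a falsy result as a failed fetch. A minimal usage sketch, assuming only what the diff shows about the module:

from feeds import hackernews

refs = hackernews.feed()          # [] rather than an exception when the API is down
for ref in refs:
    story = hackernews.story(ref)
    if not story:                 # False: fetch failed or the item was deleted
        continue                  # skip for now; the polling loop below retries
    print(story)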
@@ -8,6 +8,7 @@ import threading
 import time
 import random
 import requests
+import shelve
 import string
 
 from feeds import hackernews
@@ -15,12 +16,16 @@ from flask import abort, Flask, request
 from flask_cors import CORS
 
 CACHE_LENGTH = 300
+DATA_FILE = 'data/data'
 READ_API = 'http://127.0.0.1:33843'
 
 news_index = 0
-news_list = []
-news_ref_to_id = {}
-news_cache = {}
+
+with shelve.open(DATA_FILE) as db:
+    logging.info('Reading caches from disk...')
+    news_list = db.get('news_list', [])
+    news_ref_to_id = db.get('news_ref_to_id', {})
+    news_cache = db.get('news_cache', {})
 
 flask_app = Flask(__name__)
 cors = CORS(flask_app)
@@ -38,58 +43,82 @@ def index():
 def comments(id):
     if id in news_cache:
         return {'story': news_cache[id]}
-    else:
-        abort(404)
+
+    with shelve.open(DATA_FILE) as db:
+        if id in db:
+            return {'story': db[id]}
+
+    abort(404)
 
 print('Starting Flask...')
 web_thread = threading.Thread(target=flask_app.run, kwargs={'port': 33842})
 web_thread.setDaemon(True)
 web_thread.start()
 
-def new_id():
+def gen_rand_id():
     return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))
 
+def new_id():
+    nid = gen_rand_id()
+    with shelve.open(DATA_FILE) as db:
+        while nid in news_cache or nid in db:
+            nid = gen_rand_id()
+    return nid
+
 def get_article(url):
     try:
         r = requests.post(READ_API, data=dict(url=url), timeout=10)
+
+        if r.status_code != 200:
+            raise
+
         return r.text
-    except:
-        return '<p>Problem parsing article :(</p>'
+    except BaseException as e:
+        logging.error('Problem getting article: {}'.format(str(e)))
+        return ''
 
-while True:
-    if news_index == 0:
-        feed = hackernews.feed()
-        new_refs = [ref for ref in feed if ref not in news_list]
-        for ref in new_refs:
-            news_list.insert(0, ref)
-            nid = new_id()
-            news_ref_to_id[ref] = nid
-            news_cache[nid] = dict(id=nid, ref=ref)
+try:
+    while True:
+        if news_index == 0:
+            feed = hackernews.feed()
+            new_refs = [ref for ref in feed if ref not in news_list]
+            for ref in new_refs:
+                news_list.insert(0, ref)
+                nid = new_id()
+                news_ref_to_id[ref] = nid
+                news_cache[nid] = dict(id=nid, ref=ref)
 
-        if len(new_refs):
-            logging.info('Added {} new refs.'.format(len(new_refs)))
+            if len(new_refs):
+                logging.info('Added {} new refs.'.format(len(new_refs)))
 
-        while len(news_list) > CACHE_LENGTH:
-            old_ref = news_list.pop()
-            del news_cache[news_ref_to_id[old_ref]]
-            del news_ref_to_id[old_ref]
-            logging.info('Removed ref {}.'.format(old_ref))
+            while len(news_list) > CACHE_LENGTH:
+                old_ref = news_list.pop()
+                old_story = news_cache.pop(news_ref_to_id[old_ref])
+                old_id = news_ref_to_id.pop(old_ref)
+                logging.info('Removed ref {} id {}.'.format(old_ref, old_id))
+                if old_story and old_id:
+                    with shelve.open(DATA_FILE) as db:
+                        db[old_id] = old_story
 
-    if news_index < len(news_list):
-        update_ref = news_list[news_index]
-        update_id = news_ref_to_id[update_ref]
-        news_story = news_cache[update_id]
-        story = hackernews.story(update_ref)
-        if story:
-            news_story.update(story)
-        if news_story.get('url', '') and not news_story.get('text', ''):
-            news_story['text'] = get_article(news_story['url'])
+        if news_index < len(news_list):
+            update_ref = news_list[news_index]
+            update_id = news_ref_to_id[update_ref]
+            news_story = news_cache[update_id]
+            story = hackernews.story(update_ref)
+            if story:
+                news_story.update(story)
+            if news_story.get('url', '') and not news_story.get('text', ''):
+                if not news_story['url'].endswith('.pdf'):
+                    news_story['text'] = get_article(news_story['url'])
+                else:
+                    news_story['text'] = '<p>Unsupported article type.</p>'
 
-    time.sleep(1)
+        time.sleep(1)
 
-    news_index += 1
-    if news_index == CACHE_LENGTH: news_index = 0
+        news_index += 1
+        if news_index == CACHE_LENGTH: news_index = 0
+
+finally:
+    with shelve.open(DATA_FILE) as db:
+        logging.info('Writing caches to disk...')
+        db['news_list'] = news_list
+        db['news_ref_to_id'] = news_ref_to_id
+        db['news_cache'] = news_cache
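For reference, a standalone sketch of the shelve pattern this commit leans on. shelve.open returns a dict-like object whose values are pickled into a dbm database, and depending on the platform's dbm backend the path 'data/data' lands on disk with a suffix, which is how the data/data.db binary above comes about. The keys and values here are illustrative only, not the server's real contents.

import shelve

# First run: store some picklable values, the way the server does for
# its caches and for evicted stories keyed by their four-letter ids.
# Assumes the data/ directory already exists.
with shelve.open('data/data') as db:
    db['news_list'] = [40001, 40002]
    db['ABCD'] = dict(id='ABCD', ref=40001)

# After a restart: everything reads back with dict-style access.
with shelve.open('data/data') as db:
    news_list = db.get('news_list', [])   # same default as the startup code
    print(news_list, 'ABCD' in db)        # [40001, 40002] True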