Abstract api server feeds
apiserver/.gitignore (+3, vendored)
@@ -104,3 +104,6 @@ ENV/
 
 # DB
 db.sqlite3
+
+praw.ini
+data.db
Binary file not shown.
apiserver/feed.py (+41, new file)
@@ -0,0 +1,41 @@
+import logging
+logging.basicConfig(
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+        level=logging.INFO)
+
+import requests
+
+from feeds import hackernews
+
+READ_API = 'http://127.0.0.1:33843'
+
+def list():
+    feed = []
+    feed += [(x, 'hackernews') for x in hackernews.feed()]
+    return feed
+
+def get_article(url):
+    try:
+        r = requests.post(READ_API, data=dict(url=url), timeout=10)
+        if r.status_code != 200:
+            raise
+        return r.text
+    except BaseException as e:
+        logging.error('Problem getting article: {}'.format(str(e)))
+        return ''
+
+def update_story(story):
+    res = {}
+
+    if story['source'] == 'hackernews':
+        res = hackernews.story(story['ref'])
+    else:
+        return
+
+    if res:
+        story.update(res)
+    if story.get('url', '') and not story.get('text', ''):
+        if not story['url'].endswith('.pdf'):
+            story['text'] = get_article(story['url'])
+        else:
+            story['text'] = '<p>Unsupported article type.</p>'
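The new apiserver/feed.py puts the per-source modules behind two calls: list() returns (ref, source) pairs, and update_story() fills a story dict in place. A minimal usage sketch of that interface (my own illustration, not part of the commit; it assumes the code is run from inside apiserver/ and that the article-reading service on port 33843 is up):

import feed

for ref, source in feed.list():
    story = dict(ref=ref, source=source)
    feed.update_story(story)   # fills title, link, url, comments, and text for this ref
    print(story.get('title', ''))
    break                      # demonstrate only the first item

The hunks below make the rest of the server use this interface: the Hacker News module's story() stops hard-coding the source tag, and the main loop delegates fetching to feed.list() and feed.update_story().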
@@ -54,7 +54,6 @@ def story(ref):
     s['date'] = r.get('created_at_i', 0)
     s['title'] = r.get('title', '')
     s['link'] = SITE_LINK(ref)
-    s['source'] = 'hackernews'
     s['url'] = r.get('url', '')
     s['comments'] = [comment(i) for i in r['children']]
     s['num_comments'] = comment_count(s) - 1
@@ -7,17 +7,16 @@ import copy
 import threading
 import time
 import random
 import requests
 import shelve
 import string
 
-from feeds import hackernews
+import feed
 
 from flask import abort, Flask, request
 from flask_cors import CORS
 
 CACHE_LENGTH = 300
 DATA_FILE = 'data/data'
-READ_API = 'http://127.0.0.1:33843'
 
 news_index = 0
@@ -65,51 +64,33 @@ def new_id():
             nid = gen_rand_id()
     return nid
 
-def get_article(url):
-    try:
-        r = requests.post(READ_API, data=dict(url=url), timeout=10)
-        if r.status_code != 200:
-            raise
-        return r.text
-    except BaseException as e:
-        logging.error('Problem getting article: {}'.format(str(e)))
-        return ''
-
 try:
     while True:
         if news_index == 0:
-            feed = hackernews.feed()
-            new_refs = [ref for ref in feed if ref not in news_list]
-            for ref in new_refs:
+            feed_list = feed.list()
+            new_items = [(ref, source) for ref, source in feed_list if ref not in news_list]
+            for ref, source in new_items:
                 news_list.insert(0, ref)
                 nid = new_id()
                 news_ref_to_id[ref] = nid
-                news_cache[nid] = dict(id=nid, ref=ref)
+                news_cache[nid] = dict(id=nid, ref=ref, source=source)
 
-            if len(new_refs):
-                logging.info('Added {} new refs.'.format(len(new_refs)))
+            if len(new_items):
+                logging.info('Added {} new refs.'.format(len(new_items)))
 
             while len(news_list) > CACHE_LENGTH:
                 old_ref = news_list.pop()
                 old_story = news_cache.pop(news_ref_to_id[old_ref])
                 old_id = news_ref_to_id.pop(old_ref)
                 logging.info('Removed ref {} id {}.'.format(old_ref, old_id))
-                if old_story and old_id:
-                    with shelve.open(DATA_FILE) as db:
-                        db[old_id] = old_story
+                with shelve.open(DATA_FILE) as db:
+                    db[old_id] = old_story
 
         if news_index < len(news_list):
            update_ref = news_list[news_index]
            update_id = news_ref_to_id[update_ref]
            news_story = news_cache[update_id]
-            story = hackernews.story(update_ref)
-            if story:
-                news_story.update(story)
-            if news_story.get('url', '') and not news_story.get('text', ''):
-                if not news_story['url'].endswith('.pdf'):
-                    news_story['text'] = get_article(news_story['url'])
-                else:
-                    news_story['text'] = '<p>Unsupported article type.</p>'
+            feed.update_story(news_story)
 
         time.sleep(1)
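With the sources abstracted behind feed.py, adding another feed should only touch that file, as long as the new module mirrors feeds.hackernews (feed() returning a list of refs, story(ref) returning a dict). A hypothetical sketch of the two extension points; the reddit module name is an assumption for illustration, not something this commit adds:

from feeds import hackernews
#from feeds import reddit                # hypothetical second source module

def list():
    feed = []
    feed += [(x, 'hackernews') for x in hackernews.feed()]
    #feed += [(x, 'reddit') for x in reddit.feed()]
    return feed

def update_story(story):
    res = {}
    if story['source'] == 'hackernews':
        res = hackernews.story(story['ref'])
    #elif story['source'] == 'reddit':
    #    res = reddit.story(story['ref'])
    else:
        return
    if res:
        story.update(res)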