Abstract api server feeds

apiserver/.gitignore (3 additions)

@@ -104,3 +104,6 @@ ENV/
 
 # DB
 db.sqlite3
+
+praw.ini
+data.db
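
Two local artifacts are ignored here: praw.ini, the standard credentials file read by PRAW (the Python Reddit API Wrapper), and data.db, presumably a local database file alongside the db.sqlite3 already listed. Keeping praw.ini out of the repo is the point, since it holds secrets; a typical praw.ini looks roughly like this (section name and values are placeholders, not from this commit):

; placeholder praw.ini -- real credentials must never be committed
[bot1]
client_id=PLACEHOLDER
client_secret=PLACEHOLDER
user_agent=apiserver feed collector (placeholder)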
										
Binary file not shown.

apiserver/feed.py (new file, 41 additions)

@@ -0,0 +1,41 @@
+import logging
+logging.basicConfig(
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+        level=logging.INFO)
+
+import requests
+
+from feeds import hackernews
+
+READ_API = 'http://127.0.0.1:33843'
+
+def list():
+    feed = []
+    feed += [(x, 'hackernews') for x in hackernews.feed()]
+    return feed
+
+def get_article(url):
+    try:
+        r = requests.post(READ_API, data=dict(url=url), timeout=10)
+        if r.status_code != 200:
+            raise
+        return r.text
+    except BaseException as e:
+        logging.error('Problem getting article: {}'.format(str(e)))
+        return ''
+
+def update_story(story):
+    res = {}
+
+    if story['source'] == 'hackernews':
+        res = hackernews.story(story['ref'])
+    else:
+        return
+
+    if res:
+        story.update(res)
+    if story.get('url', '') and not story.get('text', ''):
+        if not story['url'].endswith('.pdf'):
+            story['text'] = get_article(story['url'])
+        else:
+            story['text'] = '<p>Unsupported article type.</p>'

@@ -54,7 +54,6 @@ def story(ref):
     s['date'] = r.get('created_at_i', 0)
     s['title'] = r.get('title', '')
     s['link'] = SITE_LINK(ref)
-    s['source'] = 'hackernews'
     s['url'] = r.get('url', '')
     s['comments'] = [comment(i) for i in r['children']]
     s['num_comments'] = comment_count(s) - 1
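
This is the flip side of the abstraction: the hackernews module's story() no longer stamps s['source'] = 'hackernews' on every story, because the server now records the source once, when a ref first enters the cache (see the dict(id=nid, ref=ref, source=source) change below), and feed.update_story() dispatches on that field. Illustratively, with placeholder id and ref values, a fresh cache entry is just:

# Minimal cache entry at insertion time (id and ref are placeholders);
# feed.update_story() later merges in title, link, url, text, and comments.
news_cache['abc1234'] = dict(id='abc1234', ref='22000000', source='hackernews')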

@@ -7,17 +7,16 @@ import copy
 import threading
 import time
 import random
-import requests
 import shelve
 import string
 
-from feeds import hackernews
+import feed
 
 from flask import abort, Flask, request
 from flask_cors import CORS
 
 CACHE_LENGTH = 300
 DATA_FILE = 'data/data'
-READ_API = 'http://127.0.0.1:33843'
 
 news_index = 0
+

@@ -65,51 +64,33 @@ def new_id():
             nid = gen_rand_id()
     return nid
 
-def get_article(url):
-    try:
-        r = requests.post(READ_API, data=dict(url=url), timeout=10)
-        if r.status_code != 200:
-            raise
-        return r.text
-    except BaseException as e:
-        logging.error('Problem getting article: {}'.format(str(e)))
-        return ''
-
 try:
     while True:
         if news_index == 0:
-            feed = hackernews.feed()
-            new_refs = [ref for ref in feed if ref not in news_list]
-            for ref in new_refs:
+            feed_list = feed.list()
+            new_items = [(ref, source) for ref, source in feed_list if ref not in news_list]
+            for ref, source in new_items:
                 news_list.insert(0, ref)
                 nid = new_id()
                 news_ref_to_id[ref] = nid
-                news_cache[nid] = dict(id=nid, ref=ref)
+                news_cache[nid] = dict(id=nid, ref=ref, source=source)
 
-            if len(new_refs):
-                logging.info('Added {} new refs.'.format(len(new_refs)))
+            if len(new_items):
+                logging.info('Added {} new refs.'.format(len(new_items)))
 
             while len(news_list) > CACHE_LENGTH:
                 old_ref = news_list.pop()
                 old_story = news_cache.pop(news_ref_to_id[old_ref])
                 old_id = news_ref_to_id.pop(old_ref)
                 logging.info('Removed ref {} id {}.'.format(old_ref, old_id))
-                if old_story and old_id:
-                    with shelve.open(DATA_FILE) as db:
-                        db[old_id] = old_story
+                with shelve.open(DATA_FILE) as db:
+                    db[old_id] = old_story
 
         if news_index < len(news_list):
            update_ref = news_list[news_index]
            update_id = news_ref_to_id[update_ref]
            news_story = news_cache[update_id]
-            story = hackernews.story(update_ref)
-            if story:
-                news_story.update(story)
-            if news_story.get('url', '') and not news_story.get('text', ''):
-                if not news_story['url'].endswith('.pdf'):
-                    news_story['text'] = get_article(news_story['url'])
-                else:
-                    news_story['text'] = '<p>Unsupported article type.</p>'
+            feed.update_story(news_story)
 
         time.sleep(1)
 
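
The loop's shape is unchanged: rebuild the ref list on index 0, update one cached story per pass, and spill stories past the 300-entry CACHE_LENGTH into the shelve file (the old `if old_story and old_id` guard is gone, since both always exist for a cached ref). Archived stories stay readable by id; a minimal sketch, assuming the server's working directory and the DATA_FILE value 'data/data' from above (the id is a placeholder):

import shelve

# Fetch a story that aged out of the in-memory cache.
with shelve.open('data/data') as db:
    story = db.get('abc1234')  # placeholder id
    if story:
        print(story['title'], story['num_comments'])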