forked from tanner/qotnews
		
	Write news stories to disk
This commit is contained in:
		
							
								
								
									
										
											BIN
										
									
								
								apiserver/data/data.db
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								apiserver/data/data.db
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							@@ -1,3 +1,8 @@
 | 
				
			|||||||
 | 
					import logging
 | 
				
			||||||
 | 
					logging.basicConfig(
 | 
				
			||||||
 | 
					        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 | 
				
			||||||
 | 
					        level=logging.INFO)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import requests
 | 
					import requests
 | 
				
			||||||
 | 
					
 | 
				
			||||||
API_TOPSTORIES = lambda x: 'https://hacker-news.firebaseio.com/v0/topstories.json'
 | 
					API_TOPSTORIES = lambda x: 'https://hacker-news.firebaseio.com/v0/topstories.json'
 | 
				
			||||||
@@ -7,11 +12,17 @@ SITE_LINK = lambda x : 'https://news.ycombinator.com/item?id={}'.format(x)
 | 
				
			|||||||
SITE_AUTHOR_LINK = lambda x : 'https://news.ycombinator.com/user?id={}'.format(x)
 | 
					SITE_AUTHOR_LINK = lambda x : 'https://news.ycombinator.com/user?id={}'.format(x)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def api(route, ref=None):
    """Fetch a Hacker News API route and return its decoded JSON.

    route is a callable that builds the request URL from ref (for example
    API_TOPSTORIES or API_ITEM); ref is passed through to it unchanged.

    Returns the parsed JSON body on success, or False if the request fails,
    times out, answers with a non-200 status, or the body is not valid JSON.
    """
    try:
        r = requests.get(route(ref), timeout=5)
        if r.status_code != 200:
            # A bare `raise` has no active exception here and would only log
            # "No active exception to reraise"; raise something descriptive.
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except Exception as e:
        # Not BaseException: KeyboardInterrupt and SystemExit must propagate
        # instead of being logged and turned into a False return value.
        logging.error('Problem hitting hackernews API: {}'.format(str(e)))
        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def feed():
    """Return up to 30 story refs from the top-stories endpoint.

    Returns an empty list when the API call fails.
    """
    stories = api(API_TOPSTORIES)
    # api() returns False on failure; slicing False raises TypeError, so the
    # fallback has to be chosen before the slice rather than after it.
    return stories[:30] if stories else []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def comment(i):
 | 
					def comment(i):
 | 
				
			||||||
    c = {}
 | 
					    c = {}
 | 
				
			||||||
@@ -29,6 +40,7 @@ def comment_count(i):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def story(ref):
 | 
					def story(ref):
 | 
				
			||||||
    r = api(API_ITEM, ref)
 | 
					    r = api(API_ITEM, ref)
 | 
				
			||||||
 | 
					    if not r: return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if 'deleted' in r:
 | 
					    if 'deleted' in r:
 | 
				
			||||||
        return False
 | 
					        return False
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -8,6 +8,7 @@ import threading
 | 
				
			|||||||
import time
 | 
					import time
 | 
				
			||||||
import random
 | 
					import random
 | 
				
			||||||
import requests
 | 
					import requests
 | 
				
			||||||
 | 
					import shelve
 | 
				
			||||||
import string
 | 
					import string
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from feeds import hackernews
 | 
					from feeds import hackernews
 | 
				
			||||||
@@ -15,12 +16,16 @@ from flask import abort, Flask, request
 | 
				
			|||||||
from flask_cors import CORS
 | 
					from flask_cors import CORS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CACHE_LENGTH = 300
 | 
					CACHE_LENGTH = 300
 | 
				
			||||||
 | 
					DATA_FILE = 'data/data'
 | 
				
			||||||
READ_API = 'http://127.0.0.1:33843'
 | 
					READ_API = 'http://127.0.0.1:33843'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
news_index = 0
 | 
					news_index = 0
 | 
				
			||||||
news_list = []
 | 
					
 | 
				
			||||||
news_ref_to_id = {}
 | 
					with shelve.open(DATA_FILE) as db:
 | 
				
			||||||
news_cache = {}
 | 
					    logging.info('Reading caches from disk...')
 | 
				
			||||||
 | 
					    news_list = db.get('news_list', [])
 | 
				
			||||||
 | 
					    news_ref_to_id = db.get('news_ref_to_id', {})
 | 
				
			||||||
 | 
					    news_cache = db.get('news_cache', {})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
flask_app = Flask(__name__)
 | 
					flask_app = Flask(__name__)
 | 
				
			||||||
cors = CORS(flask_app)
 | 
					cors = CORS(flask_app)
 | 
				
			||||||
@@ -38,7 +43,11 @@ def index():
 | 
				
			|||||||
def comments(id):
    """Return the story stored under id, or abort with 404 if unknown.

    The in-memory news_cache is consulted first; on a miss the on-disk
    shelf at DATA_FILE is checked.
    """
    try:
        return {'story': news_cache[id]}
    except KeyError:
        pass

    with shelve.open(DATA_FILE) as archive:
        try:
            return {'story': archive[id]}
        except KeyError:
            pass

    abort(404)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
print('Starting Flask...')
 | 
					print('Starting Flask...')
 | 
				
			||||||
@@ -46,20 +55,27 @@ web_thread = threading.Thread(target=flask_app.run, kwargs={'port': 33842})
 | 
				
			|||||||
web_thread.setDaemon(True)
 | 
					web_thread.setDaemon(True)
 | 
				
			||||||
web_thread.start()
 | 
					web_thread.start()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def gen_rand_id(length=4):
    """Return a random string of `length` uppercase ASCII letters.

    length defaults to 4, the id size used by new_id().
    """
    return ''.join(random.choice(string.ascii_uppercase) for _ in range(length))


def new_id():
    """Return a random id that is in neither news_cache nor the shelf.

    Candidates are regenerated until one is found that is not a key of the
    in-memory news_cache and not a key of the shelf at DATA_FILE.
    """
    nid = gen_rand_id()
    with shelve.open(DATA_FILE) as db:
        while nid in news_cache or nid in db:
            nid = gen_rand_id()
    return nid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_article(url):
    """Fetch the article text for url from the service at READ_API.

    Posts url to READ_API and returns the response body as text. Returns an
    empty string if the request fails, times out, or answers with a non-200
    status; the failure is logged.
    """
    try:
        r = requests.post(READ_API, data=dict(url=url), timeout=10)
        if r.status_code != 200:
            # A bare `raise` has no active exception here and would only log
            # "No active exception to reraise"; raise something descriptive.
            raise Exception('Bad response code ' + str(r.status_code))
        return r.text
    except Exception as e:
        # Not BaseException: a KeyboardInterrupt during this request must
        # propagate to the caller instead of being swallowed here.
        logging.error('Problem getting article: {}'.format(str(e)))
        return ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					try:
 | 
				
			||||||
    while True:
 | 
					    while True:
 | 
				
			||||||
        if news_index == 0:
 | 
					        if news_index == 0:
 | 
				
			||||||
            feed = hackernews.feed()
 | 
					            feed = hackernews.feed()
 | 
				
			||||||
@@ -75,9 +91,12 @@ while True:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
            while len(news_list) > CACHE_LENGTH:
 | 
					            while len(news_list) > CACHE_LENGTH:
 | 
				
			||||||
                old_ref = news_list.pop()
 | 
					                old_ref = news_list.pop()
 | 
				
			||||||
            del news_cache[news_ref_to_id[old_ref]]
 | 
					                old_story = news_cache.pop(news_ref_to_id[old_ref])
 | 
				
			||||||
            del news_ref_to_id[old_ref]
 | 
					                old_id = news_ref_to_id.pop(old_ref)
 | 
				
			||||||
            logging.info('Removed ref {}.'.format(old_ref))
 | 
					                logging.info('Removed ref {} id {}.'.format(old_ref, old_id))
 | 
				
			||||||
 | 
					                if old_story and old_id:
 | 
				
			||||||
 | 
					                    with shelve.open(DATA_FILE) as db:
 | 
				
			||||||
 | 
					                        db[old_id] = old_story
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if news_index < len(news_list):
 | 
					        if news_index < len(news_list):
 | 
				
			||||||
            update_ref = news_list[news_index]
 | 
					            update_ref = news_list[news_index]
 | 
				
			||||||
@@ -87,9 +106,19 @@ while True:
 | 
				
			|||||||
            if story:
 | 
					            if story:
 | 
				
			||||||
                news_story.update(story)
 | 
					                news_story.update(story)
 | 
				
			||||||
            if news_story.get('url', '') and not news_story.get('text', ''):
 | 
					            if news_story.get('url', '') and not news_story.get('text', ''):
 | 
				
			||||||
 | 
					                if not news_story['url'].endswith('.pdf'):
 | 
				
			||||||
                    news_story['text'] = get_article(news_story['url'])
 | 
					                    news_story['text'] = get_article(news_story['url'])
 | 
				
			||||||
 | 
					                else:
 | 
				
			||||||
 | 
					                    news_story['text'] = '<p>Unsupported article type.</p>'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        time.sleep(1)
 | 
					        time.sleep(1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        news_index += 1
 | 
					        news_index += 1
 | 
				
			||||||
        if news_index == CACHE_LENGTH: news_index = 0
 | 
					        if news_index == CACHE_LENGTH: news_index = 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					finally:
 | 
				
			||||||
 | 
					    with shelve.open(DATA_FILE) as db:
 | 
				
			||||||
 | 
					        logging.info('Writing caches to disk...')
 | 
				
			||||||
 | 
					        db['news_list'] = news_list
 | 
				
			||||||
 | 
					        db['news_ref_to_id'] = news_ref_to_id
 | 
				
			||||||
 | 
					        db['news_cache'] = news_cache
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user