From db5097ac57fb394da7f17c64af72276a9ac32599 Mon Sep 17 00:00:00 2001 From: Tanner Collin Date: Fri, 8 Nov 2019 21:50:33 +0000 Subject: [PATCH] Drop articles more than two days old --- apiserver/feed.py | 5 +++++ apiserver/server.py | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/apiserver/feed.py b/apiserver/feed.py index 0cb8e42..34c7c18 100644 --- a/apiserver/feed.py +++ b/apiserver/feed.py @@ -16,6 +16,7 @@ READ_API = 'http://127.0.0.1:33843' ARCHIVE_FIRST = ['bloomberg.com', 'wsj.com'] INVALID_FILES = ['.pdf', '.png', '.jpg', '.gif'] INVALID_DOMAINS = ['youtube.com'] +TWO_DAYS = 60*60*24*2 def list(): feed = [] @@ -108,6 +109,10 @@ def update_story(story): logging.info('Article not ready yet') return False + if story['date'] and story['date'] + TWO_DAYS < time.time(): + logging.info('Article too old, removing') + return False + if story.get('url', '') and not story.get('text', ''): if any([story['url'].endswith(ext) for ext in INVALID_FILES]): logging.info('URL invalid file type') diff --git a/apiserver/server.py b/apiserver/server.py index 0d7b254..10e7a99 100644 --- a/apiserver/server.py +++ b/apiserver/server.py @@ -75,7 +75,7 @@ cors = CORS(flask_app) def api(): front_page = [news_cache[news_ref_to_id[ref]] for ref in news_list] front_page = [copy.copy(x) for x in front_page if 'title' in x and x['title']] - front_page = front_page[:100] + front_page = front_page[:60] for story in front_page: story.pop('text', None) story.pop('comments', None) @@ -179,6 +179,8 @@ def feed_thread(): archive.update(news_story) else: remove_ref(update_ref) + else: + logging.info('Skipping update - no story #' + str(news_index+1)) gevent.sleep(3)