From a25457254f90933171119a2c8042e19f56da626b Mon Sep 17 00:00:00 2001 From: Tanner Collin Date: Sat, 5 Mar 2022 23:48:46 +0000 Subject: [PATCH] Improve logging, send tweets to nitter.net --- apiserver/feed.py | 6 +++++- apiserver/feeds/hackernews.py | 6 +++++- apiserver/feeds/lobsters.py | 4 +++- apiserver/feeds/manual.py | 4 +++- apiserver/feeds/reddit.py | 5 ++++- apiserver/feeds/tildes.py | 10 ++++++++-- apiserver/server.py | 9 +++++---- 7 files changed, 33 insertions(+), 11 deletions(-) diff --git a/apiserver/feed.py b/apiserver/feed.py index 9a32e6a..9b3635d 100644 --- a/apiserver/feed.py +++ b/apiserver/feed.py @@ -34,6 +34,10 @@ def get_article(url): logging.info('Readerserver not configured, aborting.') return '' + if url.startswith('https://twitter.com'): + logging.info('Replacing twitter.com url with nitter.net') + url = url.replace('twitter.com', 'nitter.net') + try: r = requests.post(settings.READER_URL, data=dict(url=url), timeout=20) if r.status_code != 200: @@ -82,7 +86,7 @@ def update_story(story, is_manual=False): return False if story['date'] and not is_manual and story['date'] + TWO_DAYS < time.time(): - logging.info('Story too old, removing') + logging.info('Story too old, removing. 
Date: {}'.format(story['date'])) return False if story.get('url', '') and not story.get('text', ''): diff --git a/apiserver/feeds/hackernews.py b/apiserver/feeds/hackernews.py index 5e951b0..91acd76 100644 --- a/apiserver/feeds/hackernews.py +++ b/apiserver/feeds/hackernews.py @@ -61,11 +61,15 @@ def comment_count(i): def story(ref): r = api(API_ITEM, ref) - if not r: return False + if not r: + logging.info('Bad Hackernews API response.') + return False if 'deleted' in r: + logging.info('Story was deleted.') return False elif r.get('type', '') != 'story': + logging.info('Type "{}" is not "story".'.format(r.get('type', ''))) return False s = {} diff --git a/apiserver/feeds/lobsters.py b/apiserver/feeds/lobsters.py index 98d991f..c333e4a 100644 --- a/apiserver/feeds/lobsters.py +++ b/apiserver/feeds/lobsters.py @@ -81,7 +81,9 @@ def iter_comments(flat_comments): def story(ref): r = api(API_ITEM, ref) - if not r: return False + if not r: + logging.info('Bad Lobsters API response.') + return False s = {} try: diff --git a/apiserver/feeds/manual.py b/apiserver/feeds/manual.py index c23ed70..de9a5c2 100644 --- a/apiserver/feeds/manual.py +++ b/apiserver/feeds/manual.py @@ -27,7 +27,9 @@ def api(route): def story(ref): html = api(ref) - if not html: return False + if not html: + logging.info('Bad http GET response.') + return False soup = BeautifulSoup(html, features='html.parser') diff --git a/apiserver/feeds/reddit.py b/apiserver/feeds/reddit.py index e3c5c6d..f173b61 100644 --- a/apiserver/feeds/reddit.py +++ b/apiserver/feeds/reddit.py @@ -59,7 +59,9 @@ def comment(i): def story(ref): try: r = reddit.submission(ref) - if not r: return False + if not r: + logging.info('Bad Reddit API response.') + return False s = {} s['author'] = r.author.name if r.author else '[Deleted]' @@ -74,6 +76,7 @@ def story(ref): s['num_comments'] = r.num_comments if s['score'] < 25 and s['num_comments'] < 10: + logging.info('Score ({}) or num comments ({}) below 
threshold.'.format(s['score'], s['num_comments'])) return False if r.selftext: diff --git a/apiserver/feeds/tildes.py b/apiserver/feeds/tildes.py index 5c7a9e9..372f2de 100644 --- a/apiserver/feeds/tildes.py +++ b/apiserver/feeds/tildes.py @@ -71,11 +71,15 @@ def story(ref): html = api(SITE_LINK(group_lookup[ref], ref)) else: html = api(API_ITEM(ref)) - if not html: return False + if not html: + logging.info('Bad Tildes API response.') + return False soup = BeautifulSoup(html, features='html.parser') a = soup.find('article', class_='topic-full') - if a is None: return False + if a is None: + logging.info('Tildes <article>
element not found.') + return False h = a.find('header') lu = h.find('a', class_='link-user') @@ -83,6 +87,7 @@ def story(ref): error = a.find('div', class_='text-error') if error: if 'deleted' in error.string or 'removed' in error.string: + logging.info('Article was deleted or removed.') return False s = {} @@ -103,6 +108,7 @@ def story(ref): s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0 if s['score'] < 8 and s['num_comments'] < 6: + logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments'])) return False td = a.find('div', class_='topic-full-text') diff --git a/apiserver/server.py b/apiserver/server.py index 4e2116a..ae19a33 100644 --- a/apiserver/server.py +++ b/apiserver/server.py @@ -169,9 +169,10 @@ def feed_thread(): continue try: nid = new_id() + logging.info('Adding ref: {}, id: {}, source: {}'.format(ref, nid, source)) database.put_ref(ref, nid, source) - logging.info('Added ref ' + ref) except database.IntegrityError: + logging.info('Already have ID / ref, skipping.') continue ref_list = database.get_reflist(FEED_LENGTH) @@ -186,7 +187,7 @@ def feed_thread(): except AttributeError: story = dict(id=item['sid'], ref=item['ref'], source=item['source']) - logging.info('Updating story: ' + str(story['ref']) + ', index: ' + str(news_index)) + logging.info('Updating {} story: {}, index: {}'.format(story['source'], story['ref'], news_index)) valid = feed.update_story(story) if valid: @@ -209,10 +210,10 @@ def feed_thread(): logging.critical('feed_thread error: {} {}'.format(e.__class__.__name__, e)) http_server.stop() -print('Starting Feed thread...') +logging.info('Starting Feed thread...') gevent.spawn(feed_thread) -print('Starting HTTP thread...') +logging.info('Starting HTTP thread...') try: http_server.serve_forever() except KeyboardInterrupt: