diff --git a/apiserver/feeds/hackernews.py b/apiserver/feeds/hackernews.py
index e4efb11..0ecdadb 100644
--- a/apiserver/feeds/hackernews.py
+++ b/apiserver/feeds/hackernews.py
@@ -12,7 +12,8 @@ import requests
 from utils import clean
 
 API_TOPSTORIES = lambda x: 'https://hacker-news.firebaseio.com/v0/topstories.json'
-API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
+ALG_API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
+BHN_API_ITEM = lambda x : 'https://api.hnpwa.com/v0/item/{}.json'.format(x)
 
 SITE_LINK = lambda x : 'https://news.ycombinator.com/item?id={}'.format(x)
 SITE_AUTHOR_LINK = lambda x : 'https://news.ycombinator.com/user?id={}'.format(x)
@@ -42,7 +43,7 @@ def api(route, ref=None):
 def feed():
     return [str(x) for x in api(API_TOPSTORIES) or []]
 
-def comment(i):
+def alg_comment(i):
     if 'author' not in i:
         return False
 
@@ -51,19 +52,19 @@ def comment(i):
     c['score'] = i.get('points', 0)
     c['date'] = i.get('created_at_i', 0)
     c['text'] = clean(i.get('text', '') or '')
-    c['comments'] = [comment(j) for j in i['children']]
+    c['comments'] = [alg_comment(j) for j in i['children']]
     c['comments'] = list(filter(bool, c['comments']))
     return c
 
-def comment_count(i):
+def alg_comment_count(i):
     alive = 1 if i['author'] else 0
-    return sum([comment_count(c) for c in i['comments']]) + alive
+    return sum([alg_comment_count(c) for c in i['comments']]) + alive
 
-def story(ref):
-    r = api(API_ITEM, ref)
+def alg_story(ref):
+    r = api(ALG_API_ITEM, ref)
     if not r:
-        logging.info('Bad Hackernews API response.')
-        return False
+        logging.info('Bad Algolia Hackernews API response.')
+        return None
 
     if 'deleted' in r:
         logging.info('Story was deleted.')
@@ -80,17 +81,72 @@ def story(ref):
     s['title'] = r.get('title', '')
     s['link'] = SITE_LINK(ref)
     s['url'] = r.get('url', '')
-    s['comments'] = [comment(i) for i in r['children']]
+    s['comments'] = [alg_comment(i) for i in r['children']]
     s['comments'] = list(filter(bool, s['comments']))
-    s['num_comments'] = comment_count(s) - 1
+    s['num_comments'] = alg_comment_count(s) - 1
+
+    if 'text' in r and r['text']:
+        s['text'] = clean(r['text'] or '')
+
+    return s
+
+def bhn_comment(i):
+    if 'user' not in i:
+        return False
+
+    c = {}
+    c['author'] = i.get('user', '')
+    c['score'] = 0 # Not present?
+    c['date'] = i.get('time', 0)
+    c['text'] = clean(i.get('content', '') or '')
+    c['comments'] = [bhn_comment(j) for j in i['comments']]
+    c['comments'] = list(filter(bool, c['comments']))
+    return c
+
+def bhn_story(ref):
+    r = api(BHN_API_ITEM, ref)
+    if not r:
+        logging.info('Bad BetterHN Hackernews API response.')
+        return None
+
+    if 'deleted' in r: # TODO: verify
+        logging.info('Story was deleted.')
+        return False
+    elif r.get('type', '') != 'link':
+        logging.info('Type "{}" is not "link".'.format(r.get('type', '')))
+        return False
+
+    s = {}
+    s['author'] = r.get('user', '')
+    s['author_link'] = SITE_AUTHOR_LINK(r.get('user', ''))
+    s['score'] = r.get('points', 0)
+    s['date'] = r.get('time', 0)
+    s['title'] = r.get('title', '')
+    s['link'] = SITE_LINK(ref)
+    s['url'] = r.get('url', '')
+    if s['url'].startswith('item'):
+        s['url'] = SITE_LINK(ref)
+    s['comments'] = [bhn_comment(i) for i in r['comments']]
+    s['comments'] = list(filter(bool, s['comments']))
+    s['num_comments'] = r.get('comments_count', 0)
+
+    if 'content' in r and r['content']:
+        s['text'] = clean(r['content'] or '')
+
+    return s
+
+def story(ref):
+    s = alg_story(ref)
+    if s is None:
+        s = bhn_story(ref)
+    if not s:
+        return False
+
 
     if s['score'] < 25 and s['num_comments'] < 10:
         logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
         return False
 
-    if 'text' in r and r['text']:
-        s['text'] = clean(r['text'] or '')
-
     return s
 
 # scratchpad so I can quickly develop the parser
@@ -98,3 +154,7 @@ if __name__ == '__main__':
     print(feed())
     #print(story(20763961))
     #print(story(20802050))
+
+    #print(story(42899834)) # type "job"
+    #print(story(42900076)) # Ask HN
+    print(story(42899703)) # normal
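
Note (not part of the patch): with this change, story() tries the Algolia API first and only falls back to the BetterHN endpoint when alg_story() returns None (bad/empty response); a False from alg_story() (deleted story, wrong type) is still a final rejection. For reference, a rough sketch of the item shape that bhn_story() and bhn_comment() assume from the hnpwa endpoint; field names are taken from the accessors in the diff above rather than any authoritative schema, and all values are invented for illustration.

    # Illustrative only -- shows just the fields the new code reads.
    item = {
        'type': 'link',               # anything other than 'link' is rejected
        'user': 'someuser',           # -> s['author'] / SITE_AUTHOR_LINK
        'points': 123,                # -> s['score']
        'time': 1738800000,           # unix timestamp -> s['date']
        'title': 'Example story',
        'url': 'item?id=42899703',    # 'item...' URLs get rewritten to SITE_LINK(ref)
        'content': '',                # -> s['text'] when non-empty
        'comments_count': 45,         # -> s['num_comments']
        'comments': [                 # nested tree walked by bhn_comment()
            {'user': 'another', 'time': 1738800100,
             'content': '<p>hi</p>', 'comments': []},
        ],
    }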