From cf9e197e6c87221d74a450dcd1fbf074441470c9 Mon Sep 17 00:00:00 2001 From: Tanner Collin Date: Sun, 25 Aug 2019 07:46:22 +0000 Subject: [PATCH] Fix tildes comments parsing bug --- apiserver/feed.py | 3 +++ apiserver/feeds/tildes.py | 10 +++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/apiserver/feed.py b/apiserver/feed.py index 1014147..8dbce5a 100644 --- a/apiserver/feed.py +++ b/apiserver/feed.py @@ -29,6 +29,8 @@ def get_article(url): def update_story(story): res = {} + logging.info('Updating story ' + str(story['ref'])) + if story['source'] == 'hackernews': res = hackernews.story(story['ref']) elif story['source'] == 'reddit': @@ -42,6 +44,7 @@ def update_story(story): story.update(res) if story.get('url', '') and not story.get('text', ''): if not story['url'].endswith('.pdf'): + logging.info('Getting article ' + story['url']) story['text'] = get_article(story['url']) else: story['text'] = '

Unsupported article type.

' diff --git a/apiserver/feeds/tildes.py b/apiserver/feeds/tildes.py index 20c3ac9..c0c8b51 100644 --- a/apiserver/feeds/tildes.py +++ b/apiserver/feeds/tildes.py @@ -41,7 +41,8 @@ def comment(i): c['score'] = 1 c['date'] = unix(i.find('time')['datetime']) c['text'] = i.find('div', class_='comment-text').encode_contents().decode() - c['comments'] = [comment(j) for j in i.find('ol', class_='comment-tree').findAll('li', recursive=False)] if i.ol else [] + ct = i.find('ol', class_='comment-tree') + c['comments'] = [comment(j) for j in ct.findAll('li', recursive=False)] if ct else [] return c def story(ref): @@ -61,7 +62,8 @@ def story(ref): s['link'] = SITE_LINK(ref) ud = a.find('div', class_='topic-full-link') s['url'] = ud.a['href'] if ud else s['link'] - s['comments'] = [comment(i) for i in a.find('ol', id='comments').findAll('li', recursive=False)] + sc = a.find('ol', id='comments') + s['comments'] = [comment(i) for i in sc.findAll('li', recursive=False)] ch = a.find('header', class_='topic-comments-header') s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0 @@ -79,8 +81,10 @@ if __name__ == '__main__': print(no_comments) self_post = story('gsb') print(self_post) + li_comment = story('gqx') + print(li_comment) # make sure there's no self-reference import copy - for x in [normal, no_comments, self_post]: + for x in [normal, no_comments, self_post, li_comment]: _ = copy.deepcopy(x)