Fix tildes comment parsing bug

2019-08-25 07:46:22 +00:00
parent 2b1a352917
commit cf9e197e6c
2 changed files with 10 additions and 3 deletions
--- a/apiserver/feed.py
+++ b/apiserver/feed.py
@@ -29,6 +29,8 @@ def get_article(url):
 def update_story(story):
    res = {}

+    logging.info('Updating story ' + str(story['ref']))
+
    if story['source'] == 'hackernews':
        res = hackernews.story(story['ref'])
    elif story['source'] == 'reddit':
@@ -42,6 +44,7 @@ def update_story(story):
        story.update(res)
    if story.get('url', '') and not story.get('text', ''):
        if not story['url'].endswith('.pdf'):
+            logging.info('Getting article ' + story['url'])
            story['text'] = get_article(story['url'])
        else:
            story['text'] = '<p>Unsupported article type.</p>'
--- a/apiserver/feeds/tildes.py
+++ b/apiserver/feeds/tildes.py
@@ -41,7 +41,8 @@ def comment(i):
    c['score'] = 1
    c['date'] = unix(i.find('time')['datetime'])
    c['text'] = i.find('div', class_='comment-text').encode_contents().decode()
-    c['comments'] = [comment(j) for j in i.find('ol', class_='comment-tree').findAll('li', recursive=False)] if i.ol else []
+    ct = i.find('ol', class_='comment-tree')
+    c['comments'] = [comment(j) for j in ct.findAll('li', recursive=False)] if ct else []
    return c

 def story(ref):
@@ -61,7 +62,8 @@ def story(ref):
    s['link'] = SITE_LINK(ref)
    ud = a.find('div', class_='topic-full-link')
    s['url'] = ud.a['href'] if ud else s['link']
-    s['comments'] = [comment(i) for i in a.find('ol', id='comments').findAll('li', recursive=False)]
+    sc = a.find('ol', id='comments')
+    s['comments'] = [comment(i) for i in sc.findAll('li', recursive=False)]
    ch = a.find('header', class_='topic-comments-header')
    s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0

@@ -79,8 +81,10 @@ if __name__ == '__main__':
    print(no_comments)
    self_post = story('gsb')
    print(self_post)
+    li_comment = story('gqx')
+    print(li_comment)

    # make sure there's no self-reference
    import copy
-    for x in [normal, no_comments, self_post]:
+    for x in [normal, no_comments, self_post, li_comment]:
        _ = copy.deepcopy(x)