Fix Tildes comment parsing bug
This commit is contained in:
		| @@ -29,6 +29,8 @@ def get_article(url): | ||||
| def update_story(story): | ||||
|     res = {} | ||||
|  | ||||
|     logging.info('Updating story ' + str(story['ref'])) | ||||
|  | ||||
|     if story['source'] == 'hackernews': | ||||
|         res = hackernews.story(story['ref']) | ||||
|     elif story['source'] == 'reddit': | ||||
| @@ -42,6 +44,7 @@ def update_story(story): | ||||
|         story.update(res) | ||||
|     if story.get('url', '') and not story.get('text', ''): | ||||
|         if not story['url'].endswith('.pdf'): | ||||
|             logging.info('Getting article ' + story['url']) | ||||
|             story['text'] = get_article(story['url']) | ||||
|         else: | ||||
|             story['text'] = '<p>Unsupported article type.</p>' | ||||
|   | ||||
| @@ -41,7 +41,8 @@ def comment(i): | ||||
|     c['score'] = 1 | ||||
|     c['date'] = unix(i.find('time')['datetime']) | ||||
|     c['text'] = i.find('div', class_='comment-text').encode_contents().decode() | ||||
|     c['comments'] = [comment(j) for j in i.find('ol', class_='comment-tree').findAll('li', recursive=False)] if i.ol else [] | ||||
|     ct = i.find('ol', class_='comment-tree') | ||||
|     c['comments'] = [comment(j) for j in ct.findAll('li', recursive=False)] if ct else [] | ||||
|     return c | ||||
|  | ||||
| def story(ref): | ||||
| @@ -61,7 +62,8 @@ def story(ref): | ||||
|     s['link'] = SITE_LINK(ref) | ||||
|     ud = a.find('div', class_='topic-full-link') | ||||
|     s['url'] = ud.a['href'] if ud else s['link'] | ||||
|     s['comments'] = [comment(i) for i in a.find('ol', id='comments').findAll('li', recursive=False)] | ||||
|     sc = a.find('ol', id='comments') | ||||
|     s['comments'] = [comment(i) for i in sc.findAll('li', recursive=False)] | ||||
|     ch = a.find('header', class_='topic-comments-header') | ||||
|     s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0 | ||||
|  | ||||
| @@ -79,8 +81,10 @@ if __name__ == '__main__': | ||||
|     print(no_comments) | ||||
|     self_post = story('gsb') | ||||
|     print(self_post) | ||||
|     li_comment = story('gqx') | ||||
|     print(li_comment) | ||||
|  | ||||
|     # make sure there's no self-reference | ||||
|     import copy | ||||
|     for x in [normal, no_comments, self_post]: | ||||
|     for x in [normal, no_comments, self_post, li_comment]: | ||||
|         _ = copy.deepcopy(x) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user