Fix Tildes comment-parsing bug
This commit is contained in:
parent
2b1a352917
commit
cf9e197e6c
|
@ -29,6 +29,8 @@ def get_article(url):
|
|||
def update_story(story):
|
||||
res = {}
|
||||
|
||||
logging.info('Updating story ' + str(story['ref']))
|
||||
|
||||
if story['source'] == 'hackernews':
|
||||
res = hackernews.story(story['ref'])
|
||||
elif story['source'] == 'reddit':
|
||||
|
@ -42,6 +44,7 @@ def update_story(story):
|
|||
story.update(res)
|
||||
if story.get('url', '') and not story.get('text', ''):
|
||||
if not story['url'].endswith('.pdf'):
|
||||
logging.info('Getting article ' + story['url'])
|
||||
story['text'] = get_article(story['url'])
|
||||
else:
|
||||
story['text'] = '<p>Unsupported article type.</p>'
|
||||
|
|
|
@ -41,7 +41,8 @@ def comment(i):
|
|||
c['score'] = 1
|
||||
c['date'] = unix(i.find('time')['datetime'])
|
||||
c['text'] = i.find('div', class_='comment-text').encode_contents().decode()
|
||||
c['comments'] = [comment(j) for j in i.find('ol', class_='comment-tree').findAll('li', recursive=False)] if i.ol else []
|
||||
ct = i.find('ol', class_='comment-tree')
|
||||
c['comments'] = [comment(j) for j in ct.findAll('li', recursive=False)] if ct else []
|
||||
return c
|
||||
|
||||
def story(ref):
|
||||
|
@ -61,7 +62,8 @@ def story(ref):
|
|||
s['link'] = SITE_LINK(ref)
|
||||
ud = a.find('div', class_='topic-full-link')
|
||||
s['url'] = ud.a['href'] if ud else s['link']
|
||||
s['comments'] = [comment(i) for i in a.find('ol', id='comments').findAll('li', recursive=False)]
|
||||
sc = a.find('ol', id='comments')
|
||||
s['comments'] = [comment(i) for i in sc.findAll('li', recursive=False)]
|
||||
ch = a.find('header', class_='topic-comments-header')
|
||||
s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
|
||||
|
||||
|
@ -79,8 +81,10 @@ if __name__ == '__main__':
|
|||
print(no_comments)
|
||||
self_post = story('gsb')
|
||||
print(self_post)
|
||||
li_comment = story('gqx')
|
||||
print(li_comment)
|
||||
|
||||
# make sure there's no self-reference
|
||||
import copy
|
||||
for x in [normal, no_comments, self_post]:
|
||||
for x in [normal, no_comments, self_post, li_comment]:
|
||||
_ = copy.deepcopy(x)
|
||||
|
|
Loading…
Reference in New Issue
Block a user