forked from tanner/qotnews
Filter out removed comments (comment() returns False for them)
This commit is contained in:
@@ -41,6 +41,10 @@ def unix(date_str):
|
||||
|
||||
def comment(i):
|
||||
i = i.article
|
||||
|
||||
if i.find('div', class_='is-comment-removed'):
|
||||
return False
|
||||
|
||||
c = {}
|
||||
c['author'] = str(i.find('a', class_='link-user').string)
|
||||
c['score'] = 1
|
||||
@@ -48,6 +52,7 @@ def comment(i):
|
||||
c['text'] = i.find('div', class_='comment-text').encode_contents().decode()
|
||||
ct = i.find('ol', class_='comment-tree')
|
||||
c['comments'] = [comment(j) for j in ct.findAll('li', recursive=False)] if ct else []
|
||||
c['comments'] = list(filter(bool, c['comments']))
|
||||
return c
|
||||
|
||||
def story(ref):
|
||||
@@ -76,6 +81,7 @@ def story(ref):
|
||||
s['url'] = ud.a['href'] if ud else s['link']
|
||||
sc = a.find('ol', id='comments')
|
||||
s['comments'] = [comment(i) for i in sc.findAll('li', recursive=False)]
|
||||
s['comments'] = list(filter(bool, s['comments']))
|
||||
ch = a.find('header', class_='topic-comments-header')
|
||||
s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
|
||||
|
||||
@@ -87,17 +93,19 @@ def story(ref):
|
||||
|
||||
# scratchpad so I can quickly develop the parser
|
||||
if __name__ == '__main__':
|
||||
print(feed())
|
||||
normal = story('gxt')
|
||||
print(normal)
|
||||
no_comments = story('gxr')
|
||||
print(no_comments)
|
||||
self_post = story('gsb')
|
||||
print(self_post)
|
||||
li_comment = story('gqx')
|
||||
print(li_comment)
|
||||
#print(feed())
|
||||
#normal = story('gxt')
|
||||
#print(normal)
|
||||
#no_comments = story('gxr')
|
||||
#print(no_comments)
|
||||
#self_post = story('gsb')
|
||||
#print(self_post)
|
||||
#li_comment = story('gqx')
|
||||
#print(li_comment)
|
||||
broken = story('h23')
|
||||
print(broken)
|
||||
|
||||
# make sure there's no self-reference
|
||||
import copy
|
||||
for x in [normal, no_comments, self_post, li_comment]:
|
||||
_ = copy.deepcopy(x)
|
||||
#import copy
|
||||
#for x in [normal, no_comments, self_post, li_comment]:
|
||||
# _ = copy.deepcopy(x)
|
||||
|
Reference in New Issue
Block a user