Filter out False comments
This commit is contained in:
parent
20a9d9d452
commit
2ede5ed6ff
|
@ -25,12 +25,16 @@ def feed():
|
|||
return api(API_TOPSTORIES) or []
|
||||
|
||||
def comment(i):
    """Convert one raw comment item into the normalised comment dict.

    Args:
        i: mapping describing a comment. The keys read are 'author',
            'points', 'created_at_i', 'text' and 'children'.

    Returns:
        False when the item has no 'author' key (presumably a deleted or
        dead comment -- confirm against the upstream API); otherwise a dict
        with the keys 'author', 'score', 'date', 'text' and 'comments',
        where 'comments' holds the converted replies with every False
        entry dropped.
    """
    if 'author' not in i:
        return False

    # A missing or null 'children' entry is treated as "no replies" instead
    # of raising KeyError/TypeError.
    replies = (comment(j) for j in i.get('children') or [])

    return {
        'author': i.get('author', ''),
        'score': i.get('points', 0),
        'date': i.get('created_at_i', 0),
        'text': i.get('text', ''),
        # Replies that were rejected come back as False; keep only real ones.
        'comments': [reply for reply in replies if reply],
    }
|
||||
|
||||
def comment_count(i):
|
||||
|
@ -55,6 +59,7 @@ def story(ref):
|
|||
s['link'] = SITE_LINK(ref)
|
||||
s['url'] = r.get('url', '')
|
||||
s['comments'] = [comment(i) for i in r['children']]
|
||||
s['comments'] = list(filter(bool, s['comments']))
|
||||
s['num_comments'] = comment_count(s) - 1
|
||||
|
||||
if 'text' in r and r['text']:
|
||||
|
|
|
@ -22,22 +22,21 @@ reddit = praw.Reddit('bot')
|
|||
def feed():
    """Return the `id` of every item yielded by the hot listing of SUBREDDITS."""
    hot_listing = reddit.subreddit(SUBREDDITS).hot()
    return [submission.id for submission in hot_listing]
|
||||
|
||||
def good_comment(c):
    """Tell whether a comment entry should be kept.

    Returns False for MoreComments placeholders, for entries whose body is
    exactly '[removed]', and for entries whose author is named
    'AutoModerator'. Everything else yields True.
    """
    if isinstance(c, MoreComments) or c.body == '[removed]':
        return False

    # The author may be falsy, in which case the comment is kept.
    written_by_automod = bool(c.author) and c.author.name == 'AutoModerator'
    return not written_by_automod
|
||||
|
||||
def comment(i):
    """Convert one comment-tree entry into the normalised comment dict.

    Args:
        i: an entry from a submission's comments or from another comment's
            replies. It may be a MoreComments placeholder.

    Returns:
        False when the entry is a MoreComments placeholder, when its body
        contains '[removed]' or '[deleted]', or when its author is named
        'AutoModerator'. Otherwise a dict with the keys 'author', 'score',
        'date', 'text' and 'comments', where 'comments' holds the converted
        replies with every False entry dropped.
    """
    if isinstance(i, MoreComments):
        return False
    if '[removed]' in i.body or '[deleted]' in i.body:
        return False
    if i.author and i.author.name == 'AutoModerator':
        return False

    c = {}
    c['author'] = i.author.name if i.author else '[Deleted]'
    c['score'] = i.score
    c['date'] = i.created_utc
    c['text'] = render_md(i.body)
    # Convert the replies exactly once. Rejected replies come back as False
    # from the guards above, so no separate pre-filter pass is needed.
    c['comments'] = [reply for reply in map(comment, i.replies) if reply]
    return c
|
||||
|
||||
def story(ref):
|
||||
|
@ -52,7 +51,8 @@ def story(ref):
|
|||
s['title'] = r.title
|
||||
s['link'] = SITE_LINK(r.permalink)
|
||||
s['url'] = r.url
|
||||
s['comments'] = [comment(i) for i in r.comments if good_comment(i)]
|
||||
s['comments'] = [comment(i) for i in r.comments]
|
||||
s['comments'] = list(filter(bool, s['comments']))
|
||||
s['num_comments'] = r.num_comments
|
||||
|
||||
if r.selftext:
|
||||
|
|
|
@ -41,6 +41,10 @@ def unix(date_str):
|
|||
|
||||
def comment(i):
|
||||
i = i.article
|
||||
|
||||
if i.find('div', class_='is-comment-removed'):
|
||||
return False
|
||||
|
||||
c = {}
|
||||
c['author'] = str(i.find('a', class_='link-user').string)
|
||||
c['score'] = 1
|
||||
|
@ -48,6 +52,7 @@ def comment(i):
|
|||
c['text'] = i.find('div', class_='comment-text').encode_contents().decode()
|
||||
ct = i.find('ol', class_='comment-tree')
|
||||
c['comments'] = [comment(j) for j in ct.findAll('li', recursive=False)] if ct else []
|
||||
c['comments'] = list(filter(bool, c['comments']))
|
||||
return c
|
||||
|
||||
def story(ref):
|
||||
|
@ -76,6 +81,7 @@ def story(ref):
|
|||
s['url'] = ud.a['href'] if ud else s['link']
|
||||
sc = a.find('ol', id='comments')
|
||||
s['comments'] = [comment(i) for i in sc.findAll('li', recursive=False)]
|
||||
s['comments'] = list(filter(bool, s['comments']))
|
||||
ch = a.find('header', class_='topic-comments-header')
|
||||
s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
|
||||
|
||||
|
@ -87,17 +93,19 @@ def story(ref):
|
|||
|
||||
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
    import copy

    print(feed())

    # Fetch and print one story per layout the parser has to handle.
    normal = story('gxt')
    print(normal)
    no_comments = story('gxr')
    print(no_comments)
    self_post = story('gsb')
    print(self_post)
    li_comment = story('gqx')
    print(li_comment)
    broken = story('h23')
    print(broken)

    # make sure there's no self-reference
    for x in [normal, no_comments, self_post, li_comment]:
        _ = copy.deepcopy(x)
|
||||
|
|
Loading…
Reference in New Issue
Block a user