Filter out False comments

This commit is contained in:
Tanner Collin 2019-08-30 06:23:14 +00:00
parent 20a9d9d452
commit 2ede5ed6ff
3 changed files with 36 additions and 23 deletions

View File

@ -25,12 +25,16 @@ def feed():
return api(API_TOPSTORIES) or [] return api(API_TOPSTORIES) or []
def comment(i):
    """Convert one raw comment item into the normalized comment dict.

    `i` is a mapping describing a comment (presumably an item from the
    HN Algolia API -- confirm against the caller). Items without an
    'author' key are treated as unusable and reported as False so that
    callers can filter them out.

    Returns:
        False when `i` has no 'author' key, otherwise a dict with the keys
        'author', 'score', 'date', 'text' and 'comments', where 'comments'
        holds the recursively converted replies with the False ones removed.
    """
    if 'author' not in i:
        return False

    c = {}
    c['author'] = i.get('author', '')
    c['score'] = i.get('points', 0)
    c['date'] = i.get('created_at_i', 0)
    c['text'] = i.get('text', '')
    # Convert replies lazily and drop the rejected (False) ones in one pass.
    # A missing 'children' key is treated as "no replies" instead of raising.
    replies = (comment(j) for j in i.get('children', []))
    c['comments'] = [r for r in replies if r]
    return c
def comment_count(i): def comment_count(i):
@ -55,6 +59,7 @@ def story(ref):
s['link'] = SITE_LINK(ref) s['link'] = SITE_LINK(ref)
s['url'] = r.get('url', '') s['url'] = r.get('url', '')
s['comments'] = [comment(i) for i in r['children']] s['comments'] = [comment(i) for i in r['children']]
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = comment_count(s) - 1 s['num_comments'] = comment_count(s) - 1
if 'text' in r and r['text']: if 'text' in r and r['text']:

View File

@ -22,22 +22,21 @@ reddit = praw.Reddit('bot')
def feed():
    """Return the ids of the submissions in the hot listing of SUBREDDITS.

    Uses the module-level `reddit` client; SUBREDDITS is defined elsewhere
    in the module.
    """
    return [x.id for x in reddit.subreddit(SUBREDDITS).hot()]
def good_comment(c):
    """Return True when comment `c` should be kept, False otherwise.

    A comment is rejected when it is a MoreComments placeholder, when its
    body is exactly '[removed]', or when its author is named
    'AutoModerator'.
    """
    # Check the placeholder type first: the later checks read .body/.author.
    if isinstance(c, MoreComments):
        return False
    if c.body == '[removed]':
        return False
    # c.author can be falsy, so guard before reading .name.
    if c.author and c.author.name == 'AutoModerator':
        return False
    return True
def comment(i):
    """Convert one comment object into the normalized comment dict.

    Returns False (so callers can filter it out) when `i` is a
    MoreComments placeholder, when its body contains '[removed]' or
    '[deleted]', or when its author is named 'AutoModerator'.

    Otherwise returns a dict with the keys 'author', 'score', 'date',
    'text' and 'comments', where 'comments' holds the recursively
    converted replies with the rejected (False) ones removed.
    """
    # Check the placeholder type first: the later checks read .body/.author.
    if isinstance(i, MoreComments):
        return False
    if '[removed]' in i.body or '[deleted]' in i.body:
        return False
    if i.author and i.author.name == 'AutoModerator':
        return False

    c = {}
    # A falsy author is shown as '[Deleted]'.
    c['author'] = i.author.name if i.author else '[Deleted]'
    c['score'] = i.score
    c['date'] = i.created_utc
    # render_md is defined elsewhere; presumably renders markdown -- confirm.
    c['text'] = render_md(i.body)
    # Convert replies lazily and drop the rejected ones in a single pass.
    replies = (comment(j) for j in i.replies)
    c['comments'] = [r for r in replies if r]
    return c
def story(ref): def story(ref):
@ -52,7 +51,8 @@ def story(ref):
s['title'] = r.title s['title'] = r.title
s['link'] = SITE_LINK(r.permalink) s['link'] = SITE_LINK(r.permalink)
s['url'] = r.url s['url'] = r.url
s['comments'] = [comment(i) for i in r.comments if good_comment(i)] s['comments'] = [comment(i) for i in r.comments]
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = r.num_comments s['num_comments'] = r.num_comments
if r.selftext: if r.selftext:

View File

@ -41,6 +41,10 @@ def unix(date_str):
def comment(i): def comment(i):
i = i.article i = i.article
if i.find('div', class_='is-comment-removed'):
return False
c = {} c = {}
c['author'] = str(i.find('a', class_='link-user').string) c['author'] = str(i.find('a', class_='link-user').string)
c['score'] = 1 c['score'] = 1
@ -48,6 +52,7 @@ def comment(i):
c['text'] = i.find('div', class_='comment-text').encode_contents().decode() c['text'] = i.find('div', class_='comment-text').encode_contents().decode()
ct = i.find('ol', class_='comment-tree') ct = i.find('ol', class_='comment-tree')
c['comments'] = [comment(j) for j in ct.findAll('li', recursive=False)] if ct else [] c['comments'] = [comment(j) for j in ct.findAll('li', recursive=False)] if ct else []
c['comments'] = list(filter(bool, c['comments']))
return c return c
def story(ref): def story(ref):
@ -76,6 +81,7 @@ def story(ref):
s['url'] = ud.a['href'] if ud else s['link'] s['url'] = ud.a['href'] if ud else s['link']
sc = a.find('ol', id='comments') sc = a.find('ol', id='comments')
s['comments'] = [comment(i) for i in sc.findAll('li', recursive=False)] s['comments'] = [comment(i) for i in sc.findAll('li', recursive=False)]
s['comments'] = list(filter(bool, s['comments']))
ch = a.find('header', class_='topic-comments-header') ch = a.find('header', class_='topic-comments-header')
s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0 s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
@ -87,17 +93,19 @@ def story(ref):
# scratchpad so I can quickly develop the parser # scratchpad so I can quickly develop the parser
if __name__ == '__main__': if __name__ == '__main__':
print(feed()) #print(feed())
normal = story('gxt') #normal = story('gxt')
print(normal) #print(normal)
no_comments = story('gxr') #no_comments = story('gxr')
print(no_comments) #print(no_comments)
self_post = story('gsb') #self_post = story('gsb')
print(self_post) #print(self_post)
li_comment = story('gqx') #li_comment = story('gqx')
print(li_comment) #print(li_comment)
broken = story('h23')
print(broken)
# make sure there's no self-reference # make sure there's no self-reference
import copy #import copy
for x in [normal, no_comments, self_post, li_comment]: #for x in [normal, no_comments, self_post, li_comment]:
_ = copy.deepcopy(x) # _ = copy.deepcopy(x)