Filter out comments for which comment() returns False (e.g. removed or deleted comments)
This commit is contained in:
parent
20a9d9d452
commit
2ede5ed6ff
|
@ -25,12 +25,16 @@ def feed():
|
||||||
return api(API_TOPSTORIES) or []
|
return api(API_TOPSTORIES) or []
|
||||||
|
|
||||||
def comment(i):
|
def comment(i):
|
||||||
|
if 'author' not in i:
|
||||||
|
return False
|
||||||
|
|
||||||
c = {}
|
c = {}
|
||||||
c['author'] = i.get('author', '')
|
c['author'] = i.get('author', '')
|
||||||
c['score'] = i.get('points', 0)
|
c['score'] = i.get('points', 0)
|
||||||
c['date'] = i.get('created_at_i', 0)
|
c['date'] = i.get('created_at_i', 0)
|
||||||
c['text'] = i.get('text', '')
|
c['text'] = i.get('text', '')
|
||||||
c['comments'] = [comment(j) for j in i['children']]
|
c['comments'] = [comment(j) for j in i['children']]
|
||||||
|
c['comments'] = list(filter(bool, c['comments']))
|
||||||
return c
|
return c
|
||||||
|
|
||||||
def comment_count(i):
|
def comment_count(i):
|
||||||
|
@ -55,6 +59,7 @@ def story(ref):
|
||||||
s['link'] = SITE_LINK(ref)
|
s['link'] = SITE_LINK(ref)
|
||||||
s['url'] = r.get('url', '')
|
s['url'] = r.get('url', '')
|
||||||
s['comments'] = [comment(i) for i in r['children']]
|
s['comments'] = [comment(i) for i in r['children']]
|
||||||
|
s['comments'] = list(filter(bool, s['comments']))
|
||||||
s['num_comments'] = comment_count(s) - 1
|
s['num_comments'] = comment_count(s) - 1
|
||||||
|
|
||||||
if 'text' in r and r['text']:
|
if 'text' in r and r['text']:
|
||||||
|
|
|
@ -22,22 +22,21 @@ reddit = praw.Reddit('bot')
|
||||||
def feed():
|
def feed():
|
||||||
return [x.id for x in reddit.subreddit(SUBREDDITS).hot()]
|
return [x.id for x in reddit.subreddit(SUBREDDITS).hot()]
|
||||||
|
|
||||||
def good_comment(c):
|
|
||||||
if isinstance(c, MoreComments):
|
|
||||||
return False
|
|
||||||
if c.body == '[removed]':
|
|
||||||
return False
|
|
||||||
if c.author and c.author.name == 'AutoModerator':
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
def comment(i):
|
def comment(i):
|
||||||
|
if isinstance(i, MoreComments):
|
||||||
|
return False
|
||||||
|
if '[removed]' in i.body or '[deleted]' in i.body:
|
||||||
|
return False
|
||||||
|
if i.author and i.author.name == 'AutoModerator':
|
||||||
|
return False
|
||||||
|
|
||||||
c = {}
|
c = {}
|
||||||
c['author'] = i.author.name if i.author else '[Deleted]'
|
c['author'] = i.author.name if i.author else '[Deleted]'
|
||||||
c['score'] = i.score
|
c['score'] = i.score
|
||||||
c['date'] = i.created_utc
|
c['date'] = i.created_utc
|
||||||
c['text'] = render_md(i.body)
|
c['text'] = render_md(i.body)
|
||||||
c['comments'] = [comment(j) for j in i.replies if good_comment(j)]
|
c['comments'] = [comment(j) for j in i.replies]
|
||||||
|
c['comments'] = list(filter(bool, c['comments']))
|
||||||
return c
|
return c
|
||||||
|
|
||||||
def story(ref):
|
def story(ref):
|
||||||
|
@ -52,7 +51,8 @@ def story(ref):
|
||||||
s['title'] = r.title
|
s['title'] = r.title
|
||||||
s['link'] = SITE_LINK(r.permalink)
|
s['link'] = SITE_LINK(r.permalink)
|
||||||
s['url'] = r.url
|
s['url'] = r.url
|
||||||
s['comments'] = [comment(i) for i in r.comments if good_comment(i)]
|
s['comments'] = [comment(i) for i in r.comments]
|
||||||
|
s['comments'] = list(filter(bool, s['comments']))
|
||||||
s['num_comments'] = r.num_comments
|
s['num_comments'] = r.num_comments
|
||||||
|
|
||||||
if r.selftext:
|
if r.selftext:
|
||||||
|
|
|
@ -41,6 +41,10 @@ def unix(date_str):
|
||||||
|
|
||||||
def comment(i):
|
def comment(i):
|
||||||
i = i.article
|
i = i.article
|
||||||
|
|
||||||
|
if i.find('div', class_='is-comment-removed'):
|
||||||
|
return False
|
||||||
|
|
||||||
c = {}
|
c = {}
|
||||||
c['author'] = str(i.find('a', class_='link-user').string)
|
c['author'] = str(i.find('a', class_='link-user').string)
|
||||||
c['score'] = 1
|
c['score'] = 1
|
||||||
|
@ -48,6 +52,7 @@ def comment(i):
|
||||||
c['text'] = i.find('div', class_='comment-text').encode_contents().decode()
|
c['text'] = i.find('div', class_='comment-text').encode_contents().decode()
|
||||||
ct = i.find('ol', class_='comment-tree')
|
ct = i.find('ol', class_='comment-tree')
|
||||||
c['comments'] = [comment(j) for j in ct.findAll('li', recursive=False)] if ct else []
|
c['comments'] = [comment(j) for j in ct.findAll('li', recursive=False)] if ct else []
|
||||||
|
c['comments'] = list(filter(bool, c['comments']))
|
||||||
return c
|
return c
|
||||||
|
|
||||||
def story(ref):
|
def story(ref):
|
||||||
|
@ -76,6 +81,7 @@ def story(ref):
|
||||||
s['url'] = ud.a['href'] if ud else s['link']
|
s['url'] = ud.a['href'] if ud else s['link']
|
||||||
sc = a.find('ol', id='comments')
|
sc = a.find('ol', id='comments')
|
||||||
s['comments'] = [comment(i) for i in sc.findAll('li', recursive=False)]
|
s['comments'] = [comment(i) for i in sc.findAll('li', recursive=False)]
|
||||||
|
s['comments'] = list(filter(bool, s['comments']))
|
||||||
ch = a.find('header', class_='topic-comments-header')
|
ch = a.find('header', class_='topic-comments-header')
|
||||||
s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
|
s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
|
||||||
|
|
||||||
|
@ -87,17 +93,19 @@ def story(ref):
|
||||||
|
|
||||||
# scratchpad so I can quickly develop the parser
|
# scratchpad so I can quickly develop the parser
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(feed())
|
#print(feed())
|
||||||
normal = story('gxt')
|
#normal = story('gxt')
|
||||||
print(normal)
|
#print(normal)
|
||||||
no_comments = story('gxr')
|
#no_comments = story('gxr')
|
||||||
print(no_comments)
|
#print(no_comments)
|
||||||
self_post = story('gsb')
|
#self_post = story('gsb')
|
||||||
print(self_post)
|
#print(self_post)
|
||||||
li_comment = story('gqx')
|
#li_comment = story('gqx')
|
||||||
print(li_comment)
|
#print(li_comment)
|
||||||
|
broken = story('h23')
|
||||||
|
print(broken)
|
||||||
|
|
||||||
# make sure there's no self-reference
|
# make sure there's no self-reference
|
||||||
import copy
|
#import copy
|
||||||
for x in [normal, no_comments, self_post, li_comment]:
|
#for x in [normal, no_comments, self_post, li_comment]:
|
||||||
_ = copy.deepcopy(x)
|
# _ = copy.deepcopy(x)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user