Filter out False comments

2019-08-30 06:23:14 +00:00
parent 20a9d9d452
commit 2ede5ed6ff
3 changed files with 36 additions and 23 deletions
@@ -25,12 +25,16 @@ def feed():
    return api(API_TOPSTORIES) or []
 def comment(i):
    """Convert one raw comment node into the normalized comment dict.

    Returns False when the node has no 'author' key. Otherwise returns a
    dict with the keys 'author', 'score', 'date', 'text' and 'comments',
    where 'comments' holds the recursively converted entries of
    i['children'] with every falsy result (i.e. skipped child) dropped.
    """
    if 'author' not in i:
        return False
    return {
        'author': i.get('author', ''),
        'score': i.get('points', 0),
        'date': i.get('created_at_i', 0),
        'text': i.get('text', ''),
        # children that converted to False are filtered out here
        'comments': [reply for reply in map(comment, i['children']) if reply],
    }
 def comment_count(i):
@@ -55,6 +59,7 @@ def story(ref):
    s['link'] = SITE_LINK(ref)
    s['url'] = r.get('url', '')
    s['comments'] = [comment(i) for i in r['children']]
    s['comments'] = list(filter(bool, s['comments']))
    s['num_comments'] = comment_count(s) - 1
    if 'text' in r and r['text']:
@@ -22,22 +22,21 @@ reddit = praw.Reddit('bot')
 def feed():
    """Return a list with the `id` of every item yielded by
    reddit.subreddit(SUBREDDITS).hot()."""
    hot_listing = reddit.subreddit(SUBREDDITS).hot()
    return [submission.id for submission in hot_listing]
 def good_comment(c):
    """Tell whether `c` is a comment worth keeping.

    Returns False for a MoreComments instance, for a comment whose body is
    exactly '[removed]', and for a comment whose author is named
    'AutoModerator'; returns True for everything else.
    """
    if isinstance(c, MoreComments):
        return False
    was_removed = c.body == '[removed]'
    if was_removed:
        return False
    # a missing author (falsy c.author) never counts as AutoModerator
    from_automod = bool(c.author) and c.author.name == 'AutoModerator'
    return not from_automod
 def comment(i):
    if isinstance(i, MoreComments):
        return False
    if '[removed]' in i.body or '[deleted]' in i.body:
        return False
    if i.author and i.author.name == 'AutoModerator':
        return False
    c = {}
    c['author'] = i.author.name if i.author else '[Deleted]'
    c['score'] = i.score
    c['date'] = i.created_utc
    c['text'] = render_md(i.body)
-    c['comments'] = [comment(j) for j in i.replies if good_comment(j)]
+    c['comments'] = [comment(j) for j in i.replies]
    c['comments'] = list(filter(bool, c['comments']))
    return c
 def story(ref):
@@ -52,7 +51,8 @@ def story(ref):
    s['title'] = r.title
    s['link'] = SITE_LINK(r.permalink)
    s['url'] = r.url
-    s['comments'] = [comment(i) for i in r.comments if good_comment(i)]
+    s['comments'] = [comment(i) for i in r.comments]
    s['comments'] = list(filter(bool, s['comments']))
    s['num_comments'] = r.num_comments
    if r.selftext:
@@ -41,6 +41,10 @@ def unix(date_str):
 def comment(i):
    i = i.article
    if i.find('div', class_='is-comment-removed'):
        return False
    c = {}
    c['author'] = str(i.find('a', class_='link-user').string)
    c['score'] = 1
@@ -48,6 +52,7 @@ def comment(i):
    c['text'] = i.find('div', class_='comment-text').encode_contents().decode()
    ct = i.find('ol', class_='comment-tree')
    c['comments'] = [comment(j) for j in ct.findAll('li', recursive=False)] if ct else []
    c['comments'] = list(filter(bool, c['comments']))
    return c
 def story(ref):
@@ -76,6 +81,7 @@ def story(ref):
    s['url'] = ud.a['href'] if ud else s['link']
    sc = a.find('ol', id='comments')
    s['comments'] = [comment(i) for i in sc.findAll('li', recursive=False)]
    s['comments'] = list(filter(bool, s['comments']))
    ch = a.find('header', class_='topic-comments-header')
    s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
@@ -87,17 +93,19 @@ def story(ref):
 # scratchpad so I can quickly develop the parser
 if __name__ == '__main__':
-    print(feed())
+    #print(feed())
-    normal = story('gxt')
+    #normal = story('gxt')
-    print(normal)
+    #print(normal)
-    no_comments = story('gxr')
+    #no_comments = story('gxr')
-    print(no_comments)
+    #print(no_comments)
-    self_post = story('gsb')
+    #self_post = story('gsb')
-    print(self_post)
+    #print(self_post)
-    li_comment = story('gqx')
+    #li_comment = story('gqx')
-    print(li_comment)
+    #print(li_comment)
    broken = story('h23')
    print(broken)
    # make sure there's no self-reference
-    import copy
+    #import copy
-    for x in [normal, no_comments, self_post, li_comment]:
+    #for x in [normal, no_comments, self_post, li_comment]:
-        _ = copy.deepcopy(x)
+    #    _ = copy.deepcopy(x)