qotnews/apiserver/feeds/reddit.py

99 lines
2.9 KiB
Python
Raw Normal View History

2019-08-24 21:37:43 +00:00
import logging

# Configure the root logger once, at import time: DEBUG and above, with a
# timestamp, logger name and level in front of every message.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

if __name__ == '__main__':
    # When executed directly as a script, put the current working directory
    # first on the import path before the imports that follow.
    import sys
    sys.path.insert(0, '.')
2019-08-24 21:37:43 +00:00
import praw
2019-09-24 08:20:46 +00:00
from praw.exceptions import PRAWException
2019-08-24 21:37:43 +00:00
from praw.models import MoreComments
2019-09-24 08:20:46 +00:00
from prawcore.exceptions import PrawcoreException
2019-08-24 21:37:43 +00:00
import settings
2019-12-01 22:18:41 +00:00
from utils import render_md, clean
2020-06-25 23:35:45 +00:00
def SITE_LINK(x):
    """Return the old.reddit.com URL for *x*, appended directly to the host."""
    return 'https://old.reddit.com{}'.format(x)
2019-08-24 21:37:43 +00:00
def SITE_AUTHOR_LINK(x):
    """Return the old.reddit.com user-page URL for *x* (formatted after ``/u/``)."""
    return 'https://old.reddit.com/u/{}'.format(x)
# Build the PRAW client only when settings.NUM_REDDIT is truthy; the
# credentials and user agent all come from the settings module.
if settings.NUM_REDDIT:
    reddit = praw.Reddit(
        user_agent=settings.REDDIT_USER_AGENT,
        client_id=settings.REDDIT_CLIENT_ID,
        client_secret=settings.REDDIT_CLIENT_SECRET,
    )
# All configured subreddit names joined with '+' into one string; feed()
# passes this string to reddit.subreddit().
subs = '+'.join(settings.SUBREDDITS)
2019-08-24 21:37:43 +00:00
def feed():
    """List the ids of the current "hot" submissions of the configured subreddits.

    Returns:
        A list with the ``id`` of every item yielded by
        ``reddit.subreddit(subs).hot()``. If the request raises a PRAW or
        prawcore exception, the error is logged and an empty list is
        returned instead.
    """
    try:
        hot_posts = reddit.subreddit(subs).hot()
        return [post.id for post in hot_posts]
    except KeyboardInterrupt:
        # Never swallow Ctrl-C.
        raise
    except (PRAWException, PrawcoreException) as err:
        logging.error('Problem hitting reddit API: {}'.format(str(err)))
        return []
2019-08-24 21:37:43 +00:00
2019-08-30 06:23:14 +00:00
def comment(i):
    """Convert one item of a PRAW comment tree into a plain dict.

    Replies are converted recursively with this same function.

    Args:
        i: An item taken from ``submission.comments`` or from another
            comment's ``replies``.

    Returns:
        ``False`` when the item must be skipped: it is a ``MoreComments``
        placeholder, its body is the ``[removed]`` / ``[deleted]``
        placeholder, or it was written by AutoModerator. Otherwise a dict
        with the keys ``author``, ``score``, ``date``, ``text`` (the body
        passed through ``clean`` and ``render_md``) and ``comments`` (the
        list of converted replies, skipped ones left out).
    """
    if isinstance(i, MoreComments):
        return False
    # Compare the whole body rather than searching inside it: a genuine
    # comment that merely mentions "[deleted]" or "[removed]" must not be
    # dropped together with all of its replies.
    if i.body.strip() in ('[removed]', '[deleted]'):
        return False
    if i.author and i.author.name == 'AutoModerator':
        return False

    c = {}
    c['author'] = i.author.name if i.author else '[Deleted]'
    c['score'] = i.score
    c['date'] = i.created_utc
    c['text'] = render_md(clean(i.body))
    # Convert the replies and leave out the ones that were skipped (False).
    converted = (comment(j) for j in i.replies)
    c['comments'] = [reply for reply in converted if reply]
    return c
def story(ref):
    """Fetch one reddit submission and convert it into a plain story dict.

    Args:
        ref: The submission id, as returned by ``feed()``.

    Returns:
        ``False`` when the submission is rejected or cannot be fetched:
        its score is below ``settings.REDDIT_SCORE_THRESHOLD`` *and* its
        comment count is below ``settings.REDDIT_COMMENT_THRESHOLD``, or a
        PRAW / prawcore exception was raised (the error is logged).
        Otherwise a dict with the keys ``author``, ``author_link``,
        ``score``, ``date``, ``title``, ``link``, ``url``, ``comments``,
        ``num_comments`` and, only when the submission has self text,
        ``text``.
    """
    try:
        r = reddit.submission(ref)
        if not r:
            return False

        score = r.score
        num_comments = r.num_comments
        # Reject unpopular submissions before walking and rendering the
        # whole comment tree, which is by far the most expensive step.
        if (score < settings.REDDIT_SCORE_THRESHOLD
                and num_comments < settings.REDDIT_COMMENT_THRESHOLD):
            return False

        author = r.author

        s = {}
        s['author'] = author.name if author else '[Deleted]'
        # Build the link from the user name, not from the author object.
        # A deleted author has no user page, so leave the link empty
        # instead of pointing at the unrelated ".../u/None".
        s['author_link'] = SITE_AUTHOR_LINK(author.name) if author else ''
        s['score'] = score
        s['date'] = r.created_utc
        s['title'] = r.title
        s['link'] = SITE_LINK(r.permalink)
        s['url'] = r.url
        # comment() returns False for items to skip; keep only real ones.
        converted = (comment(i) for i in r.comments)
        s['comments'] = [c for c in converted if c]
        s['num_comments'] = num_comments

        if r.selftext:
            s['text'] = render_md(clean(r.selftext))

        return s
    except KeyboardInterrupt:
        # Never swallow Ctrl-C.
        raise
    except (PRAWException, PrawcoreException) as e:
        logging.error('Problem hitting reddit API: {}'.format(str(e)))
        return False
2019-08-24 21:37:43 +00:00
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
    # Earlier experiments, left disabled:
    #print(feed())
    #print(reddit.submission(feed()[0]).permalink)
    #print()
    parsed = story('e4asnp')
    print(parsed)