Scraper settings, ordering and loop.

This commit is contained in:
Jason Schwarzenberger 2020-11-04 15:47:12 +13:00
parent 6ea9844d00
commit 9bfc6fc6fa
2 changed files with 21 additions and 20 deletions

View File

@@ -54,27 +54,26 @@ def list():
return feed return feed
def get_article(url):
    """Fetch readable article HTML for *url*, trying each configured scraper in order.

    Scraper order comes from settings.SCRAPERS (falling back to just the
    local scraper when unset); 'local' is always kept as a last resort.
    Returns the first non-empty HTML string, or '' when every scraper fails.
    """
    # Map config names to the scraper modules implementing get_html(url).
    scrapers = {
        'declutter': declutter,
        'outline': outline,
        'local': local,
    }
    # Copy the configured list: the original `available += ['local']`
    # appended to the shared settings.SCRAPERS list in place.
    available = list(settings.SCRAPERS or ['local'])
    if 'local' not in available:
        available.append('local')

    for name in available:
        module = scrapers.get(name)
        if module is None:
            # Unknown scraper name in settings; skip it.
            continue
        try:
            html = module.get_html(url)
            if html:
                return html
        except KeyboardInterrupt:
            raise
        except Exception:
            # Best-effort: any scraper failure falls through to the next one.
            # (Was a bare `except:`, which also swallowed SystemExit/GeneratorExit.)
            pass
    return ''
def get_content_type(url): def get_content_type(url):

View File

@@ -23,6 +23,8 @@ NUM_SUBSTACK = 10
# 'rnz national': { 'url': "https://www.rnz.co.nz/news/national", 'count': 10}, # 'rnz national': { 'url': "https://www.rnz.co.nz/news/national", 'count': 10},
# } # }
# Ordered list of scrapers that get_article() tries for article HTML;
# 'local' is appended as a fallback at call time if it is missing here.
SCRAPERS = ['declutter', 'outline', 'local']
# Reddit account info # Reddit account info
# leave blank if not using Reddit # leave blank if not using Reddit
REDDIT_CLIENT_ID = '' REDDIT_CLIENT_ID = ''