forked from tanner/qotnews
add excerpt and scraper details.
This commit is contained in:
@@ -76,14 +76,14 @@ def get_article(url):
|
||||
if scraper not in scrapers.keys():
|
||||
continue
|
||||
try:
|
||||
html = scrapers[scraper].get_html(url)
|
||||
if html:
|
||||
return html
|
||||
details = scrapers[scraper].get_details(url)
|
||||
if details and details.get('content'):
|
||||
return details, scraper
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except:
|
||||
pass
|
||||
return ''
|
||||
return None, None
|
||||
|
||||
def get_content_type(url):
|
||||
try:
|
||||
@@ -143,7 +143,12 @@ def update_story(story, is_manual=False, urlref=None):
|
||||
return False
|
||||
|
||||
logging.info('Getting article ' + story['url'])
|
||||
story['text'] = get_article(story['url'])
|
||||
details, scraper = get_article(story['url'])
|
||||
if not details: return False
|
||||
story['text'] = details.get('content', '')
|
||||
story['excerpt'] = details.get('excerpt', '')
|
||||
story['scraper'] = scraper
|
||||
story['scraper_link'] = details.get('scraper_link', '')
|
||||
if not story['text']: return False
|
||||
|
||||
return True
|
||||
|
@@ -12,9 +12,28 @@ def get_html(url):
|
||||
details = get_details(url)
|
||||
if not details:
|
||||
return ''
|
||||
return details['html']
|
||||
return details['content']
|
||||
|
||||
def get_details(url):
    """Fetch the Outline data for *url* and return it in readable form.

    Returns the dict produced by ``as_readable``, or ``None`` when
    ``_get_outline`` yields nothing for the URL.
    """
    outline = _get_outline(url)
    if not outline:
        return None
    return as_readable(outline)
|
||||
|
||||
def as_readable(details):
    """Translate a raw Outline payload into the scraper's article dict.

    ``details`` must provide the keys ``title``, ``author``, ``html``,
    ``site_name``, ``article_url`` and ``short_code``; a missing key raises
    ``KeyError``. The optional ``meta`` entry is only consulted through
    ``_excerpt``.

    Returns a dict with the keys ``title``, ``byline``, ``content``,
    ``excerpt``, ``siteName``, ``url``, ``publisher`` and ``scraper_link``.
    """
    readable = {
        'title': details['title'],
        'byline': details['author'],
        'content': details['html'],
        'excerpt': _excerpt(details),
        # Both 'siteName' and 'publisher' are filled from the same field.
        'siteName': details['site_name'],
        'url': details['article_url'],
        'publisher': details['site_name'],
        # Link back to the outlined copy of the article.
        'scraper_link': 'https://outline.com/' + details['short_code'],
    }
    return readable
|
||||
|
||||
def _get_outline(url):
|
||||
try:
|
||||
logging.info(f"Outline Scraper: {url}")
|
||||
params = {'source_url': url}
|
||||
@@ -34,4 +53,11 @@ def get_details(url):
|
||||
raise
|
||||
except BaseException as e:
|
||||
logging.error('Problem outlining article: {}'.format(str(e)))
|
||||
return None
|
||||
return None
|
||||
|
||||
def _excerpt(details):
    """Return a short description of the article, or ``''`` if none exists.

    Reads ``details['meta']`` and prefers ``meta['description']``; when that
    is missing or empty it falls back to ``meta['og']['og:description']``.
    The result is always a string.
    """
    meta = details.get('meta')
    if not meta:
        return ''

    description = meta.get('description')
    if description:
        return description

    og = meta.get('og')
    if not og:
        return ''

    # `or ''` covers the case where the key is present but holds None,
    # so callers never receive a non-string excerpt.
    return og.get('og:description') or ''
|
Reference in New Issue
Block a user