Allow re-scraping if the simple scraper was used.
This commit is contained in:
parent
8727be6d86
commit
33a25fa34e
|
@ -132,7 +132,11 @@ def update_story(story, is_manual=False, urlref=None):
|
||||||
logging.info('Story too old, removing')
|
logging.info('Story too old, removing')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if story.get('url', '') and not story.get('text', ''):
|
has_url = story.get('url') or False
|
||||||
|
has_text = story.get('text') or False
|
||||||
|
is_simple = story.get('scraper', '') == 'simple'
|
||||||
|
|
||||||
|
if has_url and (not has_text or is_simple):
|
||||||
if not get_content_type(story['url']).startswith('text/'):
|
if not get_content_type(story['url']).startswith('text/'):
|
||||||
logging.info('URL invalid file type / content type:')
|
logging.info('URL invalid file type / content type:')
|
||||||
logging.info(story['url'])
|
logging.info(story['url'])
|
||||||
|
@ -149,6 +153,7 @@ def update_story(story, is_manual=False, urlref=None):
|
||||||
story['scraper'] = scraper
|
story['scraper'] = scraper
|
||||||
story['text'] = details.get('content', '')
|
story['text'] = details.get('content', '')
|
||||||
if not story['text']: return False
|
if not story['text']: return False
|
||||||
|
story['last_update'] = time.time()
|
||||||
story['excerpt'] = details.get('excerpt', '')
|
story['excerpt'] = details.get('excerpt', '')
|
||||||
story['scraper_link'] = details.get('scraper_link', '')
|
story['scraper_link'] = details.get('scraper_link', '')
|
||||||
meta = details.get('meta')
|
meta = details.get('meta')
|
||||||
|
|
Loading…
Reference in New Issue
Block a user