Stop using archive.is on articles (hits CAPTCHAs)
This commit is contained in:
		@@ -13,9 +13,8 @@ OUTLINE_API = 'https://outlineapi.com/article'
 | 
				
			|||||||
ARCHIVE_API = 'https://archive.fo/submit/'
 | 
					ARCHIVE_API = 'https://archive.fo/submit/'
 | 
				
			||||||
READ_API = 'http://127.0.0.1:33843'
 | 
					READ_API = 'http://127.0.0.1:33843'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ARCHIVE_FIRST = ['bloomberg.com', 'wsj.com']
 | 
					 | 
				
			||||||
INVALID_FILES = ['.pdf', '.png', '.jpg', '.gif']
 | 
					INVALID_FILES = ['.pdf', '.png', '.jpg', '.gif']
 | 
				
			||||||
INVALID_DOMAINS = ['youtube.com']
 | 
					INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
 | 
				
			||||||
TWO_DAYS = 60*60*24*2
 | 
					TWO_DAYS = 60*60*24*2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def list():
 | 
					def list():
 | 
				
			||||||
@@ -26,24 +25,6 @@ def list():
 | 
				
			|||||||
    return feed
 | 
					    return feed
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_article(url):
 | 
					def get_article(url):
 | 
				
			||||||
    if any([domain in url for domain in ARCHIVE_FIRST]):
 | 
					 | 
				
			||||||
        try:
 | 
					 | 
				
			||||||
            logging.info('Article from {}, archiving first...'.format(url))
 | 
					 | 
				
			||||||
            data = {'submitid': '9tjtS1EYe5wy8AJiYgVfH9P97uHU1IHG4lO67hsQpHOC3KKJrhqVIoQG2U7Rg%2Fpr', 'url': url}
 | 
					 | 
				
			||||||
            r = requests.post(ARCHIVE_API, data=data, timeout=20, allow_redirects=False)
 | 
					 | 
				
			||||||
            if r.status_code == 200:
 | 
					 | 
				
			||||||
                logging.info('Submitted for archiving. Skipping to wait...')
 | 
					 | 
				
			||||||
                return ''
 | 
					 | 
				
			||||||
            elif 'location' in r.headers:
 | 
					 | 
				
			||||||
                url = r.headers['location']
 | 
					 | 
				
			||||||
            else:
 | 
					 | 
				
			||||||
                raise Exception('Bad response code ' + str(r.status_code))
 | 
					 | 
				
			||||||
        except KeyboardInterrupt:
 | 
					 | 
				
			||||||
            raise
 | 
					 | 
				
			||||||
        except BaseException as e:
 | 
					 | 
				
			||||||
            logging.error('Problem archiving article: {}'.format(str(e)))
 | 
					 | 
				
			||||||
            return ''
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        params = {'source_url': url}
 | 
					        params = {'source_url': url}
 | 
				
			||||||
        headers = {'Referer': 'https://outline.com/'}
 | 
					        headers = {'Referer': 'https://outline.com/'}
 | 
				
			||||||
@@ -89,7 +70,7 @@ def get_first_image(text):
 | 
				
			|||||||
    except:
 | 
					    except:
 | 
				
			||||||
        return ''
 | 
					        return ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def update_story(story, manual=False):
 | 
					def update_story(story, is_manual=False):
 | 
				
			||||||
    res = {}
 | 
					    res = {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    logging.info('Updating story ' + str(story['ref']))
 | 
					    logging.info('Updating story ' + str(story['ref']))
 | 
				
			||||||
@@ -109,7 +90,7 @@ def update_story(story, manual=False):
 | 
				
			|||||||
        logging.info('Article not ready yet')
 | 
					        logging.info('Article not ready yet')
 | 
				
			||||||
        return False
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if story['date'] and not manual and story['date'] + TWO_DAYS < time.time():
 | 
					    if story['date'] and not is_manual and story['date'] + TWO_DAYS < time.time():
 | 
				
			||||||
        logging.info('Article too old, removing')
 | 
					        logging.info('Article too old, removing')
 | 
				
			||||||
        return False
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -113,7 +113,7 @@ def submit():
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        news_story = dict(id=nid, ref=ref, source=source)
 | 
					        news_story = dict(id=nid, ref=ref, source=source)
 | 
				
			||||||
        news_cache[nid] = news_story
 | 
					        news_cache[nid] = news_story
 | 
				
			||||||
        valid = feed.update_story(news_story, manual=True)
 | 
					        valid = feed.update_story(news_story, is_manual=True)
 | 
				
			||||||
        if valid:
 | 
					        if valid:
 | 
				
			||||||
            archive.update(news_story)
 | 
					            archive.update(news_story)
 | 
				
			||||||
            return {'nid': nid}
 | 
					            return {'nid': nid}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user