|
|
|
@ -10,9 +10,6 @@ from bs4 import BeautifulSoup |
|
|
|
|
import settings |
|
|
|
|
from feeds import hackernews, reddit, tildes, manual, lobsters |
|
|
|
|
|
|
|
|
|
OUTLINE_API = 'https://api.outline.com/v3/parse_article' |
|
|
|
|
READ_API = 'http://127.0.0.1:33843' |
|
|
|
|
|
|
|
|
|
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com', 'sec.gov'] |
|
|
|
|
TWO_DAYS = 60*60*24*2 |
|
|
|
|
|
|
|
|
@ -33,29 +30,16 @@ def list(): |
|
|
|
|
return feed |
|
|
|
|
|
|
|
|
|
def get_article(url): |
|
|
|
|
try: |
|
|
|
|
params = {'source_url': url} |
|
|
|
|
headers = {'Referer': 'https://outline.com/'} |
|
|
|
|
r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=20) |
|
|
|
|
if r.status_code == 429: |
|
|
|
|
logging.info('Rate limited by outline, sleeping 30s and skipping...') |
|
|
|
|
time.sleep(30) |
|
|
|
|
return '' |
|
|
|
|
if r.status_code != 200: |
|
|
|
|
raise Exception('Bad response code ' + str(r.status_code)) |
|
|
|
|
html = r.json()['data']['html'] |
|
|
|
|
if 'URL is not supported by Outline' in html: |
|
|
|
|
raise Exception('URL not supported by Outline') |
|
|
|
|
return html |
|
|
|
|
except KeyboardInterrupt: |
|
|
|
|
raise |
|
|
|
|
except BaseException as e: |
|
|
|
|
logging.error('Problem outlining article: {}'.format(str(e))) |
|
|
|
|
if not settings.READER_URL: |
|
|
|
|
logging.info('Readerserver not configured, aborting.') |
|
|
|
|
return '' |
|
|
|
|
|
|
|
|
|
logging.info('Trying our server instead...') |
|
|
|
|
if url.startswith('https://twitter.com'): |
|
|
|
|
logging.info('Replacing twitter.com url with nitter.net') |
|
|
|
|
url = url.replace('twitter.com', 'nitter.net') |
|
|
|
|
|
|
|
|
|
try: |
|
|
|
|
r = requests.post(READ_API, data=dict(url=url), timeout=20) |
|
|
|
|
r = requests.post(settings.READER_URL, data=dict(url=url), timeout=20) |
|
|
|
|
if r.status_code != 200: |
|
|
|
|
raise Exception('Bad response code ' + str(r.status_code)) |
|
|
|
|
return r.text |
|
|
|
@ -102,7 +86,7 @@ def update_story(story, is_manual=False): |
|
|
|
|
return False |
|
|
|
|
|
|
|
|
|
if story['date'] and not is_manual and story['date'] + TWO_DAYS < time.time(): |
|
|
|
|
logging.info('Story too old, removing') |
|
|
|
|
logging.info('Story too old, removing. Date: {}'.format(story['date'])) |
|
|
|
|
return False |
|
|
|
|
|
|
|
|
|
if story.get('url', '') and not story.get('text', ''): |
|
|
|
|