increase declutter timeout.

master
Jason Schwarzenberger 4 years ago
parent e6976db25d
commit 98a0c2257c
  1. 7
      apiserver/scrapers/declutter.py
  2. 8
      apiserver/scrapers/local.py
  3. 10
      apiserver/scrapers/outline.py

@ -5,19 +5,22 @@ logging.basicConfig(
import requests
DECLUTTER_API = 'https://declutter.1j.nz/details'
TIMEOUT = 30
def get_html(url):
    """Return the decluttered HTML content for ``url``.

    Logs the request and delegates to ``get_details``. Returns an empty
    string when ``get_details`` yields a falsy result; otherwise returns
    the ``'content'`` entry of the returned details.

    Errors are not handled here: anything raised by ``get_details`` or by
    the ``'content'`` lookup propagates to the caller.
    """
    logging.info(f"Declutter Scraper: {url}")
    details = get_details(url)
    if not details:
        return ''
    return details['content']
def get_details(url):
try:
r = requests.post(DECLUTTER_API, data=dict(url=url), timeout=20)
r = requests.post(DECLUTTER_API, data=dict(url=url), timeout=TIMEOUT)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
@ -25,4 +28,4 @@ def get_details(url):
raise
except BaseException as e:
logging.error('Problem decluttering article: {}'.format(str(e)))
return {}
return None

@ -5,19 +5,21 @@ logging.basicConfig(
import requests
READ_API = 'http://127.0.0.1:33843/details'
TIMEOUT = 20
def get_html(url):
    """Return the article HTML content for ``url`` from the local scraper.

    Logs the request and delegates to ``get_details``. Returns an empty
    string when ``get_details`` yields a falsy result; otherwise returns
    the ``'content'`` entry of the returned details.

    Errors are not handled here: anything raised by ``get_details`` or by
    the ``'content'`` lookup propagates to the caller.
    """
    logging.info(f"Local Scraper: {url}")
    details = get_details(url)
    if not details:
        return ''
    return details['content']
def get_details(url):
try:
r = requests.post(READ_API, data=dict(url=url), timeout=20)
r = requests.post(READ_API, data=dict(url=url), timeout=TIMEOUT)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
@ -25,4 +27,4 @@ def get_details(url):
raise
except BaseException as e:
logging.error('Problem getting article: {}'.format(str(e)))
return {}
return None

@ -6,11 +6,13 @@ import requests
OUTLINE_REFERER = 'https://outline.com/'
OUTLINE_API = 'https://api.outline.com/v3/parse_article'
TIMEOUT = 20
def get_html(url):
    """Return the article HTML for ``url`` from the outline scraper.

    Delegates to ``get_details``. Returns an empty string when
    ``get_details`` yields a falsy result; otherwise returns the ``'html'``
    entry of the returned details.

    Errors are not handled here: anything raised by ``get_details`` or by
    the ``'html'`` lookup propagates to the caller.
    """
    details = get_details(url)
    if not details:
        return ''
    return details['html']
@ -20,11 +22,11 @@ def get_details(url):
logging.info(f"Outline Scraper: {url}")
params = {'source_url': url}
headers = {'Referer': OUTLINE_REFERER}
r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=20)
r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=TIMEOUT)
if r.status_code == 429:
logging.info('Rate limited by outline, sleeping 30s and skipping...')
time.sleep(30)
return ''
return None
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
data = r.json()['data']
@ -35,4 +37,4 @@ def get_details(url):
raise
except BaseException as e:
logging.error('Problem outlining article: {}'.format(str(e)))
return {}
return None
Loading…
Cancel
Save