From feba8b7aa0dfb44f603e53edc83c65d8bfd44c81 Mon Sep 17 00:00:00 2001 From: Tanner Collin Date: Thu, 29 Oct 2020 04:55:34 +0000 Subject: [PATCH] Make qotnews work with WaPo --- apiserver/feed.py | 3 +-- apiserver/feeds/manual.py | 9 ++++++--- readerserver/main.js | 8 ++++++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/apiserver/feed.py b/apiserver/feed.py index d86e4a1..a53874a 100644 --- a/apiserver/feed.py +++ b/apiserver/feed.py @@ -46,7 +46,7 @@ def get_article(url): logging.info('Trying our server instead...') try: - r = requests.post(READ_API, data=dict(url=url), timeout=10) + r = requests.post(READ_API, data=dict(url=url), timeout=20) if r.status_code != 200: raise Exception('Bad response code ' + str(r.status_code)) return r.text @@ -94,7 +94,6 @@ def update_story(story, is_manual=False): return False if story.get('url', '') and not story.get('text', ''): - logging.info('inside if') if not get_content_type(story['url']).startswith('text/'): logging.info('URL invalid file type / content type:') logging.info(story['url']) diff --git a/apiserver/feeds/manual.py b/apiserver/feeds/manual.py index d145df4..c23ed70 100644 --- a/apiserver/feeds/manual.py +++ b/apiserver/feeds/manual.py @@ -7,12 +7,15 @@ import requests import time from bs4 import BeautifulSoup -USER_AGENT = 'Twitterbot/1.0' +USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0' def api(route): try: - headers = {'User-Agent': USER_AGENT} - r = requests.get(route, headers=headers, timeout=5) + headers = { + 'User-Agent': USER_AGENT, + 'X-Forwarded-For': '66.249.66.1', + } + r = requests.get(route, headers=headers, timeout=10) if r.status_code != 200: raise Exception('Bad response code ' + str(r.status_code)) return r.text diff --git a/readerserver/main.js b/readerserver/main.js index 4c213ef..242db7a 100644 --- a/readerserver/main.js +++ b/readerserver/main.js @@ -4,7 +4,7 @@ const port = 33843; const request = require('request'); const JSDOM = require('jsdom').JSDOM; -const Readability = require('readability'); +const { Readability } = require('readability'); app.use(express.urlencoded({ extended: true })); @@ -36,7 +36,11 @@ app.post('/', (req, res) => { const requestOptions = { url: url, //headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'}, - headers: {'User-Agent': 'Twitterbot/1.0'}, + //headers: {'User-Agent': 'Twitterbot/1.0'}, + headers: { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0', + 'X-Forwarded-For': '66.249.66.1', + }, }; console.log('Parse request for:', url);