Make qotnews work with WaPo
This commit is contained in:
parent
ee5105743d
commit
feba8b7aa0
|
@ -46,7 +46,7 @@ def get_article(url):
|
||||||
logging.info('Trying our server instead...')
|
logging.info('Trying our server instead...')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
r = requests.post(READ_API, data=dict(url=url), timeout=10)
|
r = requests.post(READ_API, data=dict(url=url), timeout=20)
|
||||||
if r.status_code != 200:
|
if r.status_code != 200:
|
||||||
raise Exception('Bad response code ' + str(r.status_code))
|
raise Exception('Bad response code ' + str(r.status_code))
|
||||||
return r.text
|
return r.text
|
||||||
|
@ -94,7 +94,6 @@ def update_story(story, is_manual=False):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if story.get('url', '') and not story.get('text', ''):
|
if story.get('url', '') and not story.get('text', ''):
|
||||||
logging.info('inside if')
|
|
||||||
if not get_content_type(story['url']).startswith('text/'):
|
if not get_content_type(story['url']).startswith('text/'):
|
||||||
logging.info('URL invalid file type / content type:')
|
logging.info('URL invalid file type / content type:')
|
||||||
logging.info(story['url'])
|
logging.info(story['url'])
|
||||||
|
|
|
@ -7,12 +7,15 @@ import requests
|
||||||
import time
|
import time
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
USER_AGENT = 'Twitterbot/1.0'
|
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'
|
||||||
|
|
||||||
def api(route):
|
def api(route):
|
||||||
try:
|
try:
|
||||||
headers = {'User-Agent': USER_AGENT}
|
headers = {
|
||||||
r = requests.get(route, headers=headers, timeout=5)
|
'User-Agent': USER_AGENT,
|
||||||
|
'X-Forwarded-For': '66.249.66.1',
|
||||||
|
}
|
||||||
|
r = requests.get(route, headers=headers, timeout=10)
|
||||||
if r.status_code != 200:
|
if r.status_code != 200:
|
||||||
raise Exception('Bad response code ' + str(r.status_code))
|
raise Exception('Bad response code ' + str(r.status_code))
|
||||||
return r.text
|
return r.text
|
||||||
|
|
|
@ -4,7 +4,7 @@ const port = 33843;
|
||||||
|
|
||||||
const request = require('request');
|
const request = require('request');
|
||||||
const JSDOM = require('jsdom').JSDOM;
|
const JSDOM = require('jsdom').JSDOM;
|
||||||
const Readability = require('readability');
|
const { Readability } = require('readability');
|
||||||
|
|
||||||
app.use(express.urlencoded({ extended: true }));
|
app.use(express.urlencoded({ extended: true }));
|
||||||
|
|
||||||
|
@ -36,7 +36,11 @@ app.post('/', (req, res) => {
|
||||||
const requestOptions = {
|
const requestOptions = {
|
||||||
url: url,
|
url: url,
|
||||||
//headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'},
|
//headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'},
|
||||||
headers: {'User-Agent': 'Twitterbot/1.0'},
|
//headers: {'User-Agent': 'Twitterbot/1.0'},
|
||||||
|
headers: {
|
||||||
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0',
|
||||||
|
'X-Forwarded-For': '66.249.66.1',
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
console.log('Parse request for:', url);
|
console.log('Parse request for:', url);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user