Compare commits

..

2 Commits

Author SHA1 Message Date
feba8b7aa0 Make qotnews work with WaPo 2020-10-29 04:55:34 +00:00
ee5105743d Upgrade readability 2020-10-29 01:24:13 +00:00
4 changed files with 15 additions and 9 deletions

View File

@@ -46,7 +46,7 @@ def get_article(url):
logging.info('Trying our server instead...') logging.info('Trying our server instead...')
try: try:
r = requests.post(READ_API, data=dict(url=url), timeout=10) r = requests.post(READ_API, data=dict(url=url), timeout=20)
if r.status_code != 200: if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code)) raise Exception('Bad response code ' + str(r.status_code))
return r.text return r.text
@@ -94,7 +94,6 @@ def update_story(story, is_manual=False):
return False return False
if story.get('url', '') and not story.get('text', ''): if story.get('url', '') and not story.get('text', ''):
logging.info('inside if')
if not get_content_type(story['url']).startswith('text/'): if not get_content_type(story['url']).startswith('text/'):
logging.info('URL invalid file type / content type:') logging.info('URL invalid file type / content type:')
logging.info(story['url']) logging.info(story['url'])

View File

@@ -7,12 +7,15 @@ import requests
import time import time
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
USER_AGENT = 'Twitterbot/1.0' USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'
def api(route): def api(route):
try: try:
headers = {'User-Agent': USER_AGENT} headers = {
r = requests.get(route, headers=headers, timeout=5) 'User-Agent': USER_AGENT,
'X-Forwarded-For': '66.249.66.1',
}
r = requests.get(route, headers=headers, timeout=10)
if r.status_code != 200: if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code)) raise Exception('Bad response code ' + str(r.status_code))
return r.text return r.text

View File

@@ -4,7 +4,7 @@ const port = 33843;
const request = require('request'); const request = require('request');
const JSDOM = require('jsdom').JSDOM; const JSDOM = require('jsdom').JSDOM;
const Readability = require('readability'); const { Readability } = require('readability');
app.use(express.urlencoded({ extended: true })); app.use(express.urlencoded({ extended: true }));
@@ -36,7 +36,11 @@ app.post('/', (req, res) => {
const requestOptions = { const requestOptions = {
url: url, url: url,
//headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'}, //headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'},
headers: {'User-Agent': 'Twitterbot/1.0'}, //headers: {'User-Agent': 'Twitterbot/1.0'},
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0',
'X-Forwarded-For': '66.249.66.1',
},
}; };
console.log('Parse request for:', url); console.log('Parse request for:', url);

View File

@@ -712,8 +712,8 @@ raw-body@2.4.0:
unpipe "1.0.0" unpipe "1.0.0"
"readability@https://github.com/mozilla/readability": "readability@https://github.com/mozilla/readability":
version "0.2.0" version "0.3.0"
resolved "https://github.com/mozilla/readability#2982216913af2c66b0690e88606b03116553ad92" resolved "https://github.com/mozilla/readability#d5eea06a0095b3138dbd1f6233f656d690200509"
request-promise-core@1.1.2: request-promise-core@1.1.2:
version "1.1.2" version "1.1.2"