From 943a1cfa4f2f8035ea13177f7f1638717f120fef Mon Sep 17 00:00:00 2001 From: Jason Schwarzenberger Date: Tue, 10 Nov 2020 14:56:21 +1300 Subject: [PATCH] reader server --- readerserver/constants.js | 4 ++++ readerserver/main.js | 27 +++++++++++++++++++++------ readerserver/{ => scraper}/simple.js | 14 ++++++-------- 3 files changed, 31 insertions(+), 14 deletions(-) create mode 100644 readerserver/constants.js rename readerserver/{ => scraper}/simple.js (75%) diff --git a/readerserver/constants.js b/readerserver/constants.js new file mode 100644 index 0000000..13119c4 --- /dev/null +++ b/readerserver/constants.js @@ -0,0 +1,4 @@ +module.exports.headers = { + 'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)', + 'X-Forwarded-For': '66.249.66.1', +}; \ No newline at end of file diff --git a/readerserver/main.js b/readerserver/main.js index 219faeb..870a126 100644 --- a/readerserver/main.js +++ b/readerserver/main.js @@ -1,14 +1,29 @@ const port = 33843; const express = require('express'); const app = express(); -const simple = require('./simple'); +const simple = require('./scraper/simple'); app.use(express.urlencoded({ extended: true })); -app.get('/', (req, res) => res.send(simple.FORM)); -app.post('/', (req, res) => simple.scrape(req, res)); -app.post('/details', (req, res) => simple.details(req, res)); -// app.post('/browser', (req, res) => browser.scrape(req, res)); -// app.post('/browser/details', (req, res) => browser.details(req, res)); + +app.get('/', (req, res) => { + // const routes = ['/', '/details', '/browser', '/browser/details', '/browser/comments']; + const routes = ['/', '/details']; + + const html = routes.map(route => ` +
+
+ route: POST ${route} + + +
+
`).join('
'); + res.send(html); +}); +app.post('/', simple.scrape); +app.post('/details', simple.details); +// app.post('/browser', browser.scrape); +// app.post('/browser/details', browser.details); +// app.post('/browser/comments', browser.comments); app.listen(port, () => { console.log(`Example app listening on port ${port}!`); diff --git a/readerserver/simple.js b/readerserver/scraper/simple.js similarity index 75% rename from readerserver/simple.js rename to readerserver/scraper/simple.js index 69fcd0d..ef2784c 100644 --- a/readerserver/simple.js +++ b/readerserver/scraper/simple.js @@ -2,12 +2,11 @@ const request = require('request'); const JSDOM = require('jsdom').JSDOM; const { Readability } = require('readability'); +const { headers } = require('../constants'); + const options = url => ({ - url: url, - headers: { - 'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)', - 'X-Forwarded-For': '66.249.66.1', - }, + url, + headers, }); const extract = (url, body) => { @@ -17,13 +16,12 @@ const extract = (url, body) => { }; -module.exports.FORM = '
'; module.exports.scrape = (req, res) => request(options(req.body.url), (error, response, body) => { if (error || response.statusCode != 200) { console.log('Response error:', error ? error.toString() : response.statusCode); return res.sendStatus(response ? response.statusCode : 404); } - const article = extract(url, body); + const article = extract(req.body.url, body); if (article && article.content) { return res.send(article.content); } @@ -35,7 +33,7 @@ module.exports.details = (req, res) => request(options(req.body.url), (error, re console.log('Response error:', error ? error.toString() : response.statusCode); return res.sendStatus(response ? response.statusCode : 404); } - const article = extract(url, body); + const article = extract(req.body.url, body); if (article) { return res.send(article); }