Compare commits
	
		
			77 Commits
		
	
	
		
			master
			...
			5668fa5dbc
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					5668fa5dbc | ||
| 
						 | 
					b771b52501 | ||
| 
						 | 
					f5c7a658ba | ||
| 
						 | 
					f5ccd844da | ||
| 
						 | 
					6a91b9402f | ||
| 
						 | 
					b80c1a5cb5 | ||
| 
						 | 
					b23e470317 | ||
| 
						 | 
					7420b5ece9 | ||
| 
						 | 
					64ced635cc | ||
| 
						 | 
					9318627f1b | ||
| 
						 | 
					3d0a3f1577 | ||
| 
						 | 
					587b10c438 | ||
| 
						 | 
					00954c6cac | ||
| 
						 | 
					637bc38476 | ||
| 
						 | 
					164b7e72c4 | ||
| 
						 | 
					3169af3002 | ||
| 
						 | 
					d588a60930 | ||
| 
						 | 
					408e2870b2 | ||
| 
						 | 
					44b8b36547 | ||
| 
						 | 
					4f49684194 | ||
| 
						 | 
					1d78b1c592 | ||
| 
						 | 
					0374794536 | ||
| 
						 | 
					943a1cfa4f | ||
| 
						 | 
					9cee370a25 | ||
| 
						 | 
					5efc6ef2d3 | ||
| 
						 | 
					4ec50e20cb | ||
| 
						 | 
					c1b7877f4b | ||
| 
						 | 
					7b8cbfc9b9 | ||
| 
						 | 
					bfa4108a8e | ||
| 
						 | 
					0bd0d40a31 | ||
| 
						 | 
					4e04595415 | ||
| 
						 | 
					006db2960c | ||
| 
						 | 
					1f063f0dac | ||
| 
						 | 
					1658346aa9 | ||
| 
						 | 
					2dbc702b40 | ||
| 
						 | 
					1c4764e67d | ||
| 
						 | 
					ee49d2021e | ||
| 
						 | 
					c391c50ab1 | ||
| 
						 | 
					095f0d549a | ||
| 
						 | 
					c21c71667e | ||
| 
						 | 
					c3a2c91a11 | ||
| 
						 | 
					0f39446a61 | ||
| 
						 | 
					351059aab1 | ||
| 
						 | 
					4488e2c292 | ||
| 
						 | 
					afda5b635c | ||
| 
						 | 
					0fc1a44d2b | ||
| 
						 | 
					9fff1b9e46 | ||
| 
						 | 
					16b59f6c67 | ||
| 
						 | 
					939f4775a7 | ||
| 
						 | 
					9bfc6fc6fa | ||
| 
						 | 
					6ea9844d00 | ||
| 
						 | 
					1318259d3d | ||
| 
						 | 
					98a0c2257c | ||
| 
						 | 
					e6976db25d | ||
| 
						 | 
					9edc8b7cca | ||
| 
						 | 
					33e21e7f30 | ||
| 
						 | 
					892a99eca6 | ||
| 
						 | 
					d718d05a04 | ||
| 
						 | 
					d1795eb1b8 | ||
| 
						 | 
					9f4ff4acf0 | ||
| 
						 | 
					db6aad84ec | ||
| 
						 | 
					29f8a8b8cc | ||
| 
						 | 
					abf8589e02 | ||
| 
						 | 
					b759f46582 | ||
| 
						 | 
					736cdc8576 | ||
| 
						 | 
					244d416f6e | ||
| 
						 | 
					5f98a2e76a | ||
| 
						 | 
					0567cdfd9b | ||
| 
						 | 
					4f90671cec | ||
| 
						 | 
					e63a1456a5 | ||
| 
						 | 
					76f1d57702 | ||
| 
						 | 
					de80389ed0 | ||
| 
						 | 
					4e64cf682a | ||
| 
						 | 
					c5fe5d25a0 | ||
| 
						 | 
					283a2b1545 | ||
| 
						 | 
					0d6a86ace2 | ||
| 
						 | 
					f23bf628e0 | 
							
								
								
									
										3
									
								
								.gitmodules
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								.gitmodules
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,3 @@
 | 
				
			|||||||
 | 
					[submodule "readerserver/scraper/browser/scripts/bypass-paywalls-chrome"]
 | 
				
			||||||
 | 
						path = readerserver/scraper/browser/scripts/bypass-paywalls-chrome
 | 
				
			||||||
 | 
						url = https://github.com/iamadamdev/bypass-paywalls-chrome.git
 | 
				
			||||||
@@ -1,9 +1,9 @@
 | 
				
			|||||||
import json
 | 
					from datetime import datetime, timedelta
 | 
				
			||||||
 | 
					 | 
				
			||||||
from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
 | 
					from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
 | 
				
			||||||
from sqlalchemy.ext.declarative import declarative_base
 | 
					from sqlalchemy.ext.declarative import declarative_base
 | 
				
			||||||
from sqlalchemy.orm import sessionmaker
 | 
					from sqlalchemy.orm import sessionmaker
 | 
				
			||||||
from sqlalchemy.exc import IntegrityError
 | 
					from sqlalchemy.exc import IntegrityError
 | 
				
			||||||
 | 
					from sqlalchemy.types import JSON
 | 
				
			||||||
 | 
					
 | 
				
			||||||
engine = create_engine('sqlite:///data/qotnews.sqlite')
 | 
					engine = create_engine('sqlite:///data/qotnews.sqlite')
 | 
				
			||||||
Session = sessionmaker(bind=engine)
 | 
					Session = sessionmaker(bind=engine)
 | 
				
			||||||
@@ -15,8 +15,8 @@ class Story(Base):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    sid = Column(String(16), primary_key=True)
 | 
					    sid = Column(String(16), primary_key=True)
 | 
				
			||||||
    ref = Column(String(16), unique=True)
 | 
					    ref = Column(String(16), unique=True)
 | 
				
			||||||
    meta_json = Column(String)
 | 
					    meta = Column(JSON)
 | 
				
			||||||
    full_json = Column(String)
 | 
					    data = Column(JSON)
 | 
				
			||||||
    title = Column(String)
 | 
					    title = Column(String)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Reflist(Base):
 | 
					class Reflist(Base):
 | 
				
			||||||
@@ -24,6 +24,7 @@ class Reflist(Base):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    rid = Column(Integer, primary_key=True)
 | 
					    rid = Column(Integer, primary_key=True)
 | 
				
			||||||
    ref = Column(String(16), unique=True)
 | 
					    ref = Column(String(16), unique=True)
 | 
				
			||||||
 | 
					    urlref = Column(String)
 | 
				
			||||||
    sid = Column(String, ForeignKey('stories.sid'), unique=True)
 | 
					    sid = Column(String, ForeignKey('stories.sid'), unique=True)
 | 
				
			||||||
    source = Column(String(16))
 | 
					    source = Column(String(16))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -36,19 +37,21 @@ def get_story(sid):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def put_story(story):
 | 
					def put_story(story):
 | 
				
			||||||
    story = story.copy()
 | 
					    story = story.copy()
 | 
				
			||||||
    full_json = json.dumps(story)
 | 
					    data = {}
 | 
				
			||||||
 | 
					    data.update(story)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    story.pop('text', None)
 | 
					    meta = {}
 | 
				
			||||||
    story.pop('comments', None)
 | 
					    meta.update(story)
 | 
				
			||||||
    meta_json = json.dumps(story)
 | 
					    meta.pop('text', None)
 | 
				
			||||||
 | 
					    meta.pop('comments', None)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        session = Session()
 | 
					        session = Session()
 | 
				
			||||||
        s = Story(
 | 
					        s = Story(
 | 
				
			||||||
            sid=story['id'],
 | 
					            sid=story['id'],
 | 
				
			||||||
            ref=story['ref'],
 | 
					            ref=story['ref'],
 | 
				
			||||||
            full_json=full_json,
 | 
					            data=data,
 | 
				
			||||||
            meta_json=meta_json,
 | 
					            meta=meta,
 | 
				
			||||||
            title=story.get('title', None),
 | 
					            title=story.get('title', None),
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        session.merge(s)
 | 
					        session.merge(s)
 | 
				
			||||||
@@ -63,24 +66,32 @@ def get_story_by_ref(ref):
 | 
				
			|||||||
    session = Session()
 | 
					    session = Session()
 | 
				
			||||||
    return session.query(Story).filter(Story.ref==ref).first()
 | 
					    return session.query(Story).filter(Story.ref==ref).first()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_reflist(amount):
 | 
					def get_stories_by_url(url):
 | 
				
			||||||
    session = Session()
 | 
					    session = Session()
 | 
				
			||||||
    q = session.query(Reflist).order_by(Reflist.rid.desc()).limit(amount)
 | 
					    return session.query(Story).\
 | 
				
			||||||
    return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]
 | 
					            filter(Story.title != None).\
 | 
				
			||||||
 | 
					            filter(Story.meta['url'].as_string() == url).\
 | 
				
			||||||
 | 
					            order_by(Story.meta['date'].desc())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_stories(amount):
 | 
					def get_reflist():
 | 
				
			||||||
    session = Session()
 | 
					    session = Session()
 | 
				
			||||||
    q = session.query(Reflist, Story.meta_json).\
 | 
					    q = session.query(Reflist).order_by(Reflist.rid.desc())
 | 
				
			||||||
            order_by(Reflist.rid.desc()).\
 | 
					    return [dict(ref=x.ref, sid=x.sid, source=x.source, urlref=x.urlref) for x in q.all()]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_stories(maxage=60*60*24*2):
 | 
				
			||||||
 | 
					    time = datetime.now().timestamp() - maxage
 | 
				
			||||||
 | 
					    session = Session()
 | 
				
			||||||
 | 
					    q = session.query(Reflist, Story.meta).\
 | 
				
			||||||
            join(Story).\
 | 
					            join(Story).\
 | 
				
			||||||
            filter(Story.title != None).\
 | 
					            filter(Story.title != None).\
 | 
				
			||||||
            limit(amount)
 | 
					            filter(Story.meta['date'].as_integer() > time).\
 | 
				
			||||||
 | 
					            order_by(Story.meta['date'].desc())
 | 
				
			||||||
    return [x[1] for x in q]
 | 
					    return [x[1] for x in q]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def put_ref(ref, sid, source):
 | 
					def put_ref(ref, sid, source, urlref):
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        session = Session()
 | 
					        session = Session()
 | 
				
			||||||
        r = Reflist(ref=ref, sid=sid, source=source)
 | 
					        r = Reflist(ref=ref, sid=sid, source=source, urlref=urlref)
 | 
				
			||||||
        session.add(r)
 | 
					        session.add(r)
 | 
				
			||||||
        session.commit()
 | 
					        session.commit()
 | 
				
			||||||
    except:
 | 
					    except:
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -6,60 +6,83 @@ logging.basicConfig(
 | 
				
			|||||||
import requests
 | 
					import requests
 | 
				
			||||||
import time
 | 
					import time
 | 
				
			||||||
from bs4 import BeautifulSoup
 | 
					from bs4 import BeautifulSoup
 | 
				
			||||||
 | 
					import itertools
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import settings
 | 
					import settings
 | 
				
			||||||
from feeds import hackernews, reddit, tildes, manual
 | 
					from feeds import hackernews, reddit, tildes, substack, manual
 | 
				
			||||||
 | 
					from feeds.sitemap import Sitemap
 | 
				
			||||||
OUTLINE_API = 'https://api.outline.com/v3/parse_article'
 | 
					from feeds.category import Category
 | 
				
			||||||
READ_API = 'http://127.0.0.1:33843'
 | 
					from scrapers import outline, declutter, browser, local
 | 
				
			||||||
 | 
					
 | 
				
			||||||
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
 | 
					INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
 | 
				
			||||||
TWO_DAYS = 60*60*24*2
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
def list():
 | 
					substacks = {}
 | 
				
			||||||
    feed = []
 | 
					for key, value in settings.SUBSTACK.items():
 | 
				
			||||||
 | 
					    substacks[key] = substack.Publication(value['url'])
 | 
				
			||||||
 | 
					categories = {}
 | 
				
			||||||
 | 
					for key, value in settings.CATEGORY.items():
 | 
				
			||||||
 | 
					    categories[key] = Category(value)
 | 
				
			||||||
 | 
					sitemaps = {}
 | 
				
			||||||
 | 
					for key, value in settings.SITEMAP.items():
 | 
				
			||||||
 | 
					    sitemaps[key] = Sitemap(value)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_list():
 | 
				
			||||||
 | 
					    feeds = {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if settings.NUM_HACKERNEWS:
 | 
					    if settings.NUM_HACKERNEWS:
 | 
				
			||||||
        feed += [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
 | 
					        feeds['hackernews'] = [(x, 'hackernews', x) for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if settings.NUM_REDDIT:
 | 
					    if settings.NUM_REDDIT:
 | 
				
			||||||
        feed += [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
 | 
					        feeds['reddit'] = [(x, 'reddit', x) for x in reddit.feed()[:settings.NUM_REDDIT]]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if settings.NUM_TILDES:
 | 
					    if settings.NUM_TILDES:
 | 
				
			||||||
        feed += [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
 | 
					        feeds['tildes'] = [(x, 'tildes', x) for x in tildes.feed()[:settings.NUM_TILDES]]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if settings.NUM_SUBSTACK:
 | 
				
			||||||
 | 
					        feeds['substack'] = [(x, 'substack', x) for x in substack.top.feed()[:settings.NUM_SUBSTACK]]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for key, publication in substacks.items():
 | 
				
			||||||
 | 
					        count = settings.SUBSTACK[key]['count']
 | 
				
			||||||
 | 
					        feeds[key] = [(x, key, x) for x in publication.feed()[:count]]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for key, sites in categories.items():
 | 
				
			||||||
 | 
					        count = settings.CATEGORY[key].get('count') or 0
 | 
				
			||||||
 | 
					        excludes = settings.CATEGORY[key].get('excludes')
 | 
				
			||||||
 | 
					        tz = settings.CATEGORY[key].get('tz')
 | 
				
			||||||
 | 
					        feeds[key] = [(x, key, u) for x, u in sites.feed(excludes)[:count]]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for key, sites in sitemaps.items():
 | 
				
			||||||
 | 
					        count = settings.SITEMAP[key].get('count') or 0
 | 
				
			||||||
 | 
					        excludes = settings.SITEMAP[key].get('excludes')
 | 
				
			||||||
 | 
					        feeds[key] = [(x, key, u) for x, u in sites.feed(excludes)[:count]]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    values = feeds.values()
 | 
				
			||||||
 | 
					    feed = itertools.chain.from_iterable(itertools.zip_longest(*values, fillvalue=None))
 | 
				
			||||||
 | 
					    feed = list(filter(None, feed))
 | 
				
			||||||
    return feed
 | 
					    return feed
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_article(url):
 | 
					def get_article(url):
 | 
				
			||||||
 | 
					    scrapers = {
 | 
				
			||||||
 | 
					        'declutter': declutter,
 | 
				
			||||||
 | 
					        'outline': outline,
 | 
				
			||||||
 | 
					        'browser': browser,
 | 
				
			||||||
 | 
					        'local': local,
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    available = settings.SCRAPERS or ['local']
 | 
				
			||||||
 | 
					    if 'local' not in available:
 | 
				
			||||||
 | 
					        available += ['local']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for scraper in available:
 | 
				
			||||||
 | 
					        if scraper not in scrapers.keys():
 | 
				
			||||||
 | 
					            continue
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
        params = {'source_url': url}
 | 
					            html = scrapers[scraper].get_html(url)
 | 
				
			||||||
        headers = {'Referer': 'https://outline.com/'}
 | 
					            if html:
 | 
				
			||||||
        r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=20)
 | 
					 | 
				
			||||||
        if r.status_code == 429:
 | 
					 | 
				
			||||||
            logging.info('Rate limited by outline, sleeping 30s and skipping...')
 | 
					 | 
				
			||||||
            time.sleep(30)
 | 
					 | 
				
			||||||
            return ''
 | 
					 | 
				
			||||||
        if r.status_code != 200:
 | 
					 | 
				
			||||||
            raise Exception('Bad response code ' + str(r.status_code))
 | 
					 | 
				
			||||||
        html = r.json()['data']['html']
 | 
					 | 
				
			||||||
        if 'URL is not supported by Outline' in html:
 | 
					 | 
				
			||||||
            raise Exception('URL not supported by Outline')
 | 
					 | 
				
			||||||
                return html
 | 
					                return html
 | 
				
			||||||
        except KeyboardInterrupt:
 | 
					        except KeyboardInterrupt:
 | 
				
			||||||
            raise
 | 
					            raise
 | 
				
			||||||
    except BaseException as e:
 | 
					        except:
 | 
				
			||||||
        logging.error('Problem outlining article: {}'.format(str(e)))
 | 
					            pass
 | 
				
			||||||
 | 
					 | 
				
			||||||
    logging.info('Trying our server instead...')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        r = requests.post(READ_API, data=dict(url=url), timeout=20)
 | 
					 | 
				
			||||||
        if r.status_code != 200:
 | 
					 | 
				
			||||||
            raise Exception('Bad response code ' + str(r.status_code))
 | 
					 | 
				
			||||||
        return r.text
 | 
					 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					 | 
				
			||||||
        raise
 | 
					 | 
				
			||||||
    except BaseException as e:
 | 
					 | 
				
			||||||
        logging.error('Problem getting article: {}'.format(str(e)))
 | 
					 | 
				
			||||||
    return ''
 | 
					    return ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_content_type(url):
 | 
					def get_content_type(url):
 | 
				
			||||||
@@ -78,7 +101,7 @@ def get_content_type(url):
 | 
				
			|||||||
    except:
 | 
					    except:
 | 
				
			||||||
        return ''
 | 
					        return ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def update_story(story, is_manual=False):
 | 
					def update_story(story, is_manual=False, urlref=None):
 | 
				
			||||||
    res = {}
 | 
					    res = {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if story['source'] == 'hackernews':
 | 
					    if story['source'] == 'hackernews':
 | 
				
			||||||
@@ -87,6 +110,14 @@ def update_story(story, is_manual=False):
 | 
				
			|||||||
        res = reddit.story(story['ref'])
 | 
					        res = reddit.story(story['ref'])
 | 
				
			||||||
    elif story['source'] == 'tildes':
 | 
					    elif story['source'] == 'tildes':
 | 
				
			||||||
        res = tildes.story(story['ref'])
 | 
					        res = tildes.story(story['ref'])
 | 
				
			||||||
 | 
					    elif story['source'] == 'substack':
 | 
				
			||||||
 | 
					        res = substack.top.story(story['ref'])
 | 
				
			||||||
 | 
					    elif story['source'] in categories.keys():
 | 
				
			||||||
 | 
					        res = categories[story['source']].story(story['ref'], urlref)
 | 
				
			||||||
 | 
					    elif story['source'] in sitemaps.keys():
 | 
				
			||||||
 | 
					        res = sitemaps[story['source']].story(story['ref'], urlref)
 | 
				
			||||||
 | 
					    elif story['source'] in substacks.keys():
 | 
				
			||||||
 | 
					        res = substacks[story['source']].story(story['ref'])
 | 
				
			||||||
    elif story['source'] == 'manual':
 | 
					    elif story['source'] == 'manual':
 | 
				
			||||||
        res = manual.story(story['ref'])
 | 
					        res = manual.story(story['ref'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -96,7 +127,7 @@ def update_story(story, is_manual=False):
 | 
				
			|||||||
        logging.info('Story not ready yet')
 | 
					        logging.info('Story not ready yet')
 | 
				
			||||||
        return False
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if story['date'] and not is_manual and story['date'] + TWO_DAYS < time.time():
 | 
					    if story['date'] and not is_manual and story['date'] + settings.MAX_STORY_AGE < time.time():
 | 
				
			||||||
        logging.info('Story too old, removing')
 | 
					        logging.info('Story too old, removing')
 | 
				
			||||||
        return False
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										72
									
								
								apiserver/feeds/category.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								apiserver/feeds/category.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,72 @@
 | 
				
			|||||||
 | 
					import logging
 | 
				
			||||||
 | 
					logging.basicConfig(
 | 
				
			||||||
 | 
					        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 | 
				
			||||||
 | 
					        level=logging.DEBUG)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == '__main__':
 | 
				
			||||||
 | 
					    import sys
 | 
				
			||||||
 | 
					    sys.path.insert(0,'.')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from bs4 import BeautifulSoup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import settings
 | 
				
			||||||
 | 
					from utils import clean
 | 
				
			||||||
 | 
					from misc.api import xml
 | 
				
			||||||
 | 
					from misc.news import Base
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _filter_links(links, category_url, excludes=None):
 | 
				
			||||||
 | 
					    links = list(filter(None, [link if link.startswith(category_url) else None for link in links]))
 | 
				
			||||||
 | 
					    links = list(filter(None, [link if link != category_url else None for link in links]))
 | 
				
			||||||
 | 
					    links = list(set(links))
 | 
				
			||||||
 | 
					    if excludes:
 | 
				
			||||||
 | 
					        links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
 | 
				
			||||||
 | 
					    return links
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _get_category(category_url, excludes=None):
 | 
				
			||||||
 | 
					    base_url = '/'.join(category_url.split('/')[:3])
 | 
				
			||||||
 | 
					    markup = xml(lambda x: category_url)
 | 
				
			||||||
 | 
					    if not markup: return []
 | 
				
			||||||
 | 
					    soup = BeautifulSoup(markup, features='html.parser')
 | 
				
			||||||
 | 
					    links = soup.find_all('a', href=True)
 | 
				
			||||||
 | 
					    links = [link.get('href') for link in links]
 | 
				
			||||||
 | 
					    links = [f"{base_url}{link}" if link.startswith('/') else link for link in links]
 | 
				
			||||||
 | 
					    links = _filter_links(links, category_url, excludes)
 | 
				
			||||||
 | 
					    return links
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Category(Base):
 | 
				
			||||||
 | 
					    def __init__(self, config):
 | 
				
			||||||
 | 
					        self.config = config
 | 
				
			||||||
 | 
					        self.category_url = config.get('url')
 | 
				
			||||||
 | 
					        self.tz = config.get('tz')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def feed(self, excludes=None):
 | 
				
			||||||
 | 
					        links = []
 | 
				
			||||||
 | 
					        if isinstance(self.category_url, str):
 | 
				
			||||||
 | 
					            links += _get_category(self.category_url, excludes)
 | 
				
			||||||
 | 
					        elif isinstance(self.category_url, list):
 | 
				
			||||||
 | 
					            for url in self.category_url:
 | 
				
			||||||
 | 
					                links += _get_category(url, excludes)
 | 
				
			||||||
 | 
					        links = list(set(links))
 | 
				
			||||||
 | 
					        return [(self.get_id(link), link) for link in links]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# scratchpad so I can quickly develop the parser
 | 
				
			||||||
 | 
					if __name__ == '__main__':
 | 
				
			||||||
 | 
					    print("Category: RadioNZ")
 | 
				
			||||||
 | 
					    site = Category("https://www.rnz.co.nz/news/")
 | 
				
			||||||
 | 
					    excludes = [
 | 
				
			||||||
 | 
					        'rnz.co.nz/news/sport',
 | 
				
			||||||
 | 
					        'rnz.co.nz/weather',
 | 
				
			||||||
 | 
					        'rnz.co.nz/news/weather',
 | 
				
			||||||
 | 
					    ]
 | 
				
			||||||
 | 
					    posts = site.feed(excludes)
 | 
				
			||||||
 | 
					    print(posts[:5])
 | 
				
			||||||
 | 
					    print(site.story(posts[0]))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    print("Category: Newsroom")
 | 
				
			||||||
 | 
					    site = Category("https://www.newsroom.co.nz/news/", tz='Pacific/Auckland')
 | 
				
			||||||
 | 
					    posts = site.feed()
 | 
				
			||||||
 | 
					    print(posts[:5])
 | 
				
			||||||
 | 
					    print(site.story(posts[0]))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -7,6 +7,8 @@ import requests
 | 
				
			|||||||
import time
 | 
					import time
 | 
				
			||||||
from bs4 import BeautifulSoup
 | 
					from bs4 import BeautifulSoup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import settings
 | 
				
			||||||
 | 
					
 | 
				
			||||||
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'
 | 
					USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def api(route):
 | 
					def api(route):
 | 
				
			||||||
@@ -33,7 +35,7 @@ def story(ref):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    s = {}
 | 
					    s = {}
 | 
				
			||||||
    s['author'] = 'manual submission'
 | 
					    s['author'] = 'manual submission'
 | 
				
			||||||
    s['author_link'] = 'https://news.t0.vc'
 | 
					    s['author_link'] = 'https://{}'.format(settings.HOSTNAME)
 | 
				
			||||||
    s['score'] = 0
 | 
					    s['score'] = 0
 | 
				
			||||||
    s['date'] = int(time.time())
 | 
					    s['date'] = int(time.time())
 | 
				
			||||||
    s['title'] = str(soup.title.string) if soup.title else ref
 | 
					    s['title'] = str(soup.title.string) if soup.title else ref
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -73,7 +73,7 @@ def story(ref):
 | 
				
			|||||||
        s['comments'] = list(filter(bool, s['comments']))
 | 
					        s['comments'] = list(filter(bool, s['comments']))
 | 
				
			||||||
        s['num_comments'] = r.num_comments
 | 
					        s['num_comments'] = r.num_comments
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if s['score'] < 25 and s['num_comments'] < 10:
 | 
					        if s['score'] < settings.REDDIT_SCORE_THRESHOLD and s['num_comments'] < settings.REDDIT_COMMENT_THRESHOLD:
 | 
				
			||||||
            return False
 | 
					            return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if r.selftext:
 | 
					        if r.selftext:
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										99
									
								
								apiserver/feeds/sitemap.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										99
									
								
								apiserver/feeds/sitemap.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,99 @@
 | 
				
			|||||||
 | 
					import logging
 | 
				
			||||||
 | 
					logging.basicConfig(
 | 
				
			||||||
 | 
					        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 | 
				
			||||||
 | 
					        level=logging.DEBUG)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == '__main__':
 | 
				
			||||||
 | 
					    import sys
 | 
				
			||||||
 | 
					    sys.path.insert(0,'.')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from datetime import datetime
 | 
				
			||||||
 | 
					from bs4 import BeautifulSoup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import settings
 | 
				
			||||||
 | 
					from utils import clean
 | 
				
			||||||
 | 
					from misc.time import unix
 | 
				
			||||||
 | 
					from misc.api import xml
 | 
				
			||||||
 | 
					from misc.news import Base
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _get_sitemap_date(a):
 | 
				
			||||||
 | 
					    if a.find('lastmod'):
 | 
				
			||||||
 | 
					        return a.find('lastmod').text
 | 
				
			||||||
 | 
					    if a.find('news:publication_date'):
 | 
				
			||||||
 | 
					        return a.find('news:publication_date').text
 | 
				
			||||||
 | 
					    if a.find('ns2:publication_date'):
 | 
				
			||||||
 | 
					        return a.find('ns2:publication_date').text
 | 
				
			||||||
 | 
					    return ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _filter_links(links, excludes=None):
					    """Reduce sitemap entries to recent, de-duplicated URLs, newest first.

					    Args:
					        links: parsed sitemap entries (objects exposing ``find``); each is
					            expected to contain a ``loc`` tag.
					        excludes: optional iterable of substrings; any URL containing one
					            of them is dropped.

					    Returns:
					        A list of URL strings. Entries without a date, or older than
					        ``settings.MAX_STORY_AGE`` seconds, are left out.
					    """
					    too_old = datetime.now().timestamp() - settings.MAX_STORY_AGE

					    # Parse every entry's date once rather than once per filter/sort pass.
					    dated = []
					    for entry in links:
					        raw_date = _get_sitemap_date(entry)
					        if not raw_date:
					            continue
					        stamp = unix(raw_date)
					        if stamp > too_old:
					            dated.append((stamp, entry))
					    dated.sort(key=lambda pair: pair[0], reverse=True)

					    # dict.fromkeys removes duplicates but keeps the newest-first order,
					    # which a plain set() would throw away.
					    urls = list(dict.fromkeys(entry.find('loc').text for _, entry in dated))
					    if excludes:
					        urls = [url for url in urls if url and not any(e in url for e in excludes)]
					    return urls
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _get_sitemap(feed_url, excludes=None):
					    """Collect article URLs from a sitemap, following sitemap indexes.

					    Args:
					        feed_url: URL of a ``urlset`` sitemap or a ``sitemapindex`` document.
					        excludes: optional substrings handed to ``_filter_links``; they are
					            applied to nested sitemap URLs as well as to article URLs.

					    Returns:
					        A de-duplicated list of URL strings; empty when the fetch fails.
					    """
					    markup = xml(lambda x: feed_url)
					    if not markup:
					        return []
					    soup = BeautifulSoup(markup, features='lxml')

					    nested = []
					    index = soup.find('sitemapindex')
					    if index:
					        nested = [a for a in index.find_all('sitemap') if a.find('loc')]

					    entries = []
					    urlset = soup.find('urlset')
					    if urlset:
					        entries = [a for a in urlset.find_all('url') if a.find('loc')]

					    links = _filter_links(entries, excludes)
					    # A sitemap index only points at further sitemaps; recurse into each.
					    for url in _filter_links(nested, excludes):
					        links += _get_sitemap(url, excludes)

					    # Order-preserving de-duplication (set() would shuffle the result).
					    return list(dict.fromkeys(links))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Sitemap(Base):
					    """Feed source that lists stories from one or more XML sitemaps."""

					    def __init__(self, config):
					        """Store the source configuration.

					        Args:
					            config: mapping read with ``get``. ``url`` is a sitemap URL or
					                a list of them; ``tz`` is kept as ``self.tz``.
					        """
					        self.config = config
					        self.sitemap_url = config.get('url')
					        self.tz = config.get('tz')

					    def feed(self, excludes=None):
					        """Return ``(id, url)`` pairs for every story found in the sitemaps.

					        Args:
					            excludes: optional substrings; URLs containing any are skipped.
					        """
					        urls = self.sitemap_url
					        if isinstance(urls, str):
					            urls = [urls]
					        elif not isinstance(urls, (list, tuple)):
					            # Anything else (e.g. a missing 'url' key) yields no stories.
					            urls = []

					        links = []
					        for url in urls:
					            links += _get_sitemap(url, excludes)

					        # De-duplicate without losing the order the sitemaps produced.
					        links = list(dict.fromkeys(links))
					        return [(self.get_id(link), link) for link in links]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# scratchpad so I can quickly develop the parser
					if __name__ == '__main__':
					    print("Sitemap: The Spinoff")
					    # Sitemap reads its settings with config.get(), so it needs a mapping.
					    site = Sitemap({'url': "https://thespinoff.co.nz/sitemap.xml"})
					    excludes = [
					        'thespinoff.co.nz/sitemap-misc.xml',
					        'thespinoff.co.nz/sitemap-authors.xml',
					        'thespinoff.co.nz/sitemap-tax-category.xml',
					    ]
					    posts = site.feed(excludes)
					    print(posts[:5])
					    if posts:
					        # feed() yields (ref, url) pairs, which line up with story(ref, urlref).
					        print(site.story(*posts[0]))

					    print("Sitemap: Newshub")
					    site = Sitemap({'url': [
					        'https://www.newshub.co.nz/home/politics.gnewssitemap.xml',
					        'https://www.newshub.co.nz/home/new-zealand.gnewssitemap.xml',
					        'https://www.newshub.co.nz/home/world.gnewssitemap.xml',
					        'https://www.newshub.co.nz/home/money.gnewssitemap.xml',
					    ]})
					    posts = site.feed()
					    print(posts[:5])
					    if posts:
					        print(site.story(*posts[0]))
					        # Last post; the earlier posts[:-1] passed a whole list to story().
					        print(site.story(*posts[-1]))
 | 
				
			||||||
							
								
								
									
										165
									
								
								apiserver/feeds/substack.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										165
									
								
								apiserver/feeds/substack.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,165 @@
 | 
				
			|||||||
 | 
					import logging
 | 
				
			||||||
 | 
					logging.basicConfig(
 | 
				
			||||||
 | 
					        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 | 
				
			||||||
 | 
					        level=logging.DEBUG)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == '__main__':
 | 
				
			||||||
 | 
					    import sys
 | 
				
			||||||
 | 
					    sys.path.insert(0,'.')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import requests
 | 
				
			||||||
 | 
					from datetime import datetime
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from utils import clean
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SUBSTACK_REFERER = 'https://substack.com'
 | 
				
			||||||
 | 
					SUBSTACK_API_TOP_POSTS = lambda x: "https://substack.com/api/v1/reader/top-posts"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def author_link(author_id, base_url):
					    """Return the profile URL of an author on the given publication.

					    A trailing slash on ``base_url`` is ignored so the result never
					    contains a double slash.
					    """
					    root = str(base_url).rstrip('/')
					    return f"{root}/people/{author_id}"
 | 
				
			||||||
 | 
					def api_comments(post_id, base_url):
					    """Return the comments API URL for a post, sorted best-first.

					    A trailing slash on ``base_url`` is ignored so the result never
					    contains a double slash.
					    """
					    root = str(base_url).rstrip('/')
					    return f"{root}/api/v1/post/{post_id}/comments?all_comments=true&sort=best_first"
 | 
				
			||||||
 | 
					def api_stories(x, base_url):
					    """Return the archive API URL listing a publication's newest posts.

					    Args:
					        x: unused; present so the function fits the ``route(ref)``
					            calling convention of ``api``.
					        base_url: publication root; a trailing slash is ignored.
					    """
					    root = str(base_url).rstrip('/')
					    return f"{root}/api/v1/archive?sort=new&search=&offset=0&limit=100"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def unix(date_str):
					    """Convert a ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` string to a Unix timestamp.

					    The trailing ``Z`` marks the time as UTC, so the parsed value is
					    tagged as UTC before conversion; a naive datetime would be read as
					    local time and shift the result by the machine's UTC offset.

					    Raises:
					        ValueError: if ``date_str`` does not match the expected format.
					    """
					    from datetime import timezone

					    parsed = datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%fZ')
					    return int(parsed.replace(tzinfo=timezone.utc).timestamp())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def api(route, ref=None, referer=None):
					    """GET a Substack API route and return the decoded JSON body.

					    The request is tried twice: first with a 10 second timeout, then
					    with a 20 second one.

					    Args:
					        route: callable that takes ``ref`` and returns the URL to fetch.
					        ref: value handed to ``route``.
					        referer: optional value for the ``Referer`` request header.

					    Returns:
					        The parsed JSON, or ``False`` when both attempts fail.
					    """
					    headers = {'Referer': referer} if referer else None
					    timeouts = (10, 20)
					    for attempt, timeout in enumerate(timeouts, start=1):
					        try:
					            r = requests.get(route(ref), headers=headers, timeout=timeout)
					            if r.status_code != 200:
					                raise Exception('Bad response code ' + str(r.status_code))
					            return r.json()
					        except Exception as e:
					            # Exception rather than BaseException: KeyboardInterrupt,
					            # SystemExit and similar must propagate instead of being retried.
					            if attempt < len(timeouts):
					                logging.error('Problem hitting Substack API: {}, trying again'.format(str(e)))
					            else:
					                logging.error('Problem hitting Substack API: {}'.format(str(e)))
					    return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def comment(i):
					    """Convert a Substack comment payload into the local comment dict.

					    Args:
					        i: comment mapping from the API; nested replies are read from
					            its ``children`` key.

					    Returns:
					        A dict with ``date``, ``author``, ``score``, ``text`` and
					        ``comments`` keys, or ``False`` when the payload has no ``body``.
					    """
					    if 'body' not in i:
					        return False

					    raw_date = i.get('date')
					    replies = (comment(j) for j in i.get('children') or [])

					    c = {}
					    # Missing fields fall back to neutral values instead of raising.
					    c['date'] = unix(raw_date) if raw_date else 0
					    c['author'] = i.get('name', '')
					    c['score'] = (i.get('reactions') or {}).get('❤', 0)
					    c['text'] = clean(i.get('body', '') or '')
					    c['comments'] = [reply for reply in replies if reply]

					    return c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Publication:
					    """Scraper for one Substack publication addressed by its base URL."""

					    def __init__(self, domain):
					        """Remember the publication's base URL (e.g. ``https://name.substack.com``)."""
					        self.BASE_DOMAIN = domain

					    def _public_stories(self):
					        """Fetch the archive and keep the posts whose audience is ``everyone``."""
					        stories = api(lambda x: api_stories(x, self.BASE_DOMAIN), referer=self.BASE_DOMAIN)
					        if not stories:
					            return []
					        return [i for i in stories if isinstance(i, dict) and i.get("audience") == "everyone"]

					    def feed(self):
					        """Return the ids (as strings) of the publication's public posts."""
					        return [str(i.get("id")) for i in self._public_stories()]

					    def story(self, ref):
					        """Build the story dict for the post whose id equals ``ref``.

					        Args:
					            ref: post id as a string, as returned by ``feed``.

					        Returns:
					            The story dict, or ``False`` when the post is not in the
					            public archive listing.
					        """
					        r = next((i for i in self._public_stories() if str(i.get('id')) == ref), None)
					        if not r:
					            return False

					        s = {}
					        s['author'] = ''
					        s['author_link'] = ''

					        s['date'] = unix(r.get('post_date'))
					        s['score'] = (r.get('reactions') or {}).get('❤', 0)
					        s['title'] = r.get('title', '')
					        s['link'] = r.get('canonical_url', '')
					        s['url'] = r.get('canonical_url', '')

					        # api() returns False on failure; treat that as "no comments".
					        comments = api(lambda x: api_comments(x, self.BASE_DOMAIN), r.get('id'), referer=self.BASE_DOMAIN)
					        raw_comments = comments.get('comments') if isinstance(comments, dict) else None
					        s['comments'] = list(filter(bool, (comment(i) for i in raw_comments or [])))
					        s['num_comments'] = r.get('comment_count', 0)

					        bylines = (self._bylines(b) for b in r.get('publishedBylines') or [])
					        authors = [a for a in bylines if a]
					        if authors:
					            s['author'] = authors[0].get('name')
					            s['author_link'] = authors[0].get('link')

					        return s

					    def _bylines(self, b):
					        """Map a byline entry to ``{'name', 'link'}``; None if it has no ``id``."""
					        if 'id' not in b:
					            return None
					        a = {}
					        a['name'] = b.get('name')
					        a['link'] = author_link(b.get('id'), self.BASE_DOMAIN)
					        return a
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Top:
					    """Scraper for Substack's cross-publication "top posts" listing."""

					    def _public_stories(self):
					        """Fetch the top-posts listing and keep posts whose audience is ``everyone``."""
					        stories = api(SUBSTACK_API_TOP_POSTS, referer=SUBSTACK_REFERER)
					        if not stories:
					            return []
					        return [i for i in stories if isinstance(i, dict) and i.get("audience") == "everyone"]

					    def feed(self):
					        """Return the ids (as strings) of the public top posts."""
					        return [str(i.get("id")) for i in self._public_stories()]

					    def story(self, ref):
					        """Build the story dict for the top post whose id equals ``ref``.

					        Args:
					            ref: post id as a string, as returned by ``feed``.

					        Returns:
					            The story dict, or ``False`` when the post is not in the
					            listing.
					        """
					        r = next((i for i in self._public_stories() if str(i.get('id')) == ref), None)
					        if not r:
					            return False

					        # Publication details travel with each post under the 'pub' key.
					        pub = r.get('pub') or {}
					        base_url = pub.get('base_url')

					        s = {}
					        s['author'] = pub.get('author_name')
					        s['author_link'] = author_link(pub.get('author_id'), base_url) if base_url else ''

					        s['date'] = unix(r.get('post_date'))
					        s['score'] = r.get('score')
					        s['title'] = r.get('title', '')
					        s['link'] = r.get('canonical_url', '')
					        s['url'] = r.get('canonical_url', '')

					        # Without a base URL there is nowhere to ask for comments, and
					        # api() returns False on failure; both cases mean "no comments".
					        comments = False
					        if base_url:
					            comments = api(lambda x: api_comments(x, base_url), r.get('id'), referer=SUBSTACK_REFERER)
					        raw_comments = comments.get('comments') if isinstance(comments, dict) else None
					        s['comments'] = list(filter(bool, (comment(i) for i in raw_comments or [])))
					        s['num_comments'] = r.get('comment_count', 0)

					        return s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					top = Top()

					# scratchpad so I can quickly develop the parser
					if __name__ == '__main__':
					    top_posts = top.feed()
					    # feed() returns [] when the API call fails, so guard the indexing.
					    if top_posts:
					        print(top.story(top_posts[0]))

					    # No trailing slash: the URL helpers append paths starting with '/'.
					    webworm = Publication("https://www.webworm.co")
					    posts = webworm.feed()
					    if posts:
					        print(webworm.story(posts[0]))
 | 
				
			||||||
							
								
								
									
										35
									
								
								apiserver/misc/api.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								apiserver/misc/api.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,35 @@
 | 
				
			|||||||
 | 
					import logging
 | 
				
			||||||
 | 
					logging.basicConfig(
 | 
				
			||||||
 | 
					        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 | 
				
			||||||
 | 
					        level=logging.DEBUG)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import requests
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					USER_AGENT = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
 | 
				
			||||||
 | 
					FORWARD_IP = '66.249.66.1'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def xml(route, ref=None):
					    """Fetch a URL and return the response body as text.

					    Args:
					        route: callable that takes ``ref`` and returns the URL to fetch.
					        ref: value handed to ``route``.

					    Returns:
					        The body text on HTTP 200, otherwise ``False``.
					    """
					    headers = {'User-Agent': USER_AGENT, 'X-Forwarded-For': FORWARD_IP}
					    try:
					        r = requests.get(route(ref), headers=headers, timeout=5)
					        if r.status_code != 200:
					            raise Exception('Bad response code ' + str(r.status_code))
					        return r.text
					    except Exception as e:
					        # Exception rather than BaseException, so KeyboardInterrupt,
					        # SystemExit and similar still propagate to the caller.
					        logging.error('Problem hitting URL: {}'.format(str(e)))
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def json(route, ref=None):
					    """Fetch a URL and return the response body decoded as JSON.

					    Args:
					        route: callable that takes ``ref`` and returns the URL to fetch.
					        ref: value handed to ``route``.

					    Returns:
					        The decoded JSON on HTTP 200, otherwise ``False`` (including
					        when the body is not valid JSON).
					    """
					    headers = {'User-Agent': USER_AGENT, 'X-Forwarded-For': FORWARD_IP}
					    try:
					        r = requests.get(route(ref), headers=headers, timeout=5)
					        if r.status_code != 200:
					            raise Exception('Bad response code ' + str(r.status_code))
					        return r.json()
					    except Exception as e:
					        # Exception rather than BaseException, so KeyboardInterrupt,
					        # SystemExit and similar still propagate to the caller.
					        logging.error('Problem hitting URL: {}'.format(str(e)))
					        return False
 | 
				
			||||||
							
								
								
									
										69
									
								
								apiserver/misc/metadata.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								apiserver/misc/metadata.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,69 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					# RDFa property URIs that feed each story attribute. Within one attribute
					# a later entry overrides an earlier one (published beats modified).
					_RDFA_KEYS = {
					    'title': [
					        'http://ogp.me/ns#title',
					        'https://ogp.me/ns#title',
					    ],
					    'date': [
					        'http://ogp.me/ns/article#modified_time',
					        'https://ogp.me/ns/article#modified_time',
					        'http://ogp.me/ns/article#published_time',
					        'https://ogp.me/ns/article#published_time',
					    ]
					}
					_MICRODATA_ARTICLE_TYPES = ('https://schema.org/NewsArticle', 'http://schema.org/NewsArticle')
					_JSONLD_ARTICLE_TYPES = ('Article', 'NewsArticle')


					def _matches_type(declared, wanted):
					    """Return True if a type value (string or list of strings) is in *wanted*."""
					    if isinstance(declared, str):
					        declared = [declared]
					    if not isinstance(declared, (list, tuple)):
					        return False
					    return any(t in wanted for t in declared)


					def _author_name(author, unwrap=None):
					    """Return the name of an author dict (or first of a list), else None."""
					    if isinstance(author, list):
					        author = author[0] if author else None
					    if unwrap and isinstance(author, dict):
					        author = author.get(unwrap)
					    if isinstance(author, dict):
					        return author.get('name') or None
					    return None


					def _apply_article(s, node, author_unwrap=None):
					    """Copy headline, date and author from one article mapping into *s*."""
					    if node.get('headline'):
					        s['title'] = node['headline']
					    if node.get('dateModified'):
					        s['date'] = node['dateModified']
					    # Applied last so the published date wins over the modified date.
					    if node.get('datePublished'):
					        s['date'] = node['datePublished']
					    name = _author_name(node.get('author'), author_unwrap)
					    if name:
					        s['author'] = name


					def _apply_rdfa(s, node):
					    """Copy title/date values from one RDFa node into *s*."""
					    # Properties are looked up on the node itself and on any nested
					    # mapping it contains.
					    candidates = [node] + [v for v in node.values() if isinstance(v, dict)]
					    for candidate in candidates:
					        for attribute, properties in _RDFA_KEYS.items():
					            for prop in properties:
					                values = candidate.get(prop)
					                if not isinstance(values, list):
					                    continue
					                for value in values:
					                    if isinstance(value, dict) and '@value' in value:
					                        s[attribute] = value['@value']


					def parse_extruct(s, data):
					    """Fill a story dict from structured metadata extracted from a page.

					    Sources are applied in the order RDFa, OpenGraph, microdata, JSON-LD,
					    so later sources override earlier ones. Missing sections or fields
					    are skipped rather than raising.

					    Args:
					        s: story dict to update; ``title``, ``date`` and ``author`` may
					            be set.
					        data: mapping with optional ``rdfa``, ``opengraph``,
					            ``microdata`` and ``json-ld`` lists.

					    Returns:
					        The same ``s`` dict, updated in place.
					    """
					    for rdfa in data.get('rdfa') or []:
					        if isinstance(rdfa, dict):
					            _apply_rdfa(s, rdfa)

					    for og in data.get('opengraph') or []:
					        found = {}
					        for key, value in og.get('properties') or []:
					            if not value:
					                continue
					            for marker in ('og:title', 'article:modified_time', 'article:published_time'):
					                if marker in key:
					                    # keep the first non-empty value per property
					                    found.setdefault(marker, value)
					        if 'article:modified_time' in found:
					            s['date'] = found['article:modified_time']
					        if 'article:published_time' in found:
					            s['date'] = found['article:published_time']
					        if 'og:title' in found:
					            s['title'] = found['og:title']

					    for md in data.get('microdata') or []:
					        if isinstance(md, dict) and _matches_type(md.get('type'), _MICRODATA_ARTICLE_TYPES):
					            # microdata nests an author's fields under 'properties'
					            _apply_article(s, md.get('properties') or {}, author_unwrap='properties')

					    for ld in data.get('json-ld') or []:
					        if not isinstance(ld, dict):
					            continue
					        if _matches_type(ld.get('@type'), _JSONLD_ARTICLE_TYPES):
					            _apply_article(s, ld)
					        for gld in ld.get('@graph') or []:
					            if isinstance(gld, dict) and _matches_type(gld.get('@type'), _JSONLD_ARTICLE_TYPES):
					                _apply_article(s, gld)

					    return s
 | 
				
			||||||
							
								
								
									
										101
									
								
								apiserver/misc/news.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										101
									
								
								apiserver/misc/news.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,101 @@
 | 
				
			|||||||
 | 
					import logging
 | 
				
			||||||
 | 
					logging.basicConfig(
 | 
				
			||||||
 | 
					        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 | 
				
			||||||
 | 
					        level=logging.DEBUG)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					import requests
 | 
				
			||||||
 | 
					from bs4 import BeautifulSoup
 | 
				
			||||||
 | 
					from scrapers import declutter
 | 
				
			||||||
 | 
					import extruct
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import settings
 | 
				
			||||||
 | 
					from utils import clean
 | 
				
			||||||
 | 
					from misc.metadata import parse_extruct
 | 
				
			||||||
 | 
					from misc.time import unix
 | 
				
			||||||
 | 
					from misc.api import xml
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def comment(i):
					    """Convert a raw comment mapping into the local comment dict.

					    Args:
					        i: comment mapping; nested replies are read from ``children``.

					    Returns:
					        A dict with ``author``, ``score``, ``date``, ``text`` and
					        ``comments`` keys, or ``False`` when ``i`` has no ``author`` key.
					    """
					    if 'author' not in i:
					        return False

					    # A comment without a 'children' key is treated as having no replies.
					    replies = (comment(j) for j in i.get('children') or [])

					    c = {}
					    c['author'] = i.get('author', '')
					    c['score'] = i.get('points', 0)
					    c['date'] = unix(i.get('date', 0))
					    c['text'] = clean(i.get('text', '') or '')
					    c['comments'] = [reply for reply in replies if reply]
					    return c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def comment_count(i):
					    """Count the comments in a comment tree that have an author.

					    Args:
					        i: a comment dict with ``author`` and ``comments`` keys, or a
					            list of such dicts (e.g. a story's top-level comments).

					    Returns:
					        Number of comments, at any depth, whose ``author`` is truthy.
					    """
					    if isinstance(i, list):
					        return sum(comment_count(c) for c in i)
					    alive = 1 if i.get('author') else 0
					    return alive + sum(comment_count(c) for c in i.get('comments') or [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Base:
					    """Common behaviour for news sources that scrape ordinary web pages."""

					    def __init__(self, config):
					        """Store the source configuration.

					        Args:
					            config: mapping read with ``get``; the ``url``, ``tz`` and
					                ``patterns`` keys are used by this class.
					        """
					        self.config = config
					        self.url = config.get('url')
					        self.tz = config.get('tz')

					    def get_id(self, link):
					        """Derive a story id from a link using the configured patterns.

					        Patterns are tried in order; the capture groups of the first
					        one that matches are joined with ``:``. The link itself is
					        returned when nothing matches or no patterns are configured.
					        """
					        patterns = self.config.get('patterns')
					        if not patterns:
					            return link
					        for pattern in patterns:
					            match = re.match(pattern, link)
					            if not match:
					                continue
					            # Optional groups that did not take part are None; skip them.
					            ident = ':'.join(g for g in match.groups() if g is not None)
					            if ident:
					                return ident
					        return link

					    def feed(self, excludes=None):
					        """Return the source's story references; the base class has none."""
					        return []

					    def story(self, ref, urlref):
					        """Download a page and build a story dict from its metadata.

					        Args:
					            ref: story id (not used by the base implementation).
					            urlref: URL of the page to fetch.

					        Returns:
					            The story dict, or ``False`` when the page cannot be
					            fetched or no date could be extracted.
					        """
					        from urllib.parse import urljoin

					        if urlref is None:
					            return False
					        markup = xml(lambda x: urlref)
					        if not markup:
					            return False

					        s = {}
					        s['author_link'] = ''
					        s['score'] = 0
					        s['comments'] = []
					        s['num_comments'] = 0
					        s['link'] = urlref
					        s['url'] = urlref
					        s['date'] = 0

					        soup = BeautifulSoup(markup, features='html.parser')
					        icon32 = soup.find_all('link', rel="icon", href=True, sizes="32x32")
					        icon16 = soup.find_all('link', rel="icon", href=True, sizes="16x16")
					        favicon = soup.find_all('link', rel="shortcut icon", href=True)
					        others = soup.find_all('link', rel="icon", href=True)
					        candidates = icon32 + icon16 + favicon + others
					        # dict.fromkeys drops duplicates but keeps the preference order
					        # above; urljoin resolves relative and protocol-relative hrefs.
					        hrefs = dict.fromkeys(i.get('href') for i in candidates)
					        icons = [urljoin(urlref, href) for href in hrefs if href]

					        if icons:
					            s['icon'] = icons[0]

					        data = extruct.extract(markup)
					        s = parse_extruct(s, data)
					        if s['date']:
					            s['date'] = unix(s['date'], tz=self.tz)

					        if 'disqus' in markup:
					            try:
					                comments = declutter.get_comments(urlref)
					                s['comments'] = list(filter(bool, comments or []))
					                s['num_comments'] = sum(comment_count(c) for c in s['comments'])
					            except KeyboardInterrupt:
					                raise
					            except Exception as e:
					                # Comments are best-effort: keep the story, record the failure.
					                logging.error('Problem getting comments: {}'.format(str(e)))

					        if not s['date']:
					            return False
					        return s
 | 
				
			||||||
							
								
								
									
										18
									
								
								apiserver/misc/time.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								apiserver/misc/time.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,18 @@
 | 
				
			|||||||
 | 
					import pytz
 | 
				
			||||||
 | 
					import dateutil.parser
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TZINFOS = {
 | 
				
			||||||
 | 
					    'NZDT': pytz.timezone('Pacific/Auckland'),
 | 
				
			||||||
 | 
					    'NZST': pytz.timezone('Pacific/Auckland')
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def unix(date_str, tz=None, tzinfos=TZINFOS):
					    """Parse a date string into a Unix timestamp.

					    Args:
					        date_str: date text in a format ``dateutil.parser.parse`` accepts.
					        tz: optional timezone name, applied only to dates that carry no
					            offset of their own.
					        tzinfos: timezone-abbreviation mapping handed to the parser.

					    Returns:
					        The timestamp as an int, or 0 when the input cannot be parsed.
					    """
					    try:
					        dt = dateutil.parser.parse(date_str, tzinfos=tzinfos)
					        # localize() raises ValueError on datetimes that already have a
					        # tzinfo, which used to turn every offset-bearing date into 0.
					        if tz and dt.tzinfo is None:
					            dt = pytz.timezone(tz).localize(dt)
					        return int(dt.timestamp())
					    except Exception:
					        # Unparseable input is reported as 0 rather than raised.
					        return 0
 | 
				
			||||||
@@ -4,6 +4,7 @@ certifi==2020.6.20
 | 
				
			|||||||
chardet==3.0.4
 | 
					chardet==3.0.4
 | 
				
			||||||
click==7.1.2
 | 
					click==7.1.2
 | 
				
			||||||
commonmark==0.9.1
 | 
					commonmark==0.9.1
 | 
				
			||||||
 | 
					extruct==0.10.0
 | 
				
			||||||
Flask==1.1.2
 | 
					Flask==1.1.2
 | 
				
			||||||
Flask-Cors==3.0.8
 | 
					Flask-Cors==3.0.8
 | 
				
			||||||
gevent==20.6.2
 | 
					gevent==20.6.2
 | 
				
			||||||
@@ -11,11 +12,13 @@ greenlet==0.4.16
 | 
				
			|||||||
idna==2.10
 | 
					idna==2.10
 | 
				
			||||||
itsdangerous==1.1.0
 | 
					itsdangerous==1.1.0
 | 
				
			||||||
Jinja2==2.11.2
 | 
					Jinja2==2.11.2
 | 
				
			||||||
 | 
					lxml==4.6.1
 | 
				
			||||||
MarkupSafe==1.1.1
 | 
					MarkupSafe==1.1.1
 | 
				
			||||||
packaging==20.4
 | 
					packaging==20.4
 | 
				
			||||||
praw==6.4.0
 | 
					praw==6.4.0
 | 
				
			||||||
prawcore==1.4.0
 | 
					prawcore==1.4.0
 | 
				
			||||||
pyparsing==2.4.7
 | 
					pyparsing==2.4.7
 | 
				
			||||||
 | 
					pytz==2020.4
 | 
				
			||||||
requests==2.24.0
 | 
					requests==2.24.0
 | 
				
			||||||
six==1.15.0
 | 
					six==1.15.0
 | 
				
			||||||
soupsieve==2.0.1
 | 
					soupsieve==2.0.1
 | 
				
			||||||
@@ -27,3 +30,4 @@ websocket-client==0.57.0
 | 
				
			|||||||
Werkzeug==1.0.1
 | 
					Werkzeug==1.0.1
 | 
				
			||||||
zope.event==4.4
 | 
					zope.event==4.4
 | 
				
			||||||
zope.interface==5.1.0
 | 
					zope.interface==5.1.0
 | 
				
			||||||
 | 
					python-dateutil==2.8.1
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										41
									
								
								apiserver/scrapers/browser.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								apiserver/scrapers/browser.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,41 @@
 | 
				
			|||||||
 | 
					import logging
 | 
				
			||||||
 | 
					logging.basicConfig(
 | 
				
			||||||
 | 
					        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 | 
				
			||||||
 | 
					        level=logging.DEBUG)
 | 
				
			||||||
 | 
					import requests
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Endpoints of the reader server's browser-based scraper (port 33843).
					READ_API = 'http://127.0.0.1:33843/browser/details'
					# The server route is '/browser/comments'; get_comments() reads this name.
					READ_COMMENT_API = 'http://127.0.0.1:33843/browser/comments'
					# Alias kept so code importing the earlier misspelled name still works.
					READ_COMMENT__API = READ_COMMENT_API
					# Passed as the `timeout` argument of every requests call in this module.
					TIMEOUT = 60
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_html(url):
					    """Return the 'content' field scraped for url, or '' when scraping fails."""
					    logging.info(f"Reader Scraper: {url}")
					    payload = get_details(url)
					    # get_details() yields None on any failure; map that to an empty string.
					    return payload['content'] if payload else ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_details(url):
					    """POST url to READ_API and return the decoded JSON reply.

					    Returns None, after logging the problem, when the request fails or the
					    response status is not 200. KeyboardInterrupt is re-raised.
					    """
					    try:
					        response = requests.post(READ_API, data={'url': url}, timeout=TIMEOUT)
					        if response.status_code == 200:
					            return response.json()
					        raise Exception('Bad response code ' + str(response.status_code))
					    except KeyboardInterrupt:
					        raise
					    except BaseException as err:
					        logging.error('Problem Scraping article: {}'.format(str(err)))
					        return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_comments(url):
					    """POST url to READ_COMMENT_API and return the decoded JSON reply.

					    Returns None, after logging the problem, when the request fails or the
					    response status is not 200. KeyboardInterrupt is re-raised.
					    """
					    try:
					        response = requests.post(READ_COMMENT_API, data={'url': url}, timeout=TIMEOUT)
					        if response.status_code == 200:
					            return response.json()
					        raise Exception('Bad response code ' + str(response.status_code))
					    except KeyboardInterrupt:
					        raise
					    except BaseException as err:
					        logging.error('Problem getting comments for article: {}'.format(str(err)))
					        return None
 | 
				
			||||||
							
								
								
									
										41
									
								
								apiserver/scrapers/declutter.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								apiserver/scrapers/declutter.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,41 @@
 | 
				
			|||||||
 | 
					import logging
 | 
				
			||||||
 | 
					logging.basicConfig(
 | 
				
			||||||
 | 
					        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 | 
				
			||||||
 | 
					        level=logging.DEBUG)
 | 
				
			||||||
 | 
					import requests
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					DECLUTTER_API = 'https://declutter.1j.nz/details'
 | 
				
			||||||
 | 
					DECLUTTER_COMMENT_API = 'https://declutter.1j.nz/comments'
 | 
				
			||||||
 | 
					TIMEOUT = 30
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_html(url):
					    """Return the 'content' field the declutter service gave for url, or ''."""
					    logging.info(f"Declutter Scraper: {url}")
					    payload = get_details(url)
					    # A failed lookup comes back as None; callers get an empty string instead.
					    return payload['content'] if payload else ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_details(url):
					    """POST url to DECLUTTER_API and return the decoded JSON reply.

					    Returns None, after logging the problem, when the request fails or the
					    response status is not 200. KeyboardInterrupt is re-raised.
					    """
					    try:
					        response = requests.post(DECLUTTER_API, data={'url': url}, timeout=TIMEOUT)
					        if response.status_code == 200:
					            return response.json()
					        raise Exception('Bad response code ' + str(response.status_code))
					    except KeyboardInterrupt:
					        raise
					    except BaseException as err:
					        logging.error('Problem decluttering article: {}'.format(str(err)))
					        return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_comments(url):
					    """POST url to DECLUTTER_COMMENT_API and return the decoded JSON reply.

					    Returns None, after logging the problem, when the request fails or the
					    response status is not 200. KeyboardInterrupt is re-raised.
					    """
					    try:
					        response = requests.post(DECLUTTER_COMMENT_API, data={'url': url}, timeout=TIMEOUT)
					        if response.status_code == 200:
					            return response.json()
					        raise Exception('Bad response code ' + str(response.status_code))
					    except KeyboardInterrupt:
					        raise
					    except BaseException as err:
					        logging.error('Problem getting comments for article: {}'.format(str(err)))
					        return None
 | 
				
			||||||
							
								
								
									
										27
									
								
								apiserver/scrapers/local.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								apiserver/scrapers/local.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,27 @@
 | 
				
			|||||||
 | 
					import logging
 | 
				
			||||||
 | 
					logging.basicConfig(
 | 
				
			||||||
 | 
					        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 | 
				
			||||||
 | 
					        level=logging.DEBUG)
 | 
				
			||||||
 | 
					import requests
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					READ_API = 'http://127.0.0.1:33843/details'
 | 
				
			||||||
 | 
					TIMEOUT = 20
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_html(url):
					    """Return the 'content' field the local reader gave for url, or ''."""
					    logging.info(f"Local Scraper: {url}")
					    payload = get_details(url)
					    # None signals a failed fetch; report it as an empty string.
					    return payload['content'] if payload else ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_details(url):
					    """POST url to READ_API and return the decoded JSON reply.

					    Returns None, after logging the problem, when the request fails or the
					    response status is not 200. KeyboardInterrupt is re-raised.
					    """
					    try:
					        response = requests.post(READ_API, data={'url': url}, timeout=TIMEOUT)
					        if response.status_code == 200:
					            return response.json()
					        raise Exception('Bad response code ' + str(response.status_code))
					    except KeyboardInterrupt:
					        raise
					    except BaseException as err:
					        logging.error('Problem getting article: {}'.format(str(err)))
					        return None
 | 
				
			||||||
							
								
								
									
										37
									
								
								apiserver/scrapers/outline.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								apiserver/scrapers/outline.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,37 @@
 | 
				
			|||||||
 | 
					import logging
 | 
				
			||||||
 | 
					logging.basicConfig(
 | 
				
			||||||
 | 
					        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 | 
				
			||||||
 | 
					        level=logging.DEBUG)
 | 
				
			||||||
 | 
					import requests
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					OUTLINE_REFERER = 'https://outline.com/'
 | 
				
			||||||
 | 
					OUTLINE_API = 'https://api.outline.com/v3/parse_article'
 | 
				
			||||||
 | 
					TIMEOUT = 20
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_html(url):
					    """Return the 'html' field of Outline's result for url, or '' on failure."""
					    payload = get_details(url)
					    # get_details() returns None when the lookup did not succeed.
					    return payload['html'] if payload else ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_details(url):
					    """Request Outline's parsed article for url and return its 'data' object.

					    Returns None when Outline answers 429 (after pausing 30 seconds), and
					    also when any other error occurs, in which case the error is logged.
					    KeyboardInterrupt is re-raised.
					    """
					    # Imported here because this module's header does not import time;
					    # without it the rate-limit pause below raised NameError instead of
					    # sleeping.
					    import time
					    try:
					        logging.info(f"Outline Scraper: {url}")
					        params = {'source_url': url}
					        headers = {'Referer': OUTLINE_REFERER}
					        r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=TIMEOUT)
					        if r.status_code == 429:
					            logging.info('Rate limited by outline, sleeping 30s and skipping...')
					            time.sleep(30)
					            return None
					        if r.status_code != 200:
					            raise Exception('Bad response code ' + str(r.status_code))
					        data = r.json()['data']
					        # Outline reports unsupported URLs inside the returned HTML body
					        # rather than through the status code.
					        if 'URL is not supported by Outline' in data['html']:
					            raise Exception('URL not supported by Outline')
					        return data
					    except KeyboardInterrupt:
					        raise
					    except BaseException as e:
					        logging.error('Problem outlining article: {}'.format(str(e)))
					        return None
 | 
				
			||||||
@@ -35,14 +35,11 @@ def update_rankings():
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def update_attributes():
 | 
					def update_attributes():
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        json = ['title', 'url', 'author', 'link', 'id']
 | 
					        json = ['title', 'url', 'author', 'link', 'id', 'source']
 | 
				
			||||||
        r = requests.post(MEILI_URL + 'indexes/qotnews/settings/searchable-attributes', json=json, timeout=2)
 | 
					        r = requests.post(MEILI_URL + 'indexes/qotnews/settings/searchable-attributes', json=json, timeout=2)
 | 
				
			||||||
        if r.status_code != 202:
 | 
					        if r.status_code != 202:
 | 
				
			||||||
            raise Exception('Bad response code ' + str(r.status_code))
 | 
					            raise Exception('Bad response code ' + str(r.status_code))
 | 
				
			||||||
        return r.json()
 | 
					        requests.delete(MEILI_URL + 'indexes/qotnews/settings/displayed-attributes', timeout=2)
 | 
				
			||||||
        r = requests.delete(MEILI_URL + 'indexes/qotnews/settings/displayed-attributes', timeout=2)
 | 
					 | 
				
			||||||
        if r.status_code != 202:
 | 
					 | 
				
			||||||
            raise Exception('Bad response code ' + str(r.status_code))
 | 
					 | 
				
			||||||
        return r.json()
 | 
					        return r.json()
 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					    except KeyboardInterrupt:
 | 
				
			||||||
        raise
 | 
					        raise
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -15,6 +15,7 @@ import traceback
 | 
				
			|||||||
import time
 | 
					import time
 | 
				
			||||||
from urllib.parse import urlparse, parse_qs
 | 
					from urllib.parse import urlparse, parse_qs
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import settings
 | 
				
			||||||
import database
 | 
					import database
 | 
				
			||||||
import search
 | 
					import search
 | 
				
			||||||
import feed
 | 
					import feed
 | 
				
			||||||
@@ -27,9 +28,6 @@ from flask_cors import CORS
 | 
				
			|||||||
database.init()
 | 
					database.init()
 | 
				
			||||||
search.init()
 | 
					search.init()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
FEED_LENGTH = 75
 | 
					 | 
				
			||||||
news_index = 0
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def new_id():
 | 
					def new_id():
 | 
				
			||||||
    nid = gen_rand_id()
 | 
					    nid = gen_rand_id()
 | 
				
			||||||
    while database.get_story(nid):
 | 
					    while database.get_story(nid):
 | 
				
			||||||
@@ -42,9 +40,8 @@ cors = CORS(flask_app)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
@flask_app.route('/api')
 | 
					@flask_app.route('/api')
 | 
				
			||||||
def api():
 | 
					def api():
 | 
				
			||||||
    stories = database.get_stories(FEED_LENGTH)
 | 
					    stories = database.get_stories(settings.MAX_STORY_AGE)
 | 
				
			||||||
    # hacky nested json
 | 
					    res = Response(json.dumps({"stories": stories}))
 | 
				
			||||||
    res = Response('{"stories":[' + ','.join(stories) + ']}')
 | 
					 | 
				
			||||||
    res.headers['content-type'] = 'application/json'
 | 
					    res.headers['content-type'] = 'application/json'
 | 
				
			||||||
    return res
 | 
					    return res
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -73,7 +70,7 @@ def submit():
 | 
				
			|||||||
        elif 'reddit.com' in parse.hostname and 'comments' in url:
 | 
					        elif 'reddit.com' in parse.hostname and 'comments' in url:
 | 
				
			||||||
            source = 'reddit'
 | 
					            source = 'reddit'
 | 
				
			||||||
            ref = parse.path.split('/')[4]
 | 
					            ref = parse.path.split('/')[4]
 | 
				
			||||||
        elif 'news.t0.vc' in parse.hostname:
 | 
					        elif settings.HOSTNAME in parse.hostname:
 | 
				
			||||||
            raise Exception('Invalid article')
 | 
					            raise Exception('Invalid article')
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            source = 'manual'
 | 
					            source = 'manual'
 | 
				
			||||||
@@ -102,8 +99,9 @@ def submit():
 | 
				
			|||||||
def story(sid):
 | 
					def story(sid):
 | 
				
			||||||
    story = database.get_story(sid)
 | 
					    story = database.get_story(sid)
 | 
				
			||||||
    if story:
 | 
					    if story:
 | 
				
			||||||
        # hacky nested json
 | 
					        related = database.get_stories_by_url(story.meta['url'])
 | 
				
			||||||
        res = Response('{"story":' + story.full_json + '}')
 | 
					        related = [r.meta for r in related]
 | 
				
			||||||
 | 
					        res = Response(json.dumps({"story": story.data, "related": related}))
 | 
				
			||||||
        res.headers['content-type'] = 'application/json'
 | 
					        res.headers['content-type'] = 'application/json'
 | 
				
			||||||
        return res
 | 
					        return res
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
@@ -114,7 +112,7 @@ def story(sid):
 | 
				
			|||||||
def index():
 | 
					def index():
 | 
				
			||||||
    return render_template('index.html',
 | 
					    return render_template('index.html',
 | 
				
			||||||
            title='Feed',
 | 
					            title='Feed',
 | 
				
			||||||
            url='news.t0.vc',
 | 
					            url=settings.HOSTNAME,
 | 
				
			||||||
            description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')
 | 
					            description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@flask_app.route('/<sid>', strict_slashes=False)
 | 
					@flask_app.route('/<sid>', strict_slashes=False)
 | 
				
			||||||
@@ -127,7 +125,7 @@ def static_story(sid):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    story = database.get_story(sid)
 | 
					    story = database.get_story(sid)
 | 
				
			||||||
    if not story: return abort(404)
 | 
					    if not story: return abort(404)
 | 
				
			||||||
    story = json.loads(story.full_json)
 | 
					    story = story.data
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    score = story['score']
 | 
					    score = story['score']
 | 
				
			||||||
    num_comments = story['num_comments']
 | 
					    num_comments = story['num_comments']
 | 
				
			||||||
@@ -146,52 +144,49 @@ def static_story(sid):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
http_server = WSGIServer(('', 33842), flask_app)
 | 
					http_server = WSGIServer(('', 33842), flask_app)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def feed_thread():
 | 
					def _add_new_refs():
 | 
				
			||||||
    global news_index
 | 
					    for ref, source, urlref in feed.get_list():
 | 
				
			||||||
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        while True:
 | 
					 | 
				
			||||||
            # onboard new stories
 | 
					 | 
				
			||||||
            if news_index == 0:
 | 
					 | 
				
			||||||
                for ref, source in feed.list():
 | 
					 | 
				
			||||||
        if database.get_story_by_ref(ref):
 | 
					        if database.get_story_by_ref(ref):
 | 
				
			||||||
            continue
 | 
					            continue
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            nid = new_id()
 | 
					            nid = new_id()
 | 
				
			||||||
                        database.put_ref(ref, nid, source)
 | 
					            database.put_ref(ref, nid, source, urlref)
 | 
				
			||||||
            logging.info('Added ref ' + ref)
 | 
					            logging.info('Added ref ' + ref)
 | 
				
			||||||
        except database.IntegrityError:
 | 
					        except database.IntegrityError:
 | 
				
			||||||
            continue
 | 
					            continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            ref_list = database.get_reflist(FEED_LENGTH)
 | 
					def _update_current_story(item):
 | 
				
			||||||
 | 
					 | 
				
			||||||
            # update current stories
 | 
					 | 
				
			||||||
            if news_index < len(ref_list):
 | 
					 | 
				
			||||||
                item = ref_list[news_index]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
                    story_json = database.get_story(item['sid']).full_json
 | 
					        story = database.get_story(item['sid']).data
 | 
				
			||||||
                    story = json.loads(story_json)
 | 
					 | 
				
			||||||
    except AttributeError:
 | 
					    except AttributeError:
 | 
				
			||||||
        story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
 | 
					        story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                logging.info('Updating story: ' + str(story['ref']) + ', index: ' + str(news_index))
 | 
					    logging.info('Updating story: {}'.format(str(story['ref'])))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                valid = feed.update_story(story)
 | 
					    valid = feed.update_story(story, urlref=item['urlref'])
 | 
				
			||||||
    if valid:
 | 
					    if valid:
 | 
				
			||||||
        database.put_story(story)
 | 
					        database.put_story(story)
 | 
				
			||||||
        search.put_story(story)
 | 
					        search.put_story(story)
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        database.del_ref(item['ref'])
 | 
					        database.del_ref(item['ref'])
 | 
				
			||||||
        logging.info('Removed ref {}'.format(item['ref']))
 | 
					        logging.info('Removed ref {}'.format(item['ref']))
 | 
				
			||||||
            else:
 | 
					
 | 
				
			||||||
                logging.info('Skipping index: ' + str(news_index))
 | 
					def feed_thread():
 | 
				
			||||||
 | 
					    ref_list = []
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        while True:
 | 
				
			||||||
 | 
					            # onboard new stories
 | 
				
			||||||
 | 
					            if not len(ref_list):
 | 
				
			||||||
 | 
					                _add_new_refs()
 | 
				
			||||||
 | 
					                ref_list = database.get_reflist()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            # update current stories
 | 
				
			||||||
 | 
					            if len(ref_list):
 | 
				
			||||||
 | 
					                item = ref_list.pop(0)
 | 
				
			||||||
 | 
					                _update_current_story(item)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            gevent.sleep(6)
 | 
					            gevent.sleep(6)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            news_index += 1
 | 
					 | 
				
			||||||
            if news_index == FEED_LENGTH: news_index = 0
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					    except KeyboardInterrupt:
 | 
				
			||||||
        logging.info('Ending feed thread...')
 | 
					        logging.info('Ending feed thread...')
 | 
				
			||||||
    except ValueError as e:
 | 
					    except ValueError as e:
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,12 +1,57 @@
 | 
				
			|||||||
# QotNews settings
 | 
					# QotNews settings
 | 
				
			||||||
# edit this file and save it as settings.py
 | 
					# edit this file and save it as settings.py
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					HOSTNAME = 'news.t0.vc'
 | 
				
			||||||
 | 
					MAX_STORY_AGE = 3*24*60*60
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Feed Lengths
 | 
					# Feed Lengths
 | 
				
			||||||
# Number of top items from each site to pull
 | 
					# Number of top items from each site to pull
 | 
				
			||||||
# set to 0 to disable that site
 | 
					# set to 0 to disable that site
 | 
				
			||||||
NUM_HACKERNEWS = 15
 | 
					NUM_HACKERNEWS = 15
 | 
				
			||||||
NUM_REDDIT = 10
 | 
					NUM_REDDIT = 10
 | 
				
			||||||
NUM_TILDES = 5
 | 
					NUM_TILDES = 5
 | 
				
			||||||
 | 
					NUM_SUBSTACK = 10
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SITEMAP = {}
 | 
				
			||||||
 | 
					# SITEMAP['nzherald'] = {
 | 
				
			||||||
 | 
					#     'url': "https://www.nzherald.co.nz/arcio/news-sitemap/",
 | 
				
			||||||
 | 
					#     'count': 20,
 | 
				
			||||||
 | 
					#     'patterns': [
 | 
				
			||||||
 | 
					#         r'^https:\/\/www\.(nzherald\.co\.nz)\/.*\/([^/]+)\/?$',
 | 
				
			||||||
 | 
					#     ],
 | 
				
			||||||
 | 
					#     'excludes': [
 | 
				
			||||||
 | 
					#         'driven.co.nz',
 | 
				
			||||||
 | 
					#         'oneroof.co.nz',
 | 
				
			||||||
 | 
					#         'nzherald.co.nz/sponsored-stories',
 | 
				
			||||||
 | 
					#         'nzherald.co.nz/entertainment/',
 | 
				
			||||||
 | 
					#         'nzherald.co.nz/lifestyle/',
 | 
				
			||||||
 | 
					#         'nzherald.co.nz/travel/',
 | 
				
			||||||
 | 
					#         'nzherald.co.nz/sport/',
 | 
				
			||||||
 | 
					#         'nzherald.co.nz/promotions/',
 | 
				
			||||||
 | 
					#         'nzherald.co.nzhttp',
 | 
				
			||||||
 | 
					#         'herald-afternoon-quiz',
 | 
				
			||||||
 | 
					#         'herald-morning-quiz'
 | 
				
			||||||
 | 
					#     ],
 | 
				
			||||||
 | 
					# }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SUBSTACK = {}
 | 
				
			||||||
 | 
					# SUBSTACK['webworm'] = { 'url': "https://www.webworm.co", 'count': 10}
 | 
				
			||||||
 | 
					# SUBSTACK['the bulletin'] = { 'url': "https://thespinoff.substack.com", 'count': 10}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CATEGORY = {}
 | 
				
			||||||
 | 
					# CATEGORY['radionz'] = {
 | 
				
			||||||
 | 
					#     'url': "https://www.rnz.co.nz/news/",
 | 
				
			||||||
 | 
					#     'count': 20,
 | 
				
			||||||
 | 
					#     'patterns': [
 | 
				
			||||||
 | 
					#         r'https:\/\/www\.(rnz\.co\.nz)\/news\/[^\/]+\/(\d+)\/[^\/]+\/?'
 | 
				
			||||||
 | 
					#     ],
 | 
				
			||||||
 | 
					#     'excludes': [
 | 
				
			||||||
 | 
					#         'rnz.co.nz/news/sport',
 | 
				
			||||||
 | 
					#         'rnz.co.nz/weather',
 | 
				
			||||||
 | 
					#     ],
 | 
				
			||||||
 | 
					# }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SCRAPERS = ['browser', 'declutter', 'outline', 'local']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Reddit account info
 | 
					# Reddit account info
 | 
				
			||||||
# leave blank if not using Reddit
 | 
					# leave blank if not using Reddit
 | 
				
			||||||
@@ -14,6 +59,10 @@ REDDIT_CLIENT_ID = ''
 | 
				
			|||||||
REDDIT_CLIENT_SECRET = ''
 | 
					REDDIT_CLIENT_SECRET = ''
 | 
				
			||||||
REDDIT_USER_AGENT = ''
 | 
					REDDIT_USER_AGENT = ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Minimum points or number of comments before including a thread:
 | 
				
			||||||
 | 
					REDDIT_COMMENT_THRESHOLD = 10
 | 
				
			||||||
 | 
					REDDIT_SCORE_THRESHOLD = 25
 | 
				
			||||||
 | 
					
 | 
				
			||||||
SUBREDDITS = [
 | 
					SUBREDDITS = [
 | 
				
			||||||
    'Economics',
 | 
					    'Economics',
 | 
				
			||||||
    'AcademicPhilosophy',
 | 
					    'AcademicPhilosophy',
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,52 +1,29 @@
 | 
				
			|||||||
 | 
					const port = 33843;
 | 
				
			||||||
const express = require('express');
 | 
					const express = require('express');
 | 
				
			||||||
const app = express();
 | 
					const app = express();
 | 
				
			||||||
const port = 33843;
 | 
					const simple = require('./scraper/simple');
 | 
				
			||||||
 | 
					const browser = require('./scraper/browser');
 | 
				
			||||||
const request = require('request');
 | 
					 | 
				
			||||||
const JSDOM = require('jsdom').JSDOM;
 | 
					 | 
				
			||||||
const { Readability } = require('readability');
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
app.use(express.urlencoded({ extended: true }));
 | 
					app.use(express.urlencoded({ extended: true }));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
app.get('/', (req, res) => {
 | 
					app.get('/', (req, res) => {
 | 
				
			||||||
	res.send('<form method="POST" accept-charset="UTF-8"><input name="url"><button type="submit">SUBMIT</button></form>');
 | 
						const routes = ['/', '/details', '/browser', '/browser/details', '/browser/comments'];
 | 
				
			||||||
});
 | 
					
 | 
				
			||||||
 | 
						const html = routes.map(route => `
 | 
				
			||||||
const requestCallback = (url, res) => (error, response, body) => {
 | 
						<form method="POST" action="${route}" accept-charset="UTF-8">
 | 
				
			||||||
	if (!error && response.statusCode == 200) {
 | 
							<fieldset>
 | 
				
			||||||
		console.log('Response OK.');
 | 
								<legend>route: POST ${route}</legend>
 | 
				
			||||||
 | 
								<input name="url">
 | 
				
			||||||
		const doc = new JSDOM(body, {url: url});
 | 
								<button type="submit">SUBMIT</button>
 | 
				
			||||||
		const reader = new Readability(doc.window.document);
 | 
							</fieldset>
 | 
				
			||||||
		const article = reader.parse();
 | 
						</form>`).join('<hr />');
 | 
				
			||||||
 | 
						res.send(html);
 | 
				
			||||||
		if (article && article.content) {
 | 
					 | 
				
			||||||
			res.send(article.content);
 | 
					 | 
				
			||||||
		} else {
 | 
					 | 
				
			||||||
			res.sendStatus(404);
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	} else {
 | 
					 | 
				
			||||||
		console.log('Response error:', error ? error.toString() : response.statusCode);
 | 
					 | 
				
			||||||
		res.sendStatus(response ? response.statusCode : 404);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
app.post('/', (req, res) => {
 | 
					 | 
				
			||||||
	const url = req.body.url;
 | 
					 | 
				
			||||||
	const requestOptions = {
 | 
					 | 
				
			||||||
		url: url,
 | 
					 | 
				
			||||||
		//headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'},
 | 
					 | 
				
			||||||
		//headers: {'User-Agent': 'Twitterbot/1.0'},
 | 
					 | 
				
			||||||
		headers: {
 | 
					 | 
				
			||||||
			'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0',
 | 
					 | 
				
			||||||
			'X-Forwarded-For': '66.249.66.1',
 | 
					 | 
				
			||||||
		},
 | 
					 | 
				
			||||||
	};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	console.log('Parse request for:', url);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	request(requestOptions, requestCallback(url, res));
 | 
					 | 
				
			||||||
});
 | 
					});
 | 
				
			||||||
 | 
					app.post('/', simple.scrape);
 | 
				
			||||||
 | 
					app.post('/details', simple.details);
 | 
				
			||||||
 | 
					app.post('/browser', browser.scrape);
 | 
				
			||||||
 | 
					app.post('/browser/details', browser.details);
 | 
				
			||||||
 | 
					app.post('/browser/comments', browser.comments);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
app.listen(port, () => {
 | 
					app.listen(port, () => {
 | 
				
			||||||
	console.log(`Example app listening on port ${port}!`);
 | 
						console.log(`Example app listening on port ${port}!`);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -4,10 +4,12 @@
 | 
				
			|||||||
  "main": "main.js",
 | 
					  "main": "main.js",
 | 
				
			||||||
  "license": "MIT",
 | 
					  "license": "MIT",
 | 
				
			||||||
  "dependencies": {
 | 
					  "dependencies": {
 | 
				
			||||||
 | 
					    "@mozilla/readability": "^0.3.0",
 | 
				
			||||||
    "dompurify": "^1.0.11",
 | 
					    "dompurify": "^1.0.11",
 | 
				
			||||||
    "express": "^4.17.1",
 | 
					    "express": "^4.17.1",
 | 
				
			||||||
    "jsdom": "^15.1.1",
 | 
					    "jsdom": "^15.1.1",
 | 
				
			||||||
    "readability": "https://github.com/mozilla/readability",
 | 
					    "node-fetch": "^2.6.1",
 | 
				
			||||||
 | 
					    "playwright": "^1.5.2",
 | 
				
			||||||
    "request": "^2.88.0"
 | 
					    "request": "^2.88.0"
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										45
									
								
								readerserver/scraper/browser/_browser.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								readerserver/scraper/browser/_browser.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,45 @@
 | 
				
			|||||||
 | 
					const { firefox } = require("playwright");
 | 
				
			||||||
 | 
					const { JSDOM } = require("jsdom");
 | 
				
			||||||
 | 
					const { Readability } = require("@mozilla/readability");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const { getUserAgent } = require('../../utils/user-agent');
 | 
				
			||||||
 | 
					const { blockedRegexes, matchUrlDomain } = require("../../utils/sites");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Load `url` in headless Firefox, apply the paywall/cosmetic init
					 * scripts, and run Readability over the rendered HTML.
					 *
					 * @param {string} url - Article URL to load.
					 * @returns {Promise<object|null>} Result of `Readability#parse()`.
					 * @throws Propagates any Playwright / parsing error to the caller.
					 */
					module.exports.getDetails = async (url) => {
						const { userAgent, headers } = getUserAgent(url);

						// blockedRegexes is keyed by the publisher's domain while most
						// patterns target third-party hosts (tinypass, lightboxcdn, ...),
						// so the lookup has to start from the article URL. The per-request
						// lookup is kept as a fallback so nothing that was blocked before
						// stops being blocked.
						const blockedDomains = Object.keys(blockedRegexes);
						const pageDomain = matchUrlDomain(blockedDomains, url);

						const browser = await firefox.launch({ args: [], headless: true });

						// Everything after launch runs inside try/finally so the browser is
						// closed even when newPage() itself fails.
						try {
							const tab = await browser.newPage({
								extraHTTPHeaders: headers,
								userAgent,
								viewport: { width: 2000, height: 10000 },
							});

							await tab.route(/.*/, (route) => {
								const routeUrl = route.request().url();
								const domain = pageDomain || matchUrlDomain(blockedDomains, routeUrl);
								if (domain && routeUrl.match(blockedRegexes[domain])) {
									return route.abort();
								}
								return route.continue();
							});
							await tab.addInitScript({ path: "scraper/browser/scripts/bypass-paywalls-chrome/src/js/contentScript.js" });
							await tab.addInitScript({ path: "scraper/browser/scripts/cosmetic-filters.js" });
							await tab.addInitScript({ path: "scraper/browser/scripts/fix-relative-links.js" });
							await tab.goto(url, { timeout: 60000, waitUntil: "domcontentloaded" });
							// Give the init scripts' delayed clean-up (1s timers) time to run.
							await tab.waitForTimeout(2000);

							const body = await tab.content();
							const doc = new JSDOM(body, { url });
							const reader = new Readability(doc.window.document);
							return reader.parse();
						} finally {
							// Closing the browser also closes every page it owns.
							await browser.close();
						}
					};
 | 
				
			||||||
							
								
								
									
										34
									
								
								readerserver/scraper/browser/_comments.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								readerserver/scraper/browser/_comments.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,34 @@
 | 
				
			|||||||
 | 
					const { JSDOM } = require("jsdom");
 | 
				
			||||||
 | 
					const { firefox } = require("playwright");
 | 
				
			||||||
 | 
					const { getUserAgent } = require('../../utils/user-agent');
 | 
				
			||||||
 | 
					const { disqusThread } = require('../../utils/disqus-thread');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const DISQUS_EMBED = 'https://disqus.com/embed/comments/';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Load `url` in headless Firefox, capture the Disqus embed response
					 * the page triggers, and convert its thread data with disqusThread().
					 *
					 * @param {string} url - Article URL whose Disqus thread should be read.
					 * @returns {Promise<Array>} Whatever disqusThread() returns for the thread.
					 * @throws {Error} When the embed has no `#disqus-threadData` element;
					 *   Playwright / JSON errors are propagated unchanged.
					 */
					module.exports.getComments = async (url) => {
						const { userAgent, headers } = getUserAgent(url);

						const browser = await firefox.launch({ args: [], headless: true });

						// try/finally starts right after launch so a failing newPage()
						// cannot leak the browser process.
						try {
							const tab = await browser.newPage({
								extraHTTPHeaders: headers,
								userAgent,
								viewport: { width: 2000, height: 10000 },
							});

							// Register the response listener before navigating: a response
							// that arrives while goto() is still pending would otherwise be
							// missed and the wait would run into its timeout.
							const [response] = await Promise.all([
								tab.waitForResponse((candidate) => candidate.url().includes(DISQUS_EMBED)),
								tab.goto(url, { timeout: 60000, waitUntil: "domcontentloaded" }),
							]);

							const text = await response.text();
							// JSDOM takes an options object; a bare string was silently ignored.
							const dom = new JSDOM(text, { url: response.url() });
							const script = dom.window.document.querySelector('#disqus-threadData');
							if (!script) {
								throw new Error('disqus thread data not found.');
							}
							const data = JSON.parse(script.innerHTML);

							return disqusThread(data);
						} finally {
							// Closing the browser also closes every page it owns.
							await browser.close();
						}
					};
 | 
				
			||||||
							
								
								
									
										40
									
								
								readerserver/scraper/browser/index.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								readerserver/scraper/browser/index.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,40 @@
 | 
				
			|||||||
 | 
					const { getDetails } = require('./_browser');
 | 
				
			||||||
 | 
					const { getComments } = require('./_comments');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Express handler: respond with the readable HTML of `req.body.url`,
					 * rendered through the browser scraper.
					 *
					 * Sends the article content on success and a bare 500 on any failure
					 * (including an empty extraction).
					 */
					module.exports.scrape = async (req, res) => {
						try {
							const article = await getDetails(req.body.url);
							if (!article || !article.content) {
								throw new Error('failed to get details.');
							}
							return res.send(article.content);
						} catch (e) {
							// Log like the sibling handlers do; otherwise a 500 from this
							// route leaves no trace of its cause.
							console.log(e);
							return res.sendStatus(500);
						}
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Express handler: respond with the full Readability result for
					 * `req.body.url`, or a bare 500 when extraction fails or yields nothing.
					 */
					module.exports.details = async (req, res) => {
						try {
							const parsed = await getDetails(req.body.url);
							if (parsed) {
								return res.send(parsed);
							}
							throw new Error('failed to get details.');
						} catch (err) {
							console.log(err);
							return res.sendStatus(500);
						}
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Express handler: respond with the comment thread found for
					 * `req.body.url`, or a bare 500 when it cannot be retrieved.
					 */
					module.exports.comments = async (req, res) => {
						try {
							const thread = await getComments(req.body.url);
							if (thread) {
								return res.send(thread);
							}
							throw new Error('failed to get comments.');
						} catch (err) {
							console.log(err);
							return res.sendStatus(500);
						}
					};
 | 
				
			||||||
 Submodule readerserver/scraper/browser/scripts/bypass-paywalls-chrome added at 44f3d1b114
									
								
							
							
								
								
									
										99
									
								
								readerserver/scraper/browser/scripts/cosmetic-filters.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										99
									
								
								readerserver/scraper/browser/scripts/cosmetic-filters.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,99 @@
 | 
				
			|||||||
 | 
					// In-page script: strips hidden elements everywhere and removes
					// site-specific clutter (share bars, sign-up boxes, ...) on selected
					// New Zealand news sites.
					(function () {
						// All DOM work is deferred by this many milliseconds.
						const REMOVAL_DELAY_MS = 1000;

						removeHiddenElements();

						// A rule is either a CSS selector string, or a [selector, text]
						// pair that only matches elements whose text matches `text`.
						if (matchDomain("stuff.co.nz")) {
							removeSelectors([
								".support-brief-container",
								'[class*="donation-in-"]',
								".sics-component__sharebar",
								".breaking-news-pointer",
								".bigbyline-container",
								[
									".sics-component__html-injector.sics-component__story__paragraph",
									"READ MORE:",
								],
							]);
						}
						if (matchDomain("nzherald.co.nz")) {
							removeSelectors([
								"[href$='#commenting-widget']",
								".related-articles",
								".article__print-button",
								".share-bar",
								".c-suggest-links.read-more-links",
								".website-of-year",
								".meta-data",
								".article__kicker",
								".author__image",
							]);
						}
						if (matchDomain(["rnz.co.nz", "radionz.co.nz"])) {
							removeSelectors([".c-advert-app", ".c-sub-nav"]);
						}
						if (matchDomain(["newsroom.co.nz"])) {
							removeSelectors([".article_content__section", ".bio"]);
						}
						if (matchDomain(["newshub.co.nz"])) {
							removeSelectors([".c-ArticleHeading-authorPicture", ".relatedarticles"]);
						}
						if (matchDomain(["tvnz.co.nz"])) {
							// Was ".signup-container container", i.e. a <container> tag
							// inside .signup-container; an element carrying both classes
							// is presumably what was meant (NOTE(review): confirm on tvnz).
							removeSelectors([".signup-container.container"]);
						}
						if (matchDomain(["thespinoff.co.nz"])) {
							removeSelectors([".the-spinoff-club-interruptive", ".bulletin-signup"]);
						}

						// True when the current hostname equals, or is a subdomain of,
						// one of `domains` (a string or an array of strings).
						function matchDomain(domains) {
							const hostname = window.location.hostname;
							if (typeof domains === "string") {
								domains = [domains];
							}
							return domains.some(
								(domain) => hostname === domain || hostname.endsWith("." + domain)
							);
						}

						// Remove every given element from the document; falsy entries are skipped.
						function removeDOMElement(...elements) {
							for (const element of elements) {
								if (element) {
									element.remove();
								}
							}
						}

						// Elements matching `selector` whose text content matches `text`
						// (`text` is interpreted as a regular expression source).
						function pageContains(selector, text) {
							const pattern = RegExp(text);
							return Array.from(document.querySelectorAll(selector)).filter(
								(element) => pattern.test(element.textContent)
							);
						}

						// After the delay, drop every element whose computed style is
						// display:none or visibility:hidden.
						function removeHiddenElements() {
							window.setTimeout(function () {
								const selector = "*:not(script):not(head):not(meta):not(link):not(style)";
								Array.from(document.querySelectorAll(selector))
									.filter((element) => {
										const computed = getComputedStyle(element);
										const displayNone = computed["display"] === "none";
										const visibilityHidden = computed["visibility"] === "hidden";
										return displayNone || visibilityHidden;
									})
									.forEach((element) => element && element.remove());
							}, REMOVAL_DELAY_MS);
						}

						// After the delay, remove everything matched by the given rules.
						function removeSelectors(selectors) {
							window.setTimeout(function () {
								const elements = selectors.flatMap((rule) => {
									if (typeof rule === "string") {
										return Array.from(document.querySelectorAll(rule));
									}
									if (Array.isArray(rule)) {
										return pageContains(...rule);
									}
									// Unknown rule shape: contribute nothing.
									return [];
								});
								removeDOMElement(...elements);
							}, REMOVAL_DELAY_MS);
						}
					})();
 | 
				
			||||||
							
								
								
									
										14
									
								
								readerserver/scraper/browser/scripts/fix-relative-links.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								readerserver/scraper/browser/scripts/fix-relative-links.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,14 @@
 | 
				
			|||||||
 | 
					// In-page script: rewrites root-relative `src` / `href` attributes
					// ("/path", but not protocol-relative "//host/path") into absolute
					// URLs on the current origin.
					(function () {
						const { host, protocol } = window.location;
						const url = `${protocol}//${host}`;
						const targets = [
							['[src^="/"]', 'src'],
							['[href^="/"]', 'href'],
						];

						function fixRelativeLinks() {
							targets.forEach(([selector, attribute]) => {
								Array.from(document.querySelectorAll(selector))
									.filter((e) => e.attributes[attribute] && /^\/[^\/]/.test(e.attributes[attribute].value))
									.forEach((e) => {
										e.attributes[attribute].value = `${url}${e.attributes[attribute].value}`;
									});
							});
						}

						// When injected as an init script this runs before the markup has
						// been parsed, so an immediate querySelectorAll finds nothing.
						// Defer until the DOM is available.
						if (document.readyState === "loading") {
							document.addEventListener("DOMContentLoaded", fixRelativeLinks);
						} else {
							fixRelativeLinks();
						}
					})();
 | 
				
			||||||
							
								
								
									
										59
									
								
								readerserver/scraper/simple.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								readerserver/scraper/simple.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,59 @@
 | 
				
			|||||||
 | 
					const fetch = require('node-fetch');
 | 
				
			||||||
 | 
					const { JSDOM } = require('jsdom');
 | 
				
			||||||
 | 
					const { Readability } = require('@mozilla/readability');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const { getUserAgent } = require('../utils/user-agent');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Parse `body` as the HTML document located at `url` and return
					// Readability's parse result for it.
					const extract = (url, body) => {
						const { window } = new JSDOM(body, { url: url });
						return new Readability(window.document).parse();
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Express handler: fetch `req.body.url` over plain HTTP and respond
					 * with the readable article HTML.
					 *
					 * Responses: article content on success; the upstream status when the
					 * fetch is not OK; 404 when nothing readable was extracted; 500 on error.
					 */
					module.exports.scrape = async (req, res) => {
						try {
							const { userAgent, headers } = getUserAgent(req.body.url);
							// Only set User-Agent when one was chosen: node-fetch stringifies
							// header values, so `undefined` would be sent as "undefined".
							const requestHeaders = { ...headers };
							if (userAgent) {
								requestHeaders['User-Agent'] = userAgent;
							}
							const response = await fetch(req.body.url, { headers: requestHeaders });
							if (!response.ok) {
								// node-fetch exposes the code as `status` (there is no `statusCode`).
								return res.sendStatus(response.status);
							}
							const html = await response.text();
							const article = extract(req.body.url, html);
							if (article && article.content) {
								return res.send(article.content);
							}
							return res.sendStatus(404);
						} catch (e) {
							console.error(e);
							return res.sendStatus(500);
						}
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Express handler: fetch `req.body.url` over plain HTTP and respond
					 * with the full Readability result.
					 *
					 * Responses: the parsed article on success; the upstream status when
					 * the fetch is not OK; 404 when nothing was extracted; 500 on error.
					 */
					module.exports.details = async (req, res) => {
						try {
							const { userAgent, headers } = getUserAgent(req.body.url);
							// Only set User-Agent when one was chosen: node-fetch stringifies
							// header values, so `undefined` would be sent as "undefined".
							const requestHeaders = { ...headers };
							if (userAgent) {
								requestHeaders['User-Agent'] = userAgent;
							}
							const response = await fetch(req.body.url, { headers: requestHeaders });
							if (!response.ok) {
								// node-fetch exposes the code as `status` (there is no `statusCode`).
								return res.sendStatus(response.status);
							}
							const html = await response.text();
							const article = extract(req.body.url, html);
							if (article) {
								return res.send(article);
							}
							return res.sendStatus(404);
						} catch (e) {
							console.error(e);
							return res.sendStatus(500);
						}
					};
 | 
				
			||||||
							
								
								
									
										11
									
								
								readerserver/utils/constants.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								readerserver/utils/constants.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,11 @@
 | 
				
			|||||||
 | 
					const googleBotUserAgent = 'Googlebot/2.1 (+http://www.google.com/bot.html)';
 | 
				
			||||||
 | 
					const googleBotIp = '66.249.66.1';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					module.exports.googleBot = {
 | 
				
			||||||
 | 
						userAgent: googleBotUserAgent,
 | 
				
			||||||
 | 
						ip: googleBotIp,
 | 
				
			||||||
 | 
						headers: {
 | 
				
			||||||
 | 
							'User-Agent': googleBotUserAgent,
 | 
				
			||||||
 | 
							'X-Forwarded-For': googleBotIp,
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										21
									
								
								readerserver/utils/disqus-thread.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								readerserver/utils/disqus-thread.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,21 @@
 | 
				
			|||||||
 | 
					module.exports.disqusThread = data => {
 | 
				
			||||||
 | 
						const comments = data.response.posts.reduce((c, post) => ({
 | 
				
			||||||
 | 
							...c,
 | 
				
			||||||
 | 
							[post.id.toString()]: {
 | 
				
			||||||
 | 
								author: post.author.name,
 | 
				
			||||||
 | 
								authorLink: post.author.profileUrl,
 | 
				
			||||||
 | 
								date: post.createdAt,
 | 
				
			||||||
 | 
								text: post.raw_message,
 | 
				
			||||||
 | 
								score: post.points,
 | 
				
			||||||
 | 
								children: [],
 | 
				
			||||||
 | 
								id: post.id.toString(),
 | 
				
			||||||
 | 
								parent: (post.parent || '').toString(),
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}), {});
 | 
				
			||||||
 | 
						Object.keys(comments).filter(id => !!comments[id].parent).forEach(id => {
 | 
				
			||||||
 | 
							const comment = comments[id];
 | 
				
			||||||
 | 
							comments[comment.parent].children.push(comment);
 | 
				
			||||||
 | 
						});
 | 
				
			||||||
 | 
						const parents = Object.keys(comments).filter(id => comments[id].parent).map(id => comments[id]);
 | 
				
			||||||
 | 
						return parents;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
							
								
								
									
										98
									
								
								readerserver/utils/sites.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								readerserver/utils/sites.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,98 @@
 | 
				
			|||||||
 | 
					module.exports.blockedRegexes = {
 | 
				
			||||||
 | 
						"adweek.com": /.+\.lightboxcdn\.com\/.+/,
 | 
				
			||||||
 | 
						"afr.com": /afr\.com\/assets\/vendorsReactRedux_client.+\.js/,
 | 
				
			||||||
 | 
						"businessinsider.com": /(.+\.tinypass\.com\/.+|cdn\.onesignal\.com\/sdks\/.+\.js)/,
 | 
				
			||||||
 | 
						"chicagotribune.com": /.+:\/\/.+\.tribdss\.com\//,
 | 
				
			||||||
 | 
						"economist.com": /(.+\.tinypass\.com\/.+|economist\.com\/engassets\/_next\/static\/chunks\/framework.+\.js)/,
 | 
				
			||||||
 | 
						"editorialedomani.it": /(js\.pelcro\.com\/.+|editorialedomani.it\/pelcro\.js)/,
 | 
				
			||||||
 | 
						"foreignpolicy.com": /.+\.tinypass\.com\/.+/,
 | 
				
			||||||
 | 
						"fortune.com": /.+\.tinypass\.com\/.+/,
 | 
				
			||||||
 | 
						"haaretz.co.il": /haaretz\.co\.il\/htz\/js\/inter\.js/,
 | 
				
			||||||
 | 
						"haaretz.com": /haaretz\.com\/hdc\/web\/js\/minified\/header-scripts-int.js.+/,
 | 
				
			||||||
 | 
						"inquirer.com": /.+\.tinypass\.com\/.+/,
 | 
				
			||||||
 | 
						"lastampa.it": /.+\.repstatic\.it\/minify\/sites\/lastampa\/.+\/config\.cache\.php\?name=social_js/,
 | 
				
			||||||
 | 
						"lrb.co.uk": /.+\.tinypass\.com\/.+/,
 | 
				
			||||||
 | 
						"nzherald.co.nz": /(.+nzherald\.co\.nz\/.+\/subs\/p\.js|.+nzherald\.co\.nz\/.+\/react\.js|.+nzherald\.co\.nz\/.+\/appear\.js|.+nzherald\.co\.nz\/.+\/tracking\/.+|.+nzherald\.co\.nz\/.+\/default\.js|.+\/newsbarscript\.js)/,
 | 
				
			||||||
 | 
						"medscape.com": /.+\.medscapestatic\.com\/.*medscape-library\.js/,
 | 
				
			||||||
 | 
						"interest.co.nz": /(.+\.presspatron\.com.+|.+interest\.co\.nz.+pp-ablock-banner\.js)/,
 | 
				
			||||||
 | 
						"repubblica.it": /scripts\.repubblica\.it\/pw\/pw\.js.+/,
 | 
				
			||||||
 | 
						"spectator.co.uk": /.+\.tinypass\.com\/.+/,
 | 
				
			||||||
 | 
						"spectator.com.au": /.+\.tinypass\.com\/.+/,
 | 
				
			||||||
 | 
						"telegraph.co.uk": /.+telegraph\.co\.uk.+martech.+/,
 | 
				
			||||||
 | 
						"thecourier.com.au": /.+cdn-au\.piano\.io\/api\/tinypass.+\.js/,
 | 
				
			||||||
 | 
						"thenation.com": /thenation\.com\/.+\/paywall-script\.php/,
 | 
				
			||||||
 | 
						"thenational.scot": /(.+\.tinypass\.com\/.+|.+thenational\.scot.+omniture\.js|.+thenational\.scot.+responsive-sync.+)/,
 | 
				
			||||||
 | 
						"thewrap.com": /thewrap\.com\/.+\/wallkit\.js/,
 | 
				
			||||||
 | 
						"wsj.com": /cdn\.ampproject\.org\/v\d\/amp-access-.+\.js/,
 | 
				
			||||||
 | 
						"historyextra.com": /.+\.evolok\.net\/.+\/authorize\/.+/,
 | 
				
			||||||
 | 
						"barrons.com": /cdn\.ampproject\.org\/v\d\/amp-access-.+\.js/,
 | 
				
			||||||
 | 
						"irishtimes.com": /cdn\.ampproject\.org\/v\d\/amp-access-.+\.js/,
 | 
				
			||||||
 | 
						"elmercurio.com": /(merreader\.emol\.cl\/assets\/js\/merPramV2.js|staticmer\.emol\.cl\/js\/inversiones\/PramModal.+\.js)/,
 | 
				
			||||||
 | 
						"sloanreview.mit.edu": /(.+\.tinypass\.com\/.+|.+\.netdna-ssl\.com\/wp-content\/themes\/smr\/assets\/js\/libs\/welcome-ad\.js)/,
 | 
				
			||||||
 | 
						"latercera.com": /.+\.cxense\.com\/+/,
 | 
				
			||||||
 | 
						"lesechos.fr": /.+\.tinypass\.com\/.+/,
 | 
				
			||||||
 | 
						"washingtonpost.com": /.+\.washingtonpost\.com\/.+\/pwapi-proxy\.min\.js/,
 | 
				
			||||||
 | 
						"thehindu.com": /ajax\.cloudflare\.com\/cdn-cgi\/scripts\/.+\/cloudflare-static\/rocket-loader\.min\.js/,
 | 
				
			||||||
 | 
						"technologyreview.com": /.+\.blueconic\.net\/.+/,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					module.exports.useGoogleBotSites = [
 | 
				
			||||||
 | 
						"adelaidenow.com.au",
 | 
				
			||||||
 | 
						"barrons.com",
 | 
				
			||||||
 | 
						"couriermail.com.au",
 | 
				
			||||||
 | 
						"dailytelegraph.com.au",
 | 
				
			||||||
 | 
						"fd.nl",
 | 
				
			||||||
 | 
						"genomeweb.com",
 | 
				
			||||||
 | 
						"haaretz.co.il",
 | 
				
			||||||
 | 
						"haaretz.com",
 | 
				
			||||||
 | 
						"heraldsun.com.au",
 | 
				
			||||||
 | 
						"mexiconewsdaily.com",
 | 
				
			||||||
 | 
						"ntnews.com.au",
 | 
				
			||||||
 | 
						"quora.com",
 | 
				
			||||||
 | 
						"seekingalpha.com",
 | 
				
			||||||
 | 
						"telegraph.co.uk",
 | 
				
			||||||
 | 
						"theaustralian.com.au",
 | 
				
			||||||
 | 
						"themarker.com",
 | 
				
			||||||
 | 
						"themercury.com.au",
 | 
				
			||||||
 | 
						"thenational.scot",
 | 
				
			||||||
 | 
						"thetimes.co.uk",
 | 
				
			||||||
 | 
						"wsj.com",
 | 
				
			||||||
 | 
						"kansascity.com",
 | 
				
			||||||
 | 
						"republic.ru",
 | 
				
			||||||
 | 
						"nzz.ch",
 | 
				
			||||||
 | 
						"handelsblatt.com",
 | 
				
			||||||
 | 
						"washingtonpost.com",
 | 
				
			||||||
 | 
						"df.cl",
 | 
				
			||||||
 | 
					];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Find the first entry of `domains` that `hostname` equals or is a
					 * subdomain of.
					 *
					 * @param {string|string[]} domains - One domain or a list of domains.
					 * @param {string} hostname - Hostname to test.
					 * @returns {string|false} The matching domain, or `false` when there
					 *   is none (including when `hostname` is not a string).
					 */
					function matchDomain(domains, hostname) {
						// A missing hostname previously blew up in `.endsWith`.
						if (typeof hostname !== "string") {
							return false;
						}
						if (typeof domains === "string") {
							domains = [domains];
						}
						const matched = domains.find(
							(domain) => hostname === domain || hostname.endsWith("." + domain)
						);
						return matched || false;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Like matchDomain(), but starting from a URL: the URL is first
					// reduced with urlHost() and the result is matched against `domains`.
					function matchUrlDomain(domains, url) {
						const hostname = urlHost(url);
						return matchDomain(domains, hostname);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Return the hostname of `url`.
					 *
					 * Values that are falsy, do not start with "http", or fail to parse
					 * are returned unchanged (a parse failure is also logged).
					 */
					function urlHost(url) {
						if (!url || !url.startsWith("http")) {
							return url;
						}
						try {
							return new URL(url).hostname;
						} catch (e) {
							console.log(`url not valid: ${url} error: ${e}`);
							return url;
						}
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					module.exports.matchDomain = matchDomain;
 | 
				
			||||||
 | 
					module.exports.matchUrlDomain = matchUrlDomain;
 | 
				
			||||||
 | 
					module.exports.urlHost = urlHost;
 | 
				
			||||||
							
								
								
									
										18
									
								
								readerserver/utils/user-agent.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								readerserver/utils/user-agent.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,18 @@
 | 
				
			|||||||
 | 
					const { googleBot } = require('./constants');
 | 
				
			||||||
 | 
					const { matchUrlDomain, useGoogleBotSites } = require("./sites");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Pick the request identity for `url`.
					 *
					 * @param {string} url - URL about to be requested.
					 * @returns {{userAgent?: string, headers?: object}} The Googlebot user
					 *   agent plus an X-Forwarded-For header when the URL's domain is
					 *   listed in useGoogleBotSites; otherwise an empty object.
					 */
					module.exports.getUserAgent = (url) => {
						const isListedSite = (site) => typeof site === "string" && matchUrlDomain(site, url);

						if (useGoogleBotSites.some(isListedSite)) {
							return {
								userAgent: googleBot.userAgent,
								headers: {
									"X-Forwarded-For": googleBot.ip
								}
							};
						}
						return {};
					};
 | 
				
			||||||
@@ -2,6 +2,23 @@
 | 
				
			|||||||
# yarn lockfile v1
 | 
					# yarn lockfile v1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					"@mozilla/readability@^0.3.0":
 | 
				
			||||||
 | 
					  version "0.3.0"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/@mozilla/readability/-/readability-0.3.0.tgz#a473790e8b42ea39f9f03703fa1f17fec7984b60"
 | 
				
			||||||
 | 
					  integrity sha512-q8f1CAZsRKK1j+O0BmikGIlKSK03RpT4woT0PCQwhw0nH0z4+rG026AkxoPcjT7Dsgh1ifGscW8tOpvjoyOjvw==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					"@types/node@*":
 | 
				
			||||||
 | 
					  version "14.14.7"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/@types/node/-/node-14.14.7.tgz#8ea1e8f8eae2430cf440564b98c6dfce1ec5945d"
 | 
				
			||||||
 | 
					  integrity sha512-Zw1vhUSQZYw+7u5dAwNbIA9TuTotpzY/OF7sJM9FqPOF3SPjKnxrjoTktXDZgUjybf4cWVBP7O8wvKdSaGHweg==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					"@types/yauzl@^2.9.1":
 | 
				
			||||||
 | 
					  version "2.9.1"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/@types/yauzl/-/yauzl-2.9.1.tgz#d10f69f9f522eef3cf98e30afb684a1e1ec923af"
 | 
				
			||||||
 | 
					  integrity sha512-A1b8SU4D10uoPjwb0lnHmmu8wZhR9d+9o2PKBQT2jU5YPTKsxac6M2qGAdY7VcL+dHHhARVUDmeg0rOrcd9EjA==
 | 
				
			||||||
 | 
					  dependencies:
 | 
				
			||||||
 | 
					    "@types/node" "*"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
abab@^2.0.0:
 | 
					abab@^2.0.0:
 | 
				
			||||||
  version "2.0.0"
 | 
					  version "2.0.0"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/abab/-/abab-2.0.0.tgz#aba0ab4c5eee2d4c79d3487d85450fb2376ebb0f"
 | 
					  resolved "https://registry.yarnpkg.com/abab/-/abab-2.0.0.tgz#aba0ab4c5eee2d4c79d3487d85450fb2376ebb0f"
 | 
				
			||||||
@@ -33,6 +50,13 @@ acorn@^6.0.1, acorn@^6.1.1:
 | 
				
			|||||||
  resolved "https://registry.yarnpkg.com/acorn/-/acorn-6.3.0.tgz#0087509119ffa4fc0a0041d1e93a417e68cb856e"
 | 
					  resolved "https://registry.yarnpkg.com/acorn/-/acorn-6.3.0.tgz#0087509119ffa4fc0a0041d1e93a417e68cb856e"
 | 
				
			||||||
  integrity sha512-/czfa8BwS88b9gWQVhc8eknunSA2DoJpJyTQkhheIf5E48u1N0R4q/YxxsAeqRrmK9TQ/uYfgLDfZo91UlANIA==
 | 
					  integrity sha512-/czfa8BwS88b9gWQVhc8eknunSA2DoJpJyTQkhheIf5E48u1N0R4q/YxxsAeqRrmK9TQ/uYfgLDfZo91UlANIA==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					agent-base@6:
 | 
				
			||||||
 | 
					  version "6.0.2"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-6.0.2.tgz#49fff58577cfee3f37176feab4c22e00f86d7f77"
 | 
				
			||||||
 | 
					  integrity sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==
 | 
				
			||||||
 | 
					  dependencies:
 | 
				
			||||||
 | 
					    debug "4"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ajv@^6.5.5:
 | 
					ajv@^6.5.5:
 | 
				
			||||||
  version "6.10.2"
 | 
					  version "6.10.2"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/ajv/-/ajv-6.10.2.tgz#d3cea04d6b017b2894ad69040fec8b623eb4bd52"
 | 
					  resolved "https://registry.yarnpkg.com/ajv/-/ajv-6.10.2.tgz#d3cea04d6b017b2894ad69040fec8b623eb4bd52"
 | 
				
			||||||
@@ -85,6 +109,11 @@ aws4@^1.8.0:
 | 
				
			|||||||
  resolved "https://registry.yarnpkg.com/aws4/-/aws4-1.8.0.tgz#f0e003d9ca9e7f59c7a508945d7b2ef9a04a542f"
 | 
					  resolved "https://registry.yarnpkg.com/aws4/-/aws4-1.8.0.tgz#f0e003d9ca9e7f59c7a508945d7b2ef9a04a542f"
 | 
				
			||||||
  integrity sha512-ReZxvNHIOv88FlT7rxcXIIC0fPt4KZqZbOlivyWtXLt8ESx84zd3kMC6iK5jVeS2qt+g7ftS7ye4fi06X5rtRQ==
 | 
					  integrity sha512-ReZxvNHIOv88FlT7rxcXIIC0fPt4KZqZbOlivyWtXLt8ESx84zd3kMC6iK5jVeS2qt+g7ftS7ye4fi06X5rtRQ==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					balanced-match@^1.0.0:
 | 
				
			||||||
 | 
					  version "1.0.0"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.0.tgz#89b4d199ab2bee49de164ea02b89ce462d71b767"
 | 
				
			||||||
 | 
					  integrity sha1-ibTRmasr7kneFk6gK4nORi1xt2c=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bcrypt-pbkdf@^1.0.0:
 | 
					bcrypt-pbkdf@^1.0.0:
 | 
				
			||||||
  version "1.0.2"
 | 
					  version "1.0.2"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz#a4301d389b6a43f9b67ff3ca11a3f6637e360e9e"
 | 
					  resolved "https://registry.yarnpkg.com/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz#a4301d389b6a43f9b67ff3ca11a3f6637e360e9e"
 | 
				
			||||||
@@ -108,11 +137,24 @@ body-parser@1.19.0:
 | 
				
			|||||||
    raw-body "2.4.0"
 | 
					    raw-body "2.4.0"
 | 
				
			||||||
    type-is "~1.6.17"
 | 
					    type-is "~1.6.17"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					brace-expansion@^1.1.7:
 | 
				
			||||||
 | 
					  version "1.1.11"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd"
 | 
				
			||||||
 | 
					  integrity sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==
 | 
				
			||||||
 | 
					  dependencies:
 | 
				
			||||||
 | 
					    balanced-match "^1.0.0"
 | 
				
			||||||
 | 
					    concat-map "0.0.1"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
browser-process-hrtime@^0.1.2:
 | 
					browser-process-hrtime@^0.1.2:
 | 
				
			||||||
  version "0.1.3"
 | 
					  version "0.1.3"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/browser-process-hrtime/-/browser-process-hrtime-0.1.3.tgz#616f00faef1df7ec1b5bf9cfe2bdc3170f26c7b4"
 | 
					  resolved "https://registry.yarnpkg.com/browser-process-hrtime/-/browser-process-hrtime-0.1.3.tgz#616f00faef1df7ec1b5bf9cfe2bdc3170f26c7b4"
 | 
				
			||||||
  integrity sha512-bRFnI4NnjO6cnyLmOV/7PVoDEMJChlcfN0z4s1YMBY989/SvlfMI1lgCnkFUs53e9gQF+w7qu7XdllSTiSl8Aw==
 | 
					  integrity sha512-bRFnI4NnjO6cnyLmOV/7PVoDEMJChlcfN0z4s1YMBY989/SvlfMI1lgCnkFUs53e9gQF+w7qu7XdllSTiSl8Aw==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					buffer-crc32@~0.2.3:
 | 
				
			||||||
 | 
					  version "0.2.13"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/buffer-crc32/-/buffer-crc32-0.2.13.tgz#0d333e3f00eac50aa1454abd30ef8c2a5d9a7242"
 | 
				
			||||||
 | 
					  integrity sha1-DTM+PwDqxQqhRUq9MO+MKl2ackI=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bytes@3.1.0:
 | 
					bytes@3.1.0:
 | 
				
			||||||
  version "3.1.0"
 | 
					  version "3.1.0"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/bytes/-/bytes-3.1.0.tgz#f6cf7933a360e0588fa9fde85651cdc7f805d1f6"
 | 
					  resolved "https://registry.yarnpkg.com/bytes/-/bytes-3.1.0.tgz#f6cf7933a360e0588fa9fde85651cdc7f805d1f6"
 | 
				
			||||||
@@ -130,6 +172,11 @@ combined-stream@^1.0.6, combined-stream@~1.0.6:
 | 
				
			|||||||
  dependencies:
 | 
					  dependencies:
 | 
				
			||||||
    delayed-stream "~1.0.0"
 | 
					    delayed-stream "~1.0.0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					concat-map@0.0.1:
 | 
				
			||||||
 | 
					  version "0.0.1"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
 | 
				
			||||||
 | 
					  integrity sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
content-disposition@0.5.3:
 | 
					content-disposition@0.5.3:
 | 
				
			||||||
  version "0.5.3"
 | 
					  version "0.5.3"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/content-disposition/-/content-disposition-0.5.3.tgz#e130caf7e7279087c5616c2007d0485698984fbd"
 | 
					  resolved "https://registry.yarnpkg.com/content-disposition/-/content-disposition-0.5.3.tgz#e130caf7e7279087c5616c2007d0485698984fbd"
 | 
				
			||||||
@@ -192,6 +239,13 @@ debug@2.6.9:
 | 
				
			|||||||
  dependencies:
 | 
					  dependencies:
 | 
				
			||||||
    ms "2.0.0"
 | 
					    ms "2.0.0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					debug@4, debug@^4.1.1:
 | 
				
			||||||
 | 
					  version "4.2.0"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/debug/-/debug-4.2.0.tgz#7f150f93920e94c58f5574c2fd01a3110effe7f1"
 | 
				
			||||||
 | 
					  integrity sha512-IX2ncY78vDTjZMFUdmsvIRFY2Cf4FnD0wRs+nQwJU8Lu99/tPFdb0VybiiMTPe3I6rQmwsqQqRBvxU+bZ/I8sg==
 | 
				
			||||||
 | 
					  dependencies:
 | 
				
			||||||
 | 
					    ms "2.1.2"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
deep-is@~0.1.3:
 | 
					deep-is@~0.1.3:
 | 
				
			||||||
  version "0.1.3"
 | 
					  version "0.1.3"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/deep-is/-/deep-is-0.1.3.tgz#b369d6fb5dbc13eecf524f91b070feedc357cf34"
 | 
					  resolved "https://registry.yarnpkg.com/deep-is/-/deep-is-0.1.3.tgz#b369d6fb5dbc13eecf524f91b070feedc357cf34"
 | 
				
			||||||
@@ -242,6 +296,13 @@ encodeurl@~1.0.2:
 | 
				
			|||||||
  resolved "https://registry.yarnpkg.com/encodeurl/-/encodeurl-1.0.2.tgz#ad3ff4c86ec2d029322f5a02c3a9a606c95b3f59"
 | 
					  resolved "https://registry.yarnpkg.com/encodeurl/-/encodeurl-1.0.2.tgz#ad3ff4c86ec2d029322f5a02c3a9a606c95b3f59"
 | 
				
			||||||
  integrity sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k=
 | 
					  integrity sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					end-of-stream@^1.1.0:
 | 
				
			||||||
 | 
					  version "1.4.4"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/end-of-stream/-/end-of-stream-1.4.4.tgz#5ae64a5f45057baf3626ec14da0ca5e4b2431eb0"
 | 
				
			||||||
 | 
					  integrity sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==
 | 
				
			||||||
 | 
					  dependencies:
 | 
				
			||||||
 | 
					    once "^1.4.0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
escape-html@~1.0.3:
 | 
					escape-html@~1.0.3:
 | 
				
			||||||
  version "1.0.3"
 | 
					  version "1.0.3"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/escape-html/-/escape-html-1.0.3.tgz#0258eae4d3d0c0974de1c169188ef0051d1d1988"
 | 
					  resolved "https://registry.yarnpkg.com/escape-html/-/escape-html-1.0.3.tgz#0258eae4d3d0c0974de1c169188ef0051d1d1988"
 | 
				
			||||||
@@ -320,6 +381,17 @@ extend@~3.0.2:
 | 
				
			|||||||
  resolved "https://registry.yarnpkg.com/extend/-/extend-3.0.2.tgz#f8b1136b4071fbd8eb140aff858b1019ec2915fa"
 | 
					  resolved "https://registry.yarnpkg.com/extend/-/extend-3.0.2.tgz#f8b1136b4071fbd8eb140aff858b1019ec2915fa"
 | 
				
			||||||
  integrity sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==
 | 
					  integrity sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					extract-zip@^2.0.1:
 | 
				
			||||||
 | 
					  version "2.0.1"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/extract-zip/-/extract-zip-2.0.1.tgz#663dca56fe46df890d5f131ef4a06d22bb8ba13a"
 | 
				
			||||||
 | 
					  integrity sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==
 | 
				
			||||||
 | 
					  dependencies:
 | 
				
			||||||
 | 
					    debug "^4.1.1"
 | 
				
			||||||
 | 
					    get-stream "^5.1.0"
 | 
				
			||||||
 | 
					    yauzl "^2.10.0"
 | 
				
			||||||
 | 
					  optionalDependencies:
 | 
				
			||||||
 | 
					    "@types/yauzl" "^2.9.1"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extsprintf@1.3.0:
 | 
					extsprintf@1.3.0:
 | 
				
			||||||
  version "1.3.0"
 | 
					  version "1.3.0"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/extsprintf/-/extsprintf-1.3.0.tgz#96918440e3041a7a414f8c52e3c574eb3c3e1e05"
 | 
					  resolved "https://registry.yarnpkg.com/extsprintf/-/extsprintf-1.3.0.tgz#96918440e3041a7a414f8c52e3c574eb3c3e1e05"
 | 
				
			||||||
@@ -345,6 +417,13 @@ fast-levenshtein@~2.0.4:
 | 
				
			|||||||
  resolved "https://registry.yarnpkg.com/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz#3d8a5c66883a16a30ca8643e851f19baa7797917"
 | 
					  resolved "https://registry.yarnpkg.com/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz#3d8a5c66883a16a30ca8643e851f19baa7797917"
 | 
				
			||||||
  integrity sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=
 | 
					  integrity sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fd-slicer@~1.1.0:
 | 
				
			||||||
 | 
					  version "1.1.0"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/fd-slicer/-/fd-slicer-1.1.0.tgz#25c7c89cb1f9077f8891bbe61d8f390eae256f1e"
 | 
				
			||||||
 | 
					  integrity sha1-JcfInLH5B3+IkbvmHY85Dq4lbx4=
 | 
				
			||||||
 | 
					  dependencies:
 | 
				
			||||||
 | 
					    pend "~1.2.0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
finalhandler@~1.1.2:
 | 
					finalhandler@~1.1.2:
 | 
				
			||||||
  version "1.1.2"
 | 
					  version "1.1.2"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/finalhandler/-/finalhandler-1.1.2.tgz#b7e7d000ffd11938d0fdb053506f6ebabe9f587d"
 | 
					  resolved "https://registry.yarnpkg.com/finalhandler/-/finalhandler-1.1.2.tgz#b7e7d000ffd11938d0fdb053506f6ebabe9f587d"
 | 
				
			||||||
@@ -382,6 +461,18 @@ fresh@0.5.2:
 | 
				
			|||||||
  resolved "https://registry.yarnpkg.com/fresh/-/fresh-0.5.2.tgz#3d8cadd90d976569fa835ab1f8e4b23a105605a7"
 | 
					  resolved "https://registry.yarnpkg.com/fresh/-/fresh-0.5.2.tgz#3d8cadd90d976569fa835ab1f8e4b23a105605a7"
 | 
				
			||||||
  integrity sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac=
 | 
					  integrity sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fs.realpath@^1.0.0:
 | 
				
			||||||
 | 
					  version "1.0.0"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f"
 | 
				
			||||||
 | 
					  integrity sha1-FQStJSMVjKpA20onh8sBQRmU6k8=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					get-stream@^5.1.0:
 | 
				
			||||||
 | 
					  version "5.2.0"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/get-stream/-/get-stream-5.2.0.tgz#4966a1795ee5ace65e706c4b7beb71257d6e22d3"
 | 
				
			||||||
 | 
					  integrity sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==
 | 
				
			||||||
 | 
					  dependencies:
 | 
				
			||||||
 | 
					    pump "^3.0.0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
getpass@^0.1.1:
 | 
					getpass@^0.1.1:
 | 
				
			||||||
  version "0.1.7"
 | 
					  version "0.1.7"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/getpass/-/getpass-0.1.7.tgz#5eff8e3e684d569ae4cb2b1282604e8ba62149fa"
 | 
					  resolved "https://registry.yarnpkg.com/getpass/-/getpass-0.1.7.tgz#5eff8e3e684d569ae4cb2b1282604e8ba62149fa"
 | 
				
			||||||
@@ -389,6 +480,23 @@ getpass@^0.1.1:
 | 
				
			|||||||
  dependencies:
 | 
					  dependencies:
 | 
				
			||||||
    assert-plus "^1.0.0"
 | 
					    assert-plus "^1.0.0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					glob@^7.1.3:
 | 
				
			||||||
 | 
					  version "7.1.6"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.6.tgz#141f33b81a7c2492e125594307480c46679278a6"
 | 
				
			||||||
 | 
					  integrity sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==
 | 
				
			||||||
 | 
					  dependencies:
 | 
				
			||||||
 | 
					    fs.realpath "^1.0.0"
 | 
				
			||||||
 | 
					    inflight "^1.0.4"
 | 
				
			||||||
 | 
					    inherits "2"
 | 
				
			||||||
 | 
					    minimatch "^3.0.4"
 | 
				
			||||||
 | 
					    once "^1.3.0"
 | 
				
			||||||
 | 
					    path-is-absolute "^1.0.0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					graceful-fs@^4.1.11:
 | 
				
			||||||
 | 
					  version "4.2.4"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.4.tgz#2256bde14d3632958c465ebc96dc467ca07a29fb"
 | 
				
			||||||
 | 
					  integrity sha512-WjKPNJF79dtJAVniUlGGWHYGz2jWxT6VhN/4m1NdkbZ2nOsEF+cI1Edgql5zCRhs/VsQYRvrXctxktVXZUkixw==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
har-schema@^2.0.0:
 | 
					har-schema@^2.0.0:
 | 
				
			||||||
  version "2.0.0"
 | 
					  version "2.0.0"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/har-schema/-/har-schema-2.0.0.tgz#a94c2224ebcac04782a0d9035521f24735b7ec92"
 | 
					  resolved "https://registry.yarnpkg.com/har-schema/-/har-schema-2.0.0.tgz#a94c2224ebcac04782a0d9035521f24735b7ec92"
 | 
				
			||||||
@@ -440,6 +548,14 @@ http-signature@~1.2.0:
 | 
				
			|||||||
    jsprim "^1.2.2"
 | 
					    jsprim "^1.2.2"
 | 
				
			||||||
    sshpk "^1.7.0"
 | 
					    sshpk "^1.7.0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					https-proxy-agent@^5.0.0:
 | 
				
			||||||
 | 
					  version "5.0.0"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-5.0.0.tgz#e2a90542abb68a762e0a0850f6c9edadfd8506b2"
 | 
				
			||||||
 | 
					  integrity sha512-EkYm5BcKUGiduxzSt3Eppko+PiNWNEpa4ySk9vTC6wDsQJW9rHSa+UhGNJoRYp7bz6Ht1eaRIa6QaJqO5rCFbA==
 | 
				
			||||||
 | 
					  dependencies:
 | 
				
			||||||
 | 
					    agent-base "6"
 | 
				
			||||||
 | 
					    debug "4"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
iconv-lite@0.4.24:
 | 
					iconv-lite@0.4.24:
 | 
				
			||||||
  version "0.4.24"
 | 
					  version "0.4.24"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/iconv-lite/-/iconv-lite-0.4.24.tgz#2022b4b25fbddc21d2f524974a474aafe733908b"
 | 
					  resolved "https://registry.yarnpkg.com/iconv-lite/-/iconv-lite-0.4.24.tgz#2022b4b25fbddc21d2f524974a474aafe733908b"
 | 
				
			||||||
@@ -447,16 +563,24 @@ iconv-lite@0.4.24:
 | 
				
			|||||||
  dependencies:
 | 
					  dependencies:
 | 
				
			||||||
    safer-buffer ">= 2.1.2 < 3"
 | 
					    safer-buffer ">= 2.1.2 < 3"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inflight@^1.0.4:
 | 
				
			||||||
 | 
					  version "1.0.6"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/inflight/-/inflight-1.0.6.tgz#49bd6331d7d02d0c09bc910a1075ba8165b56df9"
 | 
				
			||||||
 | 
					  integrity sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=
 | 
				
			||||||
 | 
					  dependencies:
 | 
				
			||||||
 | 
					    once "^1.3.0"
 | 
				
			||||||
 | 
					    wrappy "1"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inherits@2, inherits@2.0.4:
 | 
				
			||||||
 | 
					  version "2.0.4"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c"
 | 
				
			||||||
 | 
					  integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
inherits@2.0.3:
 | 
					inherits@2.0.3:
 | 
				
			||||||
  version "2.0.3"
 | 
					  version "2.0.3"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.3.tgz#633c2c83e3da42a502f52466022480f4208261de"
 | 
					  resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.3.tgz#633c2c83e3da42a502f52466022480f4208261de"
 | 
				
			||||||
  integrity sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=
 | 
					  integrity sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
inherits@2.0.4:
 | 
					 | 
				
			||||||
  version "2.0.4"
 | 
					 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c"
 | 
					 | 
				
			||||||
  integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ip-regex@^2.1.0:
 | 
					ip-regex@^2.1.0:
 | 
				
			||||||
  version "2.1.0"
 | 
					  version "2.1.0"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/ip-regex/-/ip-regex-2.1.0.tgz#fa78bf5d2e6913c911ce9f819ee5146bb6d844e9"
 | 
					  resolved "https://registry.yarnpkg.com/ip-regex/-/ip-regex-2.1.0.tgz#fa78bf5d2e6913c911ce9f819ee5146bb6d844e9"
 | 
				
			||||||
@@ -477,6 +601,11 @@ isstream@~0.1.2:
 | 
				
			|||||||
  resolved "https://registry.yarnpkg.com/isstream/-/isstream-0.1.2.tgz#47e63f7af55afa6f92e1500e690eb8b8529c099a"
 | 
					  resolved "https://registry.yarnpkg.com/isstream/-/isstream-0.1.2.tgz#47e63f7af55afa6f92e1500e690eb8b8529c099a"
 | 
				
			||||||
  integrity sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo=
 | 
					  integrity sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					jpeg-js@^0.4.2:
 | 
				
			||||||
 | 
					  version "0.4.2"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/jpeg-js/-/jpeg-js-0.4.2.tgz#8b345b1ae4abde64c2da2fe67ea216a114ac279d"
 | 
				
			||||||
 | 
					  integrity sha512-+az2gi/hvex7eLTMTlbRLOhH6P6WFdk2ITI8HJsaH2VqYO0I594zXSYEP+tf4FW+8Cy68ScDXoAsQdyQanv3sw==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
jsbn@~0.1.0:
 | 
					jsbn@~0.1.0:
 | 
				
			||||||
  version "0.1.1"
 | 
					  version "0.1.1"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/jsbn/-/jsbn-0.1.1.tgz#a5e654c2e5a2deb5f201d96cefbca80c0ef2f513"
 | 
					  resolved "https://registry.yarnpkg.com/jsbn/-/jsbn-0.1.1.tgz#a5e654c2e5a2deb5f201d96cefbca80c0ef2f513"
 | 
				
			||||||
@@ -589,6 +718,18 @@ mime@1.6.0:
 | 
				
			|||||||
  resolved "https://registry.yarnpkg.com/mime/-/mime-1.6.0.tgz#32cd9e5c64553bd58d19a568af452acff04981b1"
 | 
					  resolved "https://registry.yarnpkg.com/mime/-/mime-1.6.0.tgz#32cd9e5c64553bd58d19a568af452acff04981b1"
 | 
				
			||||||
  integrity sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==
 | 
					  integrity sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					mime@^2.4.6:
 | 
				
			||||||
 | 
					  version "2.4.6"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/mime/-/mime-2.4.6.tgz#e5b407c90db442f2beb5b162373d07b69affa4d1"
 | 
				
			||||||
 | 
					  integrity sha512-RZKhC3EmpBchfTGBVb8fb+RL2cWyw/32lshnsETttkBAyAUXSGHxbEJWWRXc751DrIxG1q04b8QwMbAwkRPpUA==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					minimatch@^3.0.4:
 | 
				
			||||||
 | 
					  version "3.0.4"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.4.tgz#5166e286457f03306064be5497e8dbb0c3d32083"
 | 
				
			||||||
 | 
					  integrity sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==
 | 
				
			||||||
 | 
					  dependencies:
 | 
				
			||||||
 | 
					    brace-expansion "^1.1.7"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ms@2.0.0:
 | 
					ms@2.0.0:
 | 
				
			||||||
  version "2.0.0"
 | 
					  version "2.0.0"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/ms/-/ms-2.0.0.tgz#5608aeadfc00be6c2901df5f9861788de0d597c8"
 | 
					  resolved "https://registry.yarnpkg.com/ms/-/ms-2.0.0.tgz#5608aeadfc00be6c2901df5f9861788de0d597c8"
 | 
				
			||||||
@@ -599,11 +740,21 @@ ms@2.1.1:
 | 
				
			|||||||
  resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.1.tgz#30a5864eb3ebb0a66f2ebe6d727af06a09d86e0a"
 | 
					  resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.1.tgz#30a5864eb3ebb0a66f2ebe6d727af06a09d86e0a"
 | 
				
			||||||
  integrity sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==
 | 
					  integrity sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ms@2.1.2:
 | 
				
			||||||
 | 
					  version "2.1.2"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009"
 | 
				
			||||||
 | 
					  integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
negotiator@0.6.2:
 | 
					negotiator@0.6.2:
 | 
				
			||||||
  version "0.6.2"
 | 
					  version "0.6.2"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/negotiator/-/negotiator-0.6.2.tgz#feacf7ccf525a77ae9634436a64883ffeca346fb"
 | 
					  resolved "https://registry.yarnpkg.com/negotiator/-/negotiator-0.6.2.tgz#feacf7ccf525a77ae9634436a64883ffeca346fb"
 | 
				
			||||||
  integrity sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw==
 | 
					  integrity sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					node-fetch@^2.6.1:
 | 
				
			||||||
 | 
					  version "2.6.1"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.1.tgz#045bd323631f76ed2e2b55573394416b639a0052"
 | 
				
			||||||
 | 
					  integrity sha512-V4aYg89jEoVRxRb2fJdAg8FHvI7cEyYdVAh94HH0UIK8oJxUfkjlDQN9RbMx+bEjP7+ggMiFRprSti032Oipxw==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
nwsapi@^2.1.4:
 | 
					nwsapi@^2.1.4:
 | 
				
			||||||
  version "2.1.4"
 | 
					  version "2.1.4"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/nwsapi/-/nwsapi-2.1.4.tgz#e006a878db23636f8e8a67d33ca0e4edf61a842f"
 | 
					  resolved "https://registry.yarnpkg.com/nwsapi/-/nwsapi-2.1.4.tgz#e006a878db23636f8e8a67d33ca0e4edf61a842f"
 | 
				
			||||||
@@ -621,6 +772,13 @@ on-finished@~2.3.0:
 | 
				
			|||||||
  dependencies:
 | 
					  dependencies:
 | 
				
			||||||
    ee-first "1.1.1"
 | 
					    ee-first "1.1.1"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					once@^1.3.0, once@^1.3.1, once@^1.4.0:
 | 
				
			||||||
 | 
					  version "1.4.0"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1"
 | 
				
			||||||
 | 
					  integrity sha1-WDsap3WWHUsROsF9nFC6753Xa9E=
 | 
				
			||||||
 | 
					  dependencies:
 | 
				
			||||||
 | 
					    wrappy "1"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
optionator@^0.8.1:
 | 
					optionator@^0.8.1:
 | 
				
			||||||
  version "0.8.2"
 | 
					  version "0.8.2"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.8.2.tgz#364c5e409d3f4d6301d6c0b4c05bba50180aeb64"
 | 
					  resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.8.2.tgz#364c5e409d3f4d6301d6c0b4c05bba50180aeb64"
 | 
				
			||||||
@@ -643,26 +801,72 @@ parseurl@~1.3.3:
 | 
				
			|||||||
  resolved "https://registry.yarnpkg.com/parseurl/-/parseurl-1.3.3.tgz#9da19e7bee8d12dff0513ed5b76957793bc2e8d4"
 | 
					  resolved "https://registry.yarnpkg.com/parseurl/-/parseurl-1.3.3.tgz#9da19e7bee8d12dff0513ed5b76957793bc2e8d4"
 | 
				
			||||||
  integrity sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==
 | 
					  integrity sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					path-is-absolute@^1.0.0:
 | 
				
			||||||
 | 
					  version "1.0.1"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/path-is-absolute/-/path-is-absolute-1.0.1.tgz#174b9268735534ffbc7ace6bf53a5a9e1b5c5f5f"
 | 
				
			||||||
 | 
					  integrity sha1-F0uSaHNVNP+8es5r9TpanhtcX18=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
path-to-regexp@0.1.7:
 | 
					path-to-regexp@0.1.7:
 | 
				
			||||||
  version "0.1.7"
 | 
					  version "0.1.7"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/path-to-regexp/-/path-to-regexp-0.1.7.tgz#df604178005f522f15eb4490e7247a1bfaa67f8c"
 | 
					  resolved "https://registry.yarnpkg.com/path-to-regexp/-/path-to-regexp-0.1.7.tgz#df604178005f522f15eb4490e7247a1bfaa67f8c"
 | 
				
			||||||
  integrity sha1-32BBeABfUi8V60SQ5yR6G/qmf4w=
 | 
					  integrity sha1-32BBeABfUi8V60SQ5yR6G/qmf4w=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pend@~1.2.0:
 | 
				
			||||||
 | 
					  version "1.2.0"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/pend/-/pend-1.2.0.tgz#7a57eb550a6783f9115331fcf4663d5c8e007a50"
 | 
				
			||||||
 | 
					  integrity sha1-elfrVQpng/kRUzH89GY9XI4AelA=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
performance-now@^2.1.0:
 | 
					performance-now@^2.1.0:
 | 
				
			||||||
  version "2.1.0"
 | 
					  version "2.1.0"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/performance-now/-/performance-now-2.1.0.tgz#6309f4e0e5fa913ec1c69307ae364b4b377c9e7b"
 | 
					  resolved "https://registry.yarnpkg.com/performance-now/-/performance-now-2.1.0.tgz#6309f4e0e5fa913ec1c69307ae364b4b377c9e7b"
 | 
				
			||||||
  integrity sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns=
 | 
					  integrity sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					playwright@^1.5.2:
 | 
				
			||||||
 | 
					  version "1.5.2"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/playwright/-/playwright-1.5.2.tgz#e127142cba86c918fad9f68315db5e79524af64c"
 | 
				
			||||||
 | 
					  integrity sha512-on7IEui47bDZta0txL86QKMDSgjbxERkLc5N0+lU2zajIfN/Ld6vMl+xiROEUPlT/QtqVekq9pTDGdcc0yScMQ==
 | 
				
			||||||
 | 
					  dependencies:
 | 
				
			||||||
 | 
					    debug "^4.1.1"
 | 
				
			||||||
 | 
					    extract-zip "^2.0.1"
 | 
				
			||||||
 | 
					    https-proxy-agent "^5.0.0"
 | 
				
			||||||
 | 
					    jpeg-js "^0.4.2"
 | 
				
			||||||
 | 
					    mime "^2.4.6"
 | 
				
			||||||
 | 
					    pngjs "^5.0.0"
 | 
				
			||||||
 | 
					    progress "^2.0.3"
 | 
				
			||||||
 | 
					    proper-lockfile "^4.1.1"
 | 
				
			||||||
 | 
					    proxy-from-env "^1.1.0"
 | 
				
			||||||
 | 
					    rimraf "^3.0.2"
 | 
				
			||||||
 | 
					    ws "^7.3.1"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pn@^1.1.0:
 | 
					pn@^1.1.0:
 | 
				
			||||||
  version "1.1.0"
 | 
					  version "1.1.0"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/pn/-/pn-1.1.0.tgz#e2f4cef0e219f463c179ab37463e4e1ecdccbafb"
 | 
					  resolved "https://registry.yarnpkg.com/pn/-/pn-1.1.0.tgz#e2f4cef0e219f463c179ab37463e4e1ecdccbafb"
 | 
				
			||||||
  integrity sha512-2qHaIQr2VLRFoxe2nASzsV6ef4yOOH+Fi9FBOVH6cqeSgUnoyySPZkxzLuzd+RYOQTRpROA0ztTMqxROKSb/nA==
 | 
					  integrity sha512-2qHaIQr2VLRFoxe2nASzsV6ef4yOOH+Fi9FBOVH6cqeSgUnoyySPZkxzLuzd+RYOQTRpROA0ztTMqxROKSb/nA==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pngjs@^5.0.0:
 | 
				
			||||||
 | 
					  version "5.0.0"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/pngjs/-/pngjs-5.0.0.tgz#e79dd2b215767fd9c04561c01236df960bce7fbb"
 | 
				
			||||||
 | 
					  integrity sha512-40QW5YalBNfQo5yRYmiw7Yz6TKKVr3h6970B2YE+3fQpsWcrbj1PzJgxeJ19DRQjhMbKPIuMY8rFaXc8moolVw==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
prelude-ls@~1.1.2:
 | 
					prelude-ls@~1.1.2:
 | 
				
			||||||
  version "1.1.2"
 | 
					  version "1.1.2"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.1.2.tgz#21932a549f5e52ffd9a827f570e04be62a97da54"
 | 
					  resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.1.2.tgz#21932a549f5e52ffd9a827f570e04be62a97da54"
 | 
				
			||||||
  integrity sha1-IZMqVJ9eUv/ZqCf1cOBL5iqX2lQ=
 | 
					  integrity sha1-IZMqVJ9eUv/ZqCf1cOBL5iqX2lQ=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					progress@^2.0.3:
 | 
				
			||||||
 | 
					  version "2.0.3"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8"
 | 
				
			||||||
 | 
					  integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proper-lockfile@^4.1.1:
 | 
				
			||||||
 | 
					  version "4.1.1"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/proper-lockfile/-/proper-lockfile-4.1.1.tgz#284cf9db9e30a90e647afad69deb7cb06881262c"
 | 
				
			||||||
 | 
					  integrity sha512-1w6rxXodisVpn7QYvLk706mzprPTAPCYAqxMvctmPN3ekuRk/kuGkGc82pangZiAt4R3lwSuUzheTTn0/Yb7Zg==
 | 
				
			||||||
 | 
					  dependencies:
 | 
				
			||||||
 | 
					    graceful-fs "^4.1.11"
 | 
				
			||||||
 | 
					    retry "^0.12.0"
 | 
				
			||||||
 | 
					    signal-exit "^3.0.2"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
proxy-addr@~2.0.5:
 | 
					proxy-addr@~2.0.5:
 | 
				
			||||||
  version "2.0.5"
 | 
					  version "2.0.5"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/proxy-addr/-/proxy-addr-2.0.5.tgz#34cbd64a2d81f4b1fd21e76f9f06c8a45299ee34"
 | 
					  resolved "https://registry.yarnpkg.com/proxy-addr/-/proxy-addr-2.0.5.tgz#34cbd64a2d81f4b1fd21e76f9f06c8a45299ee34"
 | 
				
			||||||
@@ -671,11 +875,24 @@ proxy-addr@~2.0.5:
 | 
				
			|||||||
    forwarded "~0.1.2"
 | 
					    forwarded "~0.1.2"
 | 
				
			||||||
    ipaddr.js "1.9.0"
 | 
					    ipaddr.js "1.9.0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					proxy-from-env@^1.1.0:
 | 
				
			||||||
 | 
					  version "1.1.0"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/proxy-from-env/-/proxy-from-env-1.1.0.tgz#e102f16ca355424865755d2c9e8ea4f24d58c3e2"
 | 
				
			||||||
 | 
					  integrity sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
psl@^1.1.24, psl@^1.1.28:
 | 
					psl@^1.1.24, psl@^1.1.28:
 | 
				
			||||||
  version "1.3.0"
 | 
					  version "1.3.0"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/psl/-/psl-1.3.0.tgz#e1ebf6a3b5564fa8376f3da2275da76d875ca1bd"
 | 
					  resolved "https://registry.yarnpkg.com/psl/-/psl-1.3.0.tgz#e1ebf6a3b5564fa8376f3da2275da76d875ca1bd"
 | 
				
			||||||
  integrity sha512-avHdspHO+9rQTLbv1RO+MPYeP/SzsCoxofjVnHanETfQhTJrmB0HlDoW+EiN/R+C0BZ+gERab9NY0lPN2TxNag==
 | 
					  integrity sha512-avHdspHO+9rQTLbv1RO+MPYeP/SzsCoxofjVnHanETfQhTJrmB0HlDoW+EiN/R+C0BZ+gERab9NY0lPN2TxNag==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pump@^3.0.0:
 | 
				
			||||||
 | 
					  version "3.0.0"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/pump/-/pump-3.0.0.tgz#b4a2116815bde2f4e1ea602354e8c75565107a64"
 | 
				
			||||||
 | 
					  integrity sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==
 | 
				
			||||||
 | 
					  dependencies:
 | 
				
			||||||
 | 
					    end-of-stream "^1.1.0"
 | 
				
			||||||
 | 
					    once "^1.3.1"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
punycode@^1.4.1:
 | 
					punycode@^1.4.1:
 | 
				
			||||||
  version "1.4.1"
 | 
					  version "1.4.1"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.4.1.tgz#c0d5a63b2718800ad8e1eb0fa5269c84dd41845e"
 | 
					  resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.4.1.tgz#c0d5a63b2718800ad8e1eb0fa5269c84dd41845e"
 | 
				
			||||||
@@ -711,10 +928,6 @@ raw-body@2.4.0:
 | 
				
			|||||||
    iconv-lite "0.4.24"
 | 
					    iconv-lite "0.4.24"
 | 
				
			||||||
    unpipe "1.0.0"
 | 
					    unpipe "1.0.0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
"readability@https://github.com/mozilla/readability":
 | 
					 | 
				
			||||||
  version "0.3.0"
 | 
					 | 
				
			||||||
  resolved "https://github.com/mozilla/readability#d5eea06a0095b3138dbd1f6233f656d690200509"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
request-promise-core@1.1.2:
 | 
					request-promise-core@1.1.2:
 | 
				
			||||||
  version "1.1.2"
 | 
					  version "1.1.2"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/request-promise-core/-/request-promise-core-1.1.2.tgz#339f6aababcafdb31c799ff158700336301d3346"
 | 
					  resolved "https://registry.yarnpkg.com/request-promise-core/-/request-promise-core-1.1.2.tgz#339f6aababcafdb31c799ff158700336301d3346"
 | 
				
			||||||
@@ -757,6 +970,18 @@ request@^2.88.0:
 | 
				
			|||||||
    tunnel-agent "^0.6.0"
 | 
					    tunnel-agent "^0.6.0"
 | 
				
			||||||
    uuid "^3.3.2"
 | 
					    uuid "^3.3.2"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					retry@^0.12.0:
 | 
				
			||||||
 | 
					  version "0.12.0"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/retry/-/retry-0.12.0.tgz#1b42a6266a21f07421d1b0b54b7dc167b01c013b"
 | 
				
			||||||
 | 
					  integrity sha1-G0KmJmoh8HQh0bC1S33BZ7AcATs=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					rimraf@^3.0.2:
 | 
				
			||||||
 | 
					  version "3.0.2"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-3.0.2.tgz#f1a5402ba6220ad52cc1282bac1ae3aa49fd061a"
 | 
				
			||||||
 | 
					  integrity sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==
 | 
				
			||||||
 | 
					  dependencies:
 | 
				
			||||||
 | 
					    glob "^7.1.3"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
safe-buffer@5.1.2:
 | 
					safe-buffer@5.1.2:
 | 
				
			||||||
  version "5.1.2"
 | 
					  version "5.1.2"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.1.2.tgz#991ec69d296e0313747d59bdfd2b745c35f8828d"
 | 
					  resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.1.2.tgz#991ec69d296e0313747d59bdfd2b745c35f8828d"
 | 
				
			||||||
@@ -813,6 +1038,11 @@ setprototypeof@1.1.1:
 | 
				
			|||||||
  resolved "https://registry.yarnpkg.com/setprototypeof/-/setprototypeof-1.1.1.tgz#7e95acb24aa92f5885e0abef5ba131330d4ae683"
 | 
					  resolved "https://registry.yarnpkg.com/setprototypeof/-/setprototypeof-1.1.1.tgz#7e95acb24aa92f5885e0abef5ba131330d4ae683"
 | 
				
			||||||
  integrity sha512-JvdAWfbXeIGaZ9cILp38HntZSFSo3mWg6xGcJJsd+d4aRMOqauag1C63dJfDw7OaMYwEbHMOxEZ1lqVRYP2OAw==
 | 
					  integrity sha512-JvdAWfbXeIGaZ9cILp38HntZSFSo3mWg6xGcJJsd+d4aRMOqauag1C63dJfDw7OaMYwEbHMOxEZ1lqVRYP2OAw==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					signal-exit@^3.0.2:
 | 
				
			||||||
 | 
					  version "3.0.3"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.3.tgz#a1410c2edd8f077b08b4e253c8eacfcaf057461c"
 | 
				
			||||||
 | 
					  integrity sha512-VUJ49FC8U1OxwZLxIbTTrDvLnf/6TDgxZcK8wxR8zs13xpx7xbG60ndBlhNrFi2EMuFRoeDoJO7wthSLq42EjA==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
source-map@~0.6.1:
 | 
					source-map@~0.6.1:
 | 
				
			||||||
  version "0.6.1"
 | 
					  version "0.6.1"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.6.1.tgz#74722af32e9614e9c287a8d0bbde48b5e2f1a263"
 | 
					  resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.6.1.tgz#74722af32e9614e9c287a8d0bbde48b5e2f1a263"
 | 
				
			||||||
@@ -995,6 +1225,11 @@ wordwrap@~1.0.0:
 | 
				
			|||||||
  resolved "https://registry.yarnpkg.com/wordwrap/-/wordwrap-1.0.0.tgz#27584810891456a4171c8d0226441ade90cbcaeb"
 | 
					  resolved "https://registry.yarnpkg.com/wordwrap/-/wordwrap-1.0.0.tgz#27584810891456a4171c8d0226441ade90cbcaeb"
 | 
				
			||||||
  integrity sha1-J1hIEIkUVqQXHI0CJkQa3pDLyus=
 | 
					  integrity sha1-J1hIEIkUVqQXHI0CJkQa3pDLyus=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					wrappy@1:
 | 
				
			||||||
 | 
					  version "1.0.2"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f"
 | 
				
			||||||
 | 
					  integrity sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ws@^7.0.0:
 | 
					ws@^7.0.0:
 | 
				
			||||||
  version "7.1.2"
 | 
					  version "7.1.2"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/ws/-/ws-7.1.2.tgz#c672d1629de8bb27a9699eb599be47aeeedd8f73"
 | 
					  resolved "https://registry.yarnpkg.com/ws/-/ws-7.1.2.tgz#c672d1629de8bb27a9699eb599be47aeeedd8f73"
 | 
				
			||||||
@@ -1002,6 +1237,11 @@ ws@^7.0.0:
 | 
				
			|||||||
  dependencies:
 | 
					  dependencies:
 | 
				
			||||||
    async-limiter "^1.0.0"
 | 
					    async-limiter "^1.0.0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ws@^7.3.1:
 | 
				
			||||||
 | 
					  version "7.4.0"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/ws/-/ws-7.4.0.tgz#a5dd76a24197940d4a8bb9e0e152bb4503764da7"
 | 
				
			||||||
 | 
					  integrity sha512-kyFwXuV/5ymf+IXhS6f0+eAFvydbaBW3zjpT6hUdAh/hbVjTIB5EHBGi0bPoCLSK2wcuz3BrEkB9LrYv1Nm4NQ==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
xml-name-validator@^3.0.0:
 | 
					xml-name-validator@^3.0.0:
 | 
				
			||||||
  version "3.0.0"
 | 
					  version "3.0.0"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/xml-name-validator/-/xml-name-validator-3.0.0.tgz#6ae73e06de4d8c6e47f9fb181f78d648ad457c6a"
 | 
					  resolved "https://registry.yarnpkg.com/xml-name-validator/-/xml-name-validator-3.0.0.tgz#6ae73e06de4d8c6e47f9fb181f78d648ad457c6a"
 | 
				
			||||||
@@ -1011,3 +1251,11 @@ xmlchars@^2.1.1:
 | 
				
			|||||||
  version "2.1.1"
 | 
					  version "2.1.1"
 | 
				
			||||||
  resolved "https://registry.yarnpkg.com/xmlchars/-/xmlchars-2.1.1.tgz#ef1a81c05bff629c2280007f12daca21bd6f6c93"
 | 
					  resolved "https://registry.yarnpkg.com/xmlchars/-/xmlchars-2.1.1.tgz#ef1a81c05bff629c2280007f12daca21bd6f6c93"
 | 
				
			||||||
  integrity sha512-7hew1RPJ1iIuje/Y01bGD/mXokXxegAgVS+e+E0wSi2ILHQkYAH1+JXARwTjZSM4Z4Z+c73aKspEcqj+zPPL/w==
 | 
					  integrity sha512-7hew1RPJ1iIuje/Y01bGD/mXokXxegAgVS+e+E0wSi2ILHQkYAH1+JXARwTjZSM4Z4Z+c73aKspEcqj+zPPL/w==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					yauzl@^2.10.0:
 | 
				
			||||||
 | 
					  version "2.10.0"
 | 
				
			||||||
 | 
					  resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9"
 | 
				
			||||||
 | 
					  integrity sha1-x+sXyT4RLLEIb6bY5R+wZnt5pfk=
 | 
				
			||||||
 | 
					  dependencies:
 | 
				
			||||||
 | 
					    buffer-crc32 "~0.2.3"
 | 
				
			||||||
 | 
					    fd-slicer "~1.1.0"
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -5,13 +5,14 @@ import './Style-light.css';
 | 
				
			|||||||
import './Style-dark.css';
 | 
					import './Style-dark.css';
 | 
				
			||||||
import './fonts/Fonts.css';
 | 
					import './fonts/Fonts.css';
 | 
				
			||||||
import { ForwardDot } from './utils.js';
 | 
					import { ForwardDot } from './utils.js';
 | 
				
			||||||
import Feed from './Feed.js';
 | 
					 | 
				
			||||||
import Article from './Article.js';
 | 
					 | 
				
			||||||
import Comments from './Comments.js';
 | 
					 | 
				
			||||||
import Search from './Search.js';
 | 
					import Search from './Search.js';
 | 
				
			||||||
import Submit from './Submit.js';
 | 
					import Submit from './Submit.js';
 | 
				
			||||||
import Results from './Results.js';
 | 
					 | 
				
			||||||
import ScrollToTop from './ScrollToTop.js';
 | 
					import ScrollToTop from './ScrollToTop.js';
 | 
				
			||||||
 | 
					import Feed from './pages/Feed.js';
 | 
				
			||||||
 | 
					import Article from './pages/Article.js';
 | 
				
			||||||
 | 
					import Comments from './pages/Comments.js';
 | 
				
			||||||
 | 
					import Results from './pages/Results.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class App extends React.Component {
 | 
					class App extends React.Component {
 | 
				
			||||||
	constructor(props) {
 | 
						constructor(props) {
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										34
									
								
								webclient/src/components/StoryItem.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								webclient/src/components/StoryItem.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,34 @@
 | 
				
			|||||||
 | 
					import React from "react";
 | 
				
			||||||
 | 
					import { Link } from "react-router-dom";
 | 
				
			||||||
 | 
					import { sourceLink, infoLine, getLogoUrl } from "../utils.js";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export class StoryItem extends React.Component {
 | 
				
			||||||
 | 
						constructor(props) {
 | 
				
			||||||
 | 
							super(props);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						render() {
 | 
				
			||||||
 | 
							const story = this.props.story;
 | 
				
			||||||
 | 
							const { id, title } = story;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							return (
 | 
				
			||||||
 | 
								<div className="item" key={id}>
 | 
				
			||||||
 | 
									<div className="title">
 | 
				
			||||||
 | 
										<Link className="link" to={"/" + id}>
 | 
				
			||||||
 | 
											<img
 | 
				
			||||||
 | 
												className="source-logo"
 | 
				
			||||||
 | 
												src={getLogoUrl(story)}
 | 
				
			||||||
 | 
												alt="source logo"
 | 
				
			||||||
 | 
											/>
 | 
				
			||||||
 | 
											{" "}
 | 
				
			||||||
 | 
											{title}
 | 
				
			||||||
 | 
										</Link>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
										<span className="source">({sourceLink(story)})</span>
 | 
				
			||||||
 | 
									</div>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									{infoLine(story)}
 | 
				
			||||||
 | 
								</div>
 | 
				
			||||||
 | 
							);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -1,7 +1,7 @@
 | 
				
			|||||||
import React from 'react';
 | 
					import React from 'react';
 | 
				
			||||||
import { Helmet } from 'react-helmet';
 | 
					import { Helmet } from 'react-helmet';
 | 
				
			||||||
import localForage from 'localforage';
 | 
					import localForage from 'localforage';
 | 
				
			||||||
import { sourceLink, infoLine, ToggleDot } from './utils.js';
 | 
					import { sourceLink, infoLine, ToggleDot } from '../utils.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Article extends React.Component {
 | 
					class Article extends React.Component {
 | 
				
			||||||
	constructor(props) {
 | 
						constructor(props) {
 | 
				
			||||||
@@ -45,7 +45,7 @@ class Article extends React.Component {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pConvert = (n) => {
 | 
						pConvert = (n) => {
 | 
				
			||||||
		this.setState({ pConv: [...this.state.pConv, n]});
 | 
							this.setState({ pConv: [...this.state.pConv, n] });
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	render() {
 | 
						render() {
 | 
				
			||||||
@@ -4,7 +4,7 @@ import { HashLink } from 'react-router-hash-link';
 | 
				
			|||||||
import { Helmet } from 'react-helmet';
 | 
					import { Helmet } from 'react-helmet';
 | 
				
			||||||
import moment from 'moment';
 | 
					import moment from 'moment';
 | 
				
			||||||
import localForage from 'localforage';
 | 
					import localForage from 'localforage';
 | 
				
			||||||
import { infoLine, ToggleDot } from './utils.js';
 | 
					import { infoLine, ToggleDot } from '../utils.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Article extends React.Component {
 | 
					class Article extends React.Component {
 | 
				
			||||||
	constructor(props) {
 | 
						constructor(props) {
 | 
				
			||||||
@@ -72,7 +72,7 @@ class Article extends React.Component {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	displayComment(story, c, level) {
 | 
						displayComment(story, c, level) {
 | 
				
			||||||
		const cid = c.author+c.date;
 | 
							const cid = c.author + c.date;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		const collapsed = this.state.collapsed.includes(cid);
 | 
							const collapsed = this.state.collapsed.includes(cid);
 | 
				
			||||||
		const expanded = this.state.expanded.includes(cid);
 | 
							const expanded = this.state.expanded.includes(cid);
 | 
				
			||||||
@@ -85,18 +85,21 @@ class Article extends React.Component {
 | 
				
			|||||||
				<div className='info'>
 | 
									<div className='info'>
 | 
				
			||||||
					<p>
 | 
										<p>
 | 
				
			||||||
						{c.author === story.author ? '[OP]' : ''} {c.author || '[Deleted]'}
 | 
											{c.author === story.author ? '[OP]' : ''} {c.author || '[Deleted]'}
 | 
				
			||||||
						{' '} | <HashLink to={'#'+cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
 | 
											{' '} | <HashLink to={'#' + cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
						{hidden || hasChildren &&
 | 
											{hasChildren && (
 | 
				
			||||||
 | 
												hidden ?
 | 
				
			||||||
 | 
													<span className='collapser expander pointer' onClick={() => this.expandComment(cid)}>+</span>
 | 
				
			||||||
 | 
													:
 | 
				
			||||||
								<span className='collapser pointer' onClick={() => this.collapseComment(cid)}>–</span>
 | 
													<span className='collapser pointer' onClick={() => this.collapseComment(cid)}>–</span>
 | 
				
			||||||
						}
 | 
											)}
 | 
				
			||||||
					</p>
 | 
										</p>
 | 
				
			||||||
				</div>
 | 
									</div>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				<div className={collapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text }} />
 | 
									<div className={collapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text }} />
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				{hidden && hasChildren ?
 | 
									{hidden && hasChildren ?
 | 
				
			||||||
					<div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c)-1} more]</div>
 | 
										<div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c) - 1} more]</div>
 | 
				
			||||||
					:
 | 
										:
 | 
				
			||||||
					c.comments.map(i => this.displayComment(story, i, level + 1))
 | 
										c.comments.map(i => this.displayComment(story, i, level + 1))
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
@@ -1,8 +1,7 @@
 | 
				
			|||||||
import React from 'react';
 | 
					import React from 'react';
 | 
				
			||||||
import { Link } from 'react-router-dom';
 | 
					 | 
				
			||||||
import { Helmet } from 'react-helmet';
 | 
					import { Helmet } from 'react-helmet';
 | 
				
			||||||
import localForage from 'localforage';
 | 
					import localForage from 'localforage';
 | 
				
			||||||
import { sourceLink, infoLine, logos } from './utils.js';
 | 
					import { StoryItem } from '../components/StoryItem.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Feed extends React.Component {
 | 
					class Feed extends React.Component {
 | 
				
			||||||
	constructor(props) {
 | 
						constructor(props) {
 | 
				
			||||||
@@ -22,19 +21,20 @@ class Feed extends React.Component {
 | 
				
			|||||||
					const updated = !this.state.stories || this.state.stories[0].id !== result.stories[0].id;
 | 
										const updated = !this.state.stories || this.state.stories[0].id !== result.stories[0].id;
 | 
				
			||||||
					console.log('updated:', updated);
 | 
										console.log('updated:', updated);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
					this.setState({ stories: result.stories });
 | 
										const { stories } = result;
 | 
				
			||||||
					localStorage.setItem('stories', JSON.stringify(result.stories));
 | 
										this.setState({ stories });
 | 
				
			||||||
 | 
										localStorage.setItem('stories', JSON.stringify(stories));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
					if (updated) {
 | 
										if (updated) {
 | 
				
			||||||
						localForage.clear();
 | 
											localForage.clear();
 | 
				
			||||||
						result.stories.forEach((x, i) => {
 | 
											stories.forEach((x, i) => {
 | 
				
			||||||
							fetch('/api/' + x.id)
 | 
												fetch('/api/' + x.id)
 | 
				
			||||||
								.then(res => res.json())
 | 
													.then(res => res.json())
 | 
				
			||||||
								.then(result => {
 | 
													.then(({ story }) => {
 | 
				
			||||||
									localForage.setItem(x.id, result.story)
 | 
														localForage.setItem(x.id, story)
 | 
				
			||||||
										.then(console.log('preloaded', x.id, x.title));
 | 
															.then(console.log('preloaded', x.id, x.title));
 | 
				
			||||||
									this.props.updateCache(x.id, result.story);
 | 
														this.props.updateCache(x.id, story);
 | 
				
			||||||
								}, error => {}
 | 
													}, error => { }
 | 
				
			||||||
								);
 | 
													);
 | 
				
			||||||
						});
 | 
											});
 | 
				
			||||||
					}
 | 
										}
 | 
				
			||||||
@@ -55,27 +55,7 @@ class Feed extends React.Component {
 | 
				
			|||||||
					<title>Feed - QotNews</title>
 | 
										<title>Feed - QotNews</title>
 | 
				
			||||||
				</Helmet>
 | 
									</Helmet>
 | 
				
			||||||
				{error && <p>Connection error?</p>}
 | 
									{error && <p>Connection error?</p>}
 | 
				
			||||||
				{stories ?
 | 
									{stories ? stories.map(story => <StoryItem story={story}></StoryItem>) : <p>loading...</p>}
 | 
				
			||||||
					<div>
 | 
					 | 
				
			||||||
						{stories.map(x =>
 | 
					 | 
				
			||||||
							<div className='item' key={x.id}>
 | 
					 | 
				
			||||||
								<div className='title'>
 | 
					 | 
				
			||||||
									<Link className='link' to={'/' + x.id}>
 | 
					 | 
				
			||||||
										<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
 | 
					 | 
				
			||||||
									</Link>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
									<span className='source'>
 | 
					 | 
				
			||||||
										({sourceLink(x)})
 | 
					 | 
				
			||||||
									</span>
 | 
					 | 
				
			||||||
								</div>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
								{infoLine(x)}
 | 
					 | 
				
			||||||
							</div>
 | 
					 | 
				
			||||||
						)}
 | 
					 | 
				
			||||||
					</div>
 | 
					 | 
				
			||||||
				:
 | 
					 | 
				
			||||||
					<p>loading...</p>
 | 
					 | 
				
			||||||
				}
 | 
					 | 
				
			||||||
			</div>
 | 
								</div>
 | 
				
			||||||
		);
 | 
							);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
@@ -1,8 +1,7 @@
 | 
				
			|||||||
import React from 'react';
 | 
					import React from 'react';
 | 
				
			||||||
import { Link } from 'react-router-dom';
 | 
					 | 
				
			||||||
import { Helmet } from 'react-helmet';
 | 
					import { Helmet } from 'react-helmet';
 | 
				
			||||||
import { sourceLink, infoLine, logos } from './utils.js';
 | 
					 | 
				
			||||||
import AbortController from 'abort-controller';
 | 
					import AbortController from 'abort-controller';
 | 
				
			||||||
 | 
					import { StoryItem } from '../components/StoryItem.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Results extends React.Component {
 | 
					class Results extends React.Component {
 | 
				
			||||||
	constructor(props) {
 | 
						constructor(props) {
 | 
				
			||||||
@@ -63,25 +62,7 @@ class Results extends React.Component {
 | 
				
			|||||||
					<>
 | 
										<>
 | 
				
			||||||
						<p>Search results:</p>
 | 
											<p>Search results:</p>
 | 
				
			||||||
						<div className='comment lined'>
 | 
											<div className='comment lined'>
 | 
				
			||||||
							{stories.length ?
 | 
												{stories ? stories.map(story => <StoryItem story={story}></StoryItem>) : <p>loading...</p>}
 | 
				
			||||||
								stories.map(x =>
 | 
					 | 
				
			||||||
									<div className='item' key={x.id}>
 | 
					 | 
				
			||||||
										<div className='title'>
 | 
					 | 
				
			||||||
											<Link className='link' to={'/' + x.id}>
 | 
					 | 
				
			||||||
												<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
 | 
					 | 
				
			||||||
											</Link>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
											<span className='source'>
 | 
					 | 
				
			||||||
												({sourceLink(x)})
 | 
					 | 
				
			||||||
											</span>
 | 
					 | 
				
			||||||
										</div>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
										{infoLine(x)}
 | 
					 | 
				
			||||||
									</div>
 | 
					 | 
				
			||||||
								)
 | 
					 | 
				
			||||||
							:
 | 
					 | 
				
			||||||
								<p>none</p>
 | 
					 | 
				
			||||||
							}
 | 
					 | 
				
			||||||
						</div>
 | 
											</div>
 | 
				
			||||||
					</>
 | 
										</>
 | 
				
			||||||
					:
 | 
										:
 | 
				
			||||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
		Reference in New Issue
	
	Block a user