forked from tanner/qotnews
		
	Compare commits
	
		
			56 Commits
		
	
	
		
			b80c1a5cb5
			...
			master
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| f1a30d0af2 | |||
| 9ec61ea5bc | |||
| bdc7a6c10d | |||
| 4858516b01 | |||
| f10e6063fc | |||
| 249a616531 | |||
| ab92bd5441 | |||
| 6b16a768a7 | |||
| 57de076fec | |||
| 074b898508 | |||
| f049d194ab | |||
| c2b9a1cb7a | |||
| 4435f49e17 | |||
| 494d89ac30 | |||
| e79fca6ecc | |||
| c65fb69092 | |||
| 632d028e4c | |||
| ea8e9e5a23 | |||
| 2838ea9b41 | |||
| f15d108971 | |||
| f777348af8 | |||
| 486404a413 | |||
| 7c9c07a4cf | |||
| 08d02f6013 | |||
| 1b54342702 | |||
| 9e9571a3c0 | |||
| dc83a70887 | |||
| 2e2c9ae837 | |||
| 61021d8f91 | |||
| e65047fead | |||
| 8e775c189f | |||
| 3d9274309a | |||
| 7bdbbf10b2 | |||
| 6aa0f78536 | |||
| bf3663bbec | |||
| e6589dc61c | |||
| 307e8349f3 | |||
| 04cd56daa8 | |||
| c80769def6 | |||
| ebd1ad2140 | |||
| 2cc7dd0d6d | |||
| 6e7cb86d2e | |||
| a25457254f | |||
| a693ea5342 | |||
| 7386e1d8b0 | |||
| f8e8597e3a | |||
| 55c282ee69 | |||
| 3f774a9e38 | |||
| dcedd4caa1 | |||
| 7a131ebd03 | |||
| 6f64401785 | |||
| 3ff917e806 | |||
| c9fb9bd5df | |||
| fd9c9c888d | |||
| 42dcf15374 | |||
| d8a0b77765 | 
							
								
								
									
										3
									
								
								.gitmodules
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.gitmodules
									
									
									
									
										vendored
									
									
								
							@@ -1,3 +0,0 @@
 | 
				
			|||||||
[submodule "readerserver/scraper/browser/scripts/bypass-paywalls-chrome"]
 | 
					 | 
				
			||||||
	path = readerserver/scraper/browser/scripts/bypass-paywalls-chrome
 | 
					 | 
				
			||||||
	url = https://github.com/iamadamdev/bypass-paywalls-chrome.git
 | 
					 | 
				
			||||||
							
								
								
									
										1
									
								
								apiserver/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								apiserver/.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -109,4 +109,5 @@ settings.py
 | 
				
			|||||||
data.db
 | 
					data.db
 | 
				
			||||||
data.db.bak
 | 
					data.db.bak
 | 
				
			||||||
data/archive/*
 | 
					data/archive/*
 | 
				
			||||||
 | 
					data/backup/*
 | 
				
			||||||
qotnews.sqlite
 | 
					qotnews.sqlite
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,11 +1,11 @@
 | 
				
			|||||||
from datetime import datetime, timedelta
 | 
					import json
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
 | 
					from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
 | 
				
			||||||
from sqlalchemy.ext.declarative import declarative_base
 | 
					from sqlalchemy.ext.declarative import declarative_base
 | 
				
			||||||
from sqlalchemy.orm import sessionmaker
 | 
					from sqlalchemy.orm import sessionmaker
 | 
				
			||||||
from sqlalchemy.exc import IntegrityError
 | 
					from sqlalchemy.exc import IntegrityError
 | 
				
			||||||
from sqlalchemy.types import JSON
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
engine = create_engine('sqlite:///data/qotnews.sqlite')
 | 
					engine = create_engine('sqlite:///data/qotnews.sqlite', connect_args={'timeout': 360})
 | 
				
			||||||
Session = sessionmaker(bind=engine)
 | 
					Session = sessionmaker(bind=engine)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Base = declarative_base()
 | 
					Base = declarative_base()
 | 
				
			||||||
@@ -15,8 +15,8 @@ class Story(Base):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    sid = Column(String(16), primary_key=True)
 | 
					    sid = Column(String(16), primary_key=True)
 | 
				
			||||||
    ref = Column(String(16), unique=True)
 | 
					    ref = Column(String(16), unique=True)
 | 
				
			||||||
    meta = Column(JSON)
 | 
					    meta_json = Column(String)
 | 
				
			||||||
    data = Column(JSON)
 | 
					    full_json = Column(String)
 | 
				
			||||||
    title = Column(String)
 | 
					    title = Column(String)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Reflist(Base):
 | 
					class Reflist(Base):
 | 
				
			||||||
@@ -36,21 +36,19 @@ def get_story(sid):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def put_story(story):
 | 
					def put_story(story):
 | 
				
			||||||
    story = story.copy()
 | 
					    story = story.copy()
 | 
				
			||||||
    data = {}
 | 
					    full_json = json.dumps(story)
 | 
				
			||||||
    data.update(story)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    meta = {}
 | 
					    story.pop('text', None)
 | 
				
			||||||
    meta.update(story)
 | 
					    story.pop('comments', None)
 | 
				
			||||||
    meta.pop('text', None)
 | 
					    meta_json = json.dumps(story)
 | 
				
			||||||
    meta.pop('comments', None)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        session = Session()
 | 
					        session = Session()
 | 
				
			||||||
        s = Story(
 | 
					        s = Story(
 | 
				
			||||||
            sid=story['id'],
 | 
					            sid=story['id'],
 | 
				
			||||||
            ref=story['ref'],
 | 
					            ref=story['ref'],
 | 
				
			||||||
            data=data,
 | 
					            full_json=full_json,
 | 
				
			||||||
            meta=meta,
 | 
					            meta_json=meta_json,
 | 
				
			||||||
            title=story.get('title', None),
 | 
					            title=story.get('title', None),
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        session.merge(s)
 | 
					        session.merge(s)
 | 
				
			||||||
@@ -65,26 +63,19 @@ def get_story_by_ref(ref):
 | 
				
			|||||||
    session = Session()
 | 
					    session = Session()
 | 
				
			||||||
    return session.query(Story).filter(Story.ref==ref).first()
 | 
					    return session.query(Story).filter(Story.ref==ref).first()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_stories_by_url(url):
 | 
					def get_reflist(amount):
 | 
				
			||||||
    session = Session()
 | 
					    session = Session()
 | 
				
			||||||
    return session.query(Story).\
 | 
					    q = session.query(Reflist).order_by(Reflist.rid.desc()).limit(amount)
 | 
				
			||||||
            filter(Story.title != None).\
 | 
					 | 
				
			||||||
            filter(Story.meta['url'].as_string() == url).\
 | 
					 | 
				
			||||||
            order_by(Story.meta['date'].desc())
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_reflist():
 | 
					 | 
				
			||||||
    session = Session()
 | 
					 | 
				
			||||||
    q = session.query(Reflist).order_by(Reflist.rid.desc())
 | 
					 | 
				
			||||||
    return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]
 | 
					    return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_stories(maxage=60*60*24*2):
 | 
					def get_stories(amount, skip=0):
 | 
				
			||||||
    time = datetime.now().timestamp() - maxage
 | 
					 | 
				
			||||||
    session = Session()
 | 
					    session = Session()
 | 
				
			||||||
    q = session.query(Reflist, Story.meta).\
 | 
					    q = session.query(Reflist, Story.meta_json).\
 | 
				
			||||||
 | 
					            order_by(Reflist.rid.desc()).\
 | 
				
			||||||
            join(Story).\
 | 
					            join(Story).\
 | 
				
			||||||
            filter(Story.title != None).\
 | 
					            filter(Story.title != None).\
 | 
				
			||||||
            filter(Story.meta['date'].as_integer() > time).\
 | 
					            offset(skip).\
 | 
				
			||||||
            order_by(Story.meta['date'].desc())
 | 
					            limit(amount)
 | 
				
			||||||
    return [x[1] for x in q]
 | 
					    return [x[1] for x in q]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def put_ref(ref, sid, source):
 | 
					def put_ref(ref, sid, source):
 | 
				
			||||||
@@ -110,7 +101,22 @@ def del_ref(ref):
 | 
				
			|||||||
    finally:
 | 
					    finally:
 | 
				
			||||||
        session.close()
 | 
					        session.close()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def count_stories():
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        session = Session()
 | 
				
			||||||
 | 
					        return session.query(Story).count()
 | 
				
			||||||
 | 
					    finally:
 | 
				
			||||||
 | 
					        session.close()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_story_list():
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        session = Session()
 | 
				
			||||||
 | 
					        return session.query(Story.sid).all()
 | 
				
			||||||
 | 
					    finally:
 | 
				
			||||||
 | 
					        session.close()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == '__main__':
 | 
					if __name__ == '__main__':
 | 
				
			||||||
    init()
 | 
					    init()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    print(get_story_by_ref('hgi3sy'))
 | 
					    #print(get_story_by_ref('hgi3sy'))
 | 
				
			||||||
 | 
					    print(len(get_reflist(99999)))
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -6,118 +6,84 @@ logging.basicConfig(
 | 
				
			|||||||
import requests
 | 
					import requests
 | 
				
			||||||
import time
 | 
					import time
 | 
				
			||||||
from bs4 import BeautifulSoup
 | 
					from bs4 import BeautifulSoup
 | 
				
			||||||
import itertools
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
import settings
 | 
					import settings
 | 
				
			||||||
from feeds import hackernews, reddit, tildes, substack, manual, news
 | 
					from feeds import hackernews, reddit, tildes, manual, lobsters
 | 
				
			||||||
from scrapers import outline, declutter, browser, local
 | 
					import utils
 | 
				
			||||||
 | 
					
 | 
				
			||||||
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
 | 
					INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com', 'sec.gov']
 | 
				
			||||||
 | 
					TWO_DAYS = 60*60*24*2
 | 
				
			||||||
substacks = {}
 | 
					 | 
				
			||||||
for key, value in settings.SUBSTACK.items():
 | 
					 | 
				
			||||||
    substacks[key] = substack.Publication(value['url'])
 | 
					 | 
				
			||||||
categories = {}
 | 
					 | 
				
			||||||
for key, value in settings.CATEGORY.items():
 | 
					 | 
				
			||||||
    categories[key] = news.Category(value['url'], value.get('tz'))
 | 
					 | 
				
			||||||
sitemaps = {}
 | 
					 | 
				
			||||||
for key, value in settings.SITEMAP.items():
 | 
					 | 
				
			||||||
    sitemaps[key] = news.Sitemap(value['url'], value.get('tz'))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_list():
 | 
					 | 
				
			||||||
    feeds = {}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def list():
 | 
				
			||||||
 | 
					    feed = []
 | 
				
			||||||
    if settings.NUM_HACKERNEWS:
 | 
					    if settings.NUM_HACKERNEWS:
 | 
				
			||||||
        feeds['hackernews'] = [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
 | 
					        feed += [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if settings.NUM_LOBSTERS:
 | 
				
			||||||
 | 
					        feed += [(x, 'lobsters') for x in lobsters.feed()[:settings.NUM_LOBSTERS]]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if settings.NUM_REDDIT:
 | 
					    if settings.NUM_REDDIT:
 | 
				
			||||||
        feeds['reddit'] = [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
 | 
					        feed += [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if settings.NUM_TILDES:
 | 
					    if settings.NUM_TILDES:
 | 
				
			||||||
        feeds['tildes'] = [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
 | 
					        feed += [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if settings.NUM_SUBSTACK:
 | 
					 | 
				
			||||||
        feeds['substack'] = [(x, 'substack') for x in substack.top.feed()[:settings.NUM_SUBSTACK]]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for key, publication in substacks.items():
 | 
					 | 
				
			||||||
        count = settings.SUBSTACK[key]['count']
 | 
					 | 
				
			||||||
        feeds[key] = [(x, key) for x in publication.feed()[:count]]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for key, sites in categories.items():
 | 
					 | 
				
			||||||
        count = settings.CATEGORY[key].get('count') or 0
 | 
					 | 
				
			||||||
        excludes = settings.CATEGORY[key].get('excludes')
 | 
					 | 
				
			||||||
        tz = settings.CATEGORY[key].get('tz')
 | 
					 | 
				
			||||||
        feeds[key] = [(x, key) for x in sites.feed(excludes)[:count]]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for key, sites in sitemaps.items():
 | 
					 | 
				
			||||||
        count = settings.SITEMAP[key].get('count') or 0
 | 
					 | 
				
			||||||
        excludes = settings.SITEMAP[key].get('excludes')
 | 
					 | 
				
			||||||
        feeds[key] = [(x, key) for x in sites.feed(excludes)[:count]]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    values = feeds.values()
 | 
					 | 
				
			||||||
    feed = itertools.chain.from_iterable(itertools.zip_longest(*values, fillvalue=None))
 | 
					 | 
				
			||||||
    feed = list(filter(None, feed))
 | 
					 | 
				
			||||||
    return feed
 | 
					    return feed
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_article(url):
 | 
					def get_article(url):
 | 
				
			||||||
    scrapers = {
 | 
					    if not settings.READER_URL:
 | 
				
			||||||
        'declutter': declutter,
 | 
					        logging.info('Readerserver not configured, aborting.')
 | 
				
			||||||
        'outline': outline,
 | 
					        return ''
 | 
				
			||||||
        'browser': browser,
 | 
					
 | 
				
			||||||
        'local': local,
 | 
					    if url.startswith('https://twitter.com'):
 | 
				
			||||||
    }
 | 
					        logging.info('Replacing twitter.com url with nitter.net')
 | 
				
			||||||
    available = settings.SCRAPERS or ['local']
 | 
					        url = url.replace('twitter.com', 'nitter.net')
 | 
				
			||||||
    if 'local' not in available:
 | 
					 | 
				
			||||||
        available += ['local']
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for scraper in available:
 | 
					 | 
				
			||||||
        if scraper not in scrapers.keys():
 | 
					 | 
				
			||||||
            continue
 | 
					 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
            html = scrapers[scraper].get_html(url)
 | 
					        r = requests.post(settings.READER_URL, data=dict(url=url), timeout=20)
 | 
				
			||||||
            if html:
 | 
					        if r.status_code != 200:
 | 
				
			||||||
                return html
 | 
					            raise Exception('Bad response code ' + str(r.status_code))
 | 
				
			||||||
 | 
					        return r.text
 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					    except KeyboardInterrupt:
 | 
				
			||||||
        raise
 | 
					        raise
 | 
				
			||||||
        except:
 | 
					    except BaseException as e:
 | 
				
			||||||
            pass
 | 
					        logging.error('Problem getting article: {}'.format(str(e)))
 | 
				
			||||||
        return ''
 | 
					        return ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_content_type(url):
 | 
					def get_content_type(url):
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
 | 
				
			||||||
 | 
					        return requests.get(url, headers=headers, timeout=5).headers['content-type']
 | 
				
			||||||
 | 
					    except:
 | 
				
			||||||
 | 
					        return ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        headers = {
 | 
					        headers = {
 | 
				
			||||||
            'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
 | 
					            'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
 | 
				
			||||||
            'X-Forwarded-For': '66.249.66.1',
 | 
					            'X-Forwarded-For': '66.249.66.1',
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        return requests.get(url, headers=headers, timeout=5).headers['content-type']
 | 
					 | 
				
			||||||
    except:
 | 
					 | 
				
			||||||
        pass
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
 | 
					 | 
				
			||||||
        return requests.get(url, headers=headers, timeout=10).headers['content-type']
 | 
					        return requests.get(url, headers=headers, timeout=10).headers['content-type']
 | 
				
			||||||
    except:
 | 
					    except:
 | 
				
			||||||
        return ''
 | 
					        pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def update_story(story, is_manual=False):
 | 
					def update_story(story, is_manual=False):
 | 
				
			||||||
    res = {}
 | 
					    res = {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
        if story['source'] == 'hackernews':
 | 
					        if story['source'] == 'hackernews':
 | 
				
			||||||
            res = hackernews.story(story['ref'])
 | 
					            res = hackernews.story(story['ref'])
 | 
				
			||||||
 | 
					        elif story['source'] == 'lobsters':
 | 
				
			||||||
 | 
					            res = lobsters.story(story['ref'])
 | 
				
			||||||
        elif story['source'] == 'reddit':
 | 
					        elif story['source'] == 'reddit':
 | 
				
			||||||
            res = reddit.story(story['ref'])
 | 
					            res = reddit.story(story['ref'])
 | 
				
			||||||
        elif story['source'] == 'tildes':
 | 
					        elif story['source'] == 'tildes':
 | 
				
			||||||
            res = tildes.story(story['ref'])
 | 
					            res = tildes.story(story['ref'])
 | 
				
			||||||
    elif story['source'] == 'substack':
 | 
					 | 
				
			||||||
        res = substack.top.story(story['ref'])
 | 
					 | 
				
			||||||
    elif story['source'] in categories.keys():
 | 
					 | 
				
			||||||
        res = categories[story['source']].story(story['ref'])
 | 
					 | 
				
			||||||
    elif story['source'] in sitemaps.keys():
 | 
					 | 
				
			||||||
        res = sitemaps[story['source']].story(story['ref'])
 | 
					 | 
				
			||||||
    elif story['source'] in substacks.keys():
 | 
					 | 
				
			||||||
        res = substacks[story['source']].story(story['ref'])
 | 
					 | 
				
			||||||
        elif story['source'] == 'manual':
 | 
					        elif story['source'] == 'manual':
 | 
				
			||||||
            res = manual.story(story['ref'])
 | 
					            res = manual.story(story['ref'])
 | 
				
			||||||
 | 
					    except BaseException as e:
 | 
				
			||||||
 | 
					        utils.alert_tanner('Problem updating {} story, ref {}: {}'.format(story['source'], story['ref'], str(e)))
 | 
				
			||||||
 | 
					        logging.exception(e)
 | 
				
			||||||
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if res:
 | 
					    if res:
 | 
				
			||||||
        story.update(res) # join dicts
 | 
					        story.update(res) # join dicts
 | 
				
			||||||
@@ -125,8 +91,8 @@ def update_story(story, is_manual=False):
 | 
				
			|||||||
        logging.info('Story not ready yet')
 | 
					        logging.info('Story not ready yet')
 | 
				
			||||||
        return False
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if story['date'] and not is_manual and story['date'] + settings.MAX_STORY_AGE < time.time():
 | 
					    if story['date'] and not is_manual and story['date'] + TWO_DAYS < time.time():
 | 
				
			||||||
        logging.info('Story too old, removing')
 | 
					        logging.info('Story too old, removing. Date: {}'.format(story['date']))
 | 
				
			||||||
        return False
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if story.get('url', '') and not story.get('text', ''):
 | 
					    if story.get('url', '') and not story.get('text', ''):
 | 
				
			||||||
@@ -140,6 +106,12 @@ def update_story(story, is_manual=False):
 | 
				
			|||||||
            logging.info(story['url'])
 | 
					            logging.info(story['url'])
 | 
				
			||||||
            return False
 | 
					            return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if 'trump' in story['title'].lower() or 'musk' in story['title'].lower() or 'Removed by moderator' in story['title']:
 | 
				
			||||||
 | 
					            logging.info('Trump / Musk / removed story, skipping')
 | 
				
			||||||
 | 
					            logging.info(story['url'])
 | 
				
			||||||
 | 
					            return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        logging.info('Getting article ' + story['url'])
 | 
					        logging.info('Getting article ' + story['url'])
 | 
				
			||||||
        story['text'] = get_article(story['url'])
 | 
					        story['text'] = get_article(story['url'])
 | 
				
			||||||
        if not story['text']: return False
 | 
					        if not story['text']: return False
 | 
				
			||||||
@@ -157,7 +129,7 @@ if __name__ == '__main__':
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    #print(get_article('https://www.bloomberg.com/news/articles/2019-09-23/xi-s-communists-under-pressure-as-high-prices-hit-china-workers'))
 | 
					    #print(get_article('https://www.bloomberg.com/news/articles/2019-09-23/xi-s-communists-under-pressure-as-high-prices-hit-china-workers'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    a = get_article('https://blog.joinmastodon.org/2019/10/mastodon-3.0/')
 | 
					    a = get_content_type('https://tefkos.comminfo.rutgers.edu/Courses/e530/Readings/Beal%202008%20full%20text%20searching.pdf')
 | 
				
			||||||
    print(a)
 | 
					    print(a)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    print('done')
 | 
					    print('done')
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -12,7 +12,8 @@ import requests
 | 
				
			|||||||
from utils import clean
 | 
					from utils import clean
 | 
				
			||||||
 | 
					
 | 
				
			||||||
API_TOPSTORIES = lambda x: 'https://hacker-news.firebaseio.com/v0/topstories.json'
 | 
					API_TOPSTORIES = lambda x: 'https://hacker-news.firebaseio.com/v0/topstories.json'
 | 
				
			||||||
API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
 | 
					ALG_API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
 | 
				
			||||||
 | 
					BHN_API_ITEM = lambda x : 'https://api.hnpwa.com/v0/item/{}.json'.format(x)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
SITE_LINK = lambda x : 'https://news.ycombinator.com/item?id={}'.format(x)
 | 
					SITE_LINK = lambda x : 'https://news.ycombinator.com/item?id={}'.format(x)
 | 
				
			||||||
SITE_AUTHOR_LINK = lambda x : 'https://news.ycombinator.com/user?id={}'.format(x)
 | 
					SITE_AUTHOR_LINK = lambda x : 'https://news.ycombinator.com/user?id={}'.format(x)
 | 
				
			||||||
@@ -42,7 +43,7 @@ def api(route, ref=None):
 | 
				
			|||||||
def feed():
 | 
					def feed():
 | 
				
			||||||
    return [str(x) for x in api(API_TOPSTORIES) or []]
 | 
					    return [str(x) for x in api(API_TOPSTORIES) or []]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def comment(i):
 | 
					def alg_comment(i):
 | 
				
			||||||
    if 'author' not in i:
 | 
					    if 'author' not in i:
 | 
				
			||||||
        return False
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -51,21 +52,25 @@ def comment(i):
 | 
				
			|||||||
    c['score'] = i.get('points', 0)
 | 
					    c['score'] = i.get('points', 0)
 | 
				
			||||||
    c['date'] = i.get('created_at_i', 0)
 | 
					    c['date'] = i.get('created_at_i', 0)
 | 
				
			||||||
    c['text'] = clean(i.get('text', '') or '')
 | 
					    c['text'] = clean(i.get('text', '') or '')
 | 
				
			||||||
    c['comments'] = [comment(j) for j in i['children']]
 | 
					    c['comments'] = [alg_comment(j) for j in i['children']]
 | 
				
			||||||
    c['comments'] = list(filter(bool, c['comments']))
 | 
					    c['comments'] = list(filter(bool, c['comments']))
 | 
				
			||||||
    return c
 | 
					    return c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def comment_count(i):
 | 
					def alg_comment_count(i):
 | 
				
			||||||
    alive = 1 if i['author'] else 0
 | 
					    alive = 1 if i['author'] else 0
 | 
				
			||||||
    return sum([comment_count(c) for c in i['comments']]) + alive
 | 
					    return sum([alg_comment_count(c) for c in i['comments']]) + alive
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def story(ref):
 | 
					def alg_story(ref):
 | 
				
			||||||
    r = api(API_ITEM, ref)
 | 
					    r = api(ALG_API_ITEM, ref)
 | 
				
			||||||
    if not r: return False
 | 
					    if not r:
 | 
				
			||||||
 | 
					        logging.info('Bad Algolia Hackernews API response.')
 | 
				
			||||||
 | 
					        return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if 'deleted' in r:
 | 
					    if 'deleted' in r:
 | 
				
			||||||
 | 
					        logging.info('Story was deleted.')
 | 
				
			||||||
        return False
 | 
					        return False
 | 
				
			||||||
    elif r.get('type', '') != 'story':
 | 
					    elif r.get('type', '') != 'story':
 | 
				
			||||||
 | 
					        logging.info('Type "{}" is not "story".'.format(r.get('type', '')))
 | 
				
			||||||
        return False
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    s = {}
 | 
					    s = {}
 | 
				
			||||||
@@ -76,17 +81,85 @@ def story(ref):
 | 
				
			|||||||
    s['title'] = r.get('title', '')
 | 
					    s['title'] = r.get('title', '')
 | 
				
			||||||
    s['link'] = SITE_LINK(ref)
 | 
					    s['link'] = SITE_LINK(ref)
 | 
				
			||||||
    s['url'] = r.get('url', '')
 | 
					    s['url'] = r.get('url', '')
 | 
				
			||||||
    s['comments'] = [comment(i) for i in r['children']]
 | 
					    s['comments'] = [alg_comment(i) for i in r['children']]
 | 
				
			||||||
    s['comments'] = list(filter(bool, s['comments']))
 | 
					    s['comments'] = list(filter(bool, s['comments']))
 | 
				
			||||||
    s['num_comments'] = comment_count(s) - 1
 | 
					    s['num_comments'] = alg_comment_count(s) - 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if 'text' in r and r['text']:
 | 
					    if 'text' in r and r['text']:
 | 
				
			||||||
        s['text'] = clean(r['text'] or '')
 | 
					        s['text'] = clean(r['text'] or '')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return s
 | 
					    return s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def bhn_comment(i):
 | 
				
			||||||
 | 
					    if 'user' not in i:
 | 
				
			||||||
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    c = {}
 | 
				
			||||||
 | 
					    c['author'] = i.get('user', '')
 | 
				
			||||||
 | 
					    c['score'] = 0   # Not present?
 | 
				
			||||||
 | 
					    c['date'] = i.get('time', 0)
 | 
				
			||||||
 | 
					    c['text'] = clean(i.get('content', '') or '')
 | 
				
			||||||
 | 
					    c['comments'] = [bhn_comment(j) for j in i['comments']]
 | 
				
			||||||
 | 
					    c['comments'] = list(filter(bool, c['comments']))
 | 
				
			||||||
 | 
					    return c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def bhn_story(ref):
 | 
				
			||||||
 | 
					    r = api(BHN_API_ITEM, ref)
 | 
				
			||||||
 | 
					    if not r:
 | 
				
			||||||
 | 
					        logging.info('Bad BetterHN Hackernews API response.')
 | 
				
			||||||
 | 
					        return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if 'deleted' in r:   # TODO: verify
 | 
				
			||||||
 | 
					        logging.info('Story was deleted.')
 | 
				
			||||||
 | 
					        return False
 | 
				
			||||||
 | 
					    elif r.get('dead', False):
 | 
				
			||||||
 | 
					        logging.info('Story was deleted.')
 | 
				
			||||||
 | 
					        return False
 | 
				
			||||||
 | 
					    elif r.get('type', '') != 'link':
 | 
				
			||||||
 | 
					        logging.info('Type "{}" is not "link".'.format(r.get('type', '')))
 | 
				
			||||||
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    s = {}
 | 
				
			||||||
 | 
					    s['author'] = r.get('user', '')
 | 
				
			||||||
 | 
					    s['author_link'] = SITE_AUTHOR_LINK(r.get('user', ''))
 | 
				
			||||||
 | 
					    s['score'] = r.get('points', 0)
 | 
				
			||||||
 | 
					    s['date'] = r.get('time', 0)
 | 
				
			||||||
 | 
					    s['title'] = r.get('title', '')
 | 
				
			||||||
 | 
					    s['link'] = SITE_LINK(ref)
 | 
				
			||||||
 | 
					    s['url'] = r.get('url', '')
 | 
				
			||||||
 | 
					    if s['url'].startswith('item'):
 | 
				
			||||||
 | 
					        s['url'] = SITE_LINK(ref)
 | 
				
			||||||
 | 
					    s['comments'] = [bhn_comment(i) for i in r['comments']]
 | 
				
			||||||
 | 
					    s['comments'] = list(filter(bool, s['comments']))
 | 
				
			||||||
 | 
					    s['num_comments'] = r.get('comments_count', 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if 'content' in r and r['content']:
 | 
				
			||||||
 | 
					        s['text'] = clean(r['content'] or '')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def story(ref):
 | 
				
			||||||
 | 
					    s = alg_story(ref)
 | 
				
			||||||
 | 
					    if s is None:
 | 
				
			||||||
 | 
					        s = bhn_story(ref)
 | 
				
			||||||
 | 
					    if not s:
 | 
				
			||||||
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if s['score'] < 25 and s['num_comments'] < 10:
 | 
				
			||||||
 | 
					        logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
 | 
				
			||||||
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# scratchpad so I can quickly develop the parser
 | 
					# scratchpad so I can quickly develop the parser
 | 
				
			||||||
if __name__ == '__main__':
 | 
					if __name__ == '__main__':
 | 
				
			||||||
    print(feed())
 | 
					    print(feed())
 | 
				
			||||||
    #print(story(20763961))
 | 
					    #print(story(20763961))
 | 
				
			||||||
    #print(story(20802050))
 | 
					    #print(story(20802050))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    #print(story(42899834))   # type "job"
 | 
				
			||||||
 | 
					    #print(story(42900076))   # Ask HN
 | 
				
			||||||
 | 
					    #print(story(42898201))   # Show HN
 | 
				
			||||||
 | 
					    #print(story(42899703))   # normal
 | 
				
			||||||
 | 
					    print(story(42902678))   # bad title?
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										120
									
								
								apiserver/feeds/lobsters.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										120
									
								
								apiserver/feeds/lobsters.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,120 @@
 | 
				
			|||||||
 | 
					import logging
 | 
				
			||||||
 | 
					logging.basicConfig(
 | 
				
			||||||
 | 
					        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 | 
				
			||||||
 | 
					        level=logging.DEBUG)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == '__main__':
 | 
				
			||||||
 | 
					    import sys
 | 
				
			||||||
 | 
					    sys.path.insert(0,'.')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import requests
 | 
				
			||||||
 | 
					from datetime import datetime
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from utils import clean
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					API_HOTTEST = lambda x: 'https://lobste.rs/hottest.json'
 | 
				
			||||||
 | 
					API_ITEM = lambda x : 'https://lobste.rs/s/{}.json'.format(x)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SITE_LINK = lambda x : 'https://lobste.rs/s/{}'.format(x)
 | 
				
			||||||
 | 
					SITE_AUTHOR_LINK = lambda x : 'https://lobste.rs/u/{}'.format(x)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def api(route, ref=None):
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        r = requests.get(route(ref), timeout=5)
 | 
				
			||||||
 | 
					        if r.status_code != 200:
 | 
				
			||||||
 | 
					            raise Exception('Bad response code ' + str(r.status_code))
 | 
				
			||||||
 | 
					        return r.json()
 | 
				
			||||||
 | 
					    except KeyboardInterrupt:
 | 
				
			||||||
 | 
					        raise
 | 
				
			||||||
 | 
					    except BaseException as e:
 | 
				
			||||||
 | 
					        logging.error('Problem hitting lobsters API: {}, trying again'.format(str(e)))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        r = requests.get(route(ref), timeout=15)
 | 
				
			||||||
 | 
					        if r.status_code != 200:
 | 
				
			||||||
 | 
					            raise Exception('Bad response code ' + str(r.status_code))
 | 
				
			||||||
 | 
					        return r.json()
 | 
				
			||||||
 | 
					    except KeyboardInterrupt:
 | 
				
			||||||
 | 
					        raise
 | 
				
			||||||
 | 
					    except BaseException as e:
 | 
				
			||||||
 | 
					        logging.error('Problem hitting lobsters API: {}'.format(str(e)))
 | 
				
			||||||
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def feed():
 | 
				
			||||||
 | 
					    return [x['short_id'] for x in api(API_HOTTEST) or []]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def unix(date_str):
 | 
				
			||||||
 | 
					    date_str = date_str.replace(':', '')
 | 
				
			||||||
 | 
					    return int(datetime.strptime(date_str, '%Y-%m-%dT%H%M%S.%f%z').timestamp())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def make_comment(i):
 | 
				
			||||||
 | 
					    c = {}
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        c['author'] = i['commenting_user']
 | 
				
			||||||
 | 
					    except KeyError:
 | 
				
			||||||
 | 
					        c['author'] = ''
 | 
				
			||||||
 | 
					    c['score'] = i.get('score', 0)
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        c['date'] = unix(i['created_at'])
 | 
				
			||||||
 | 
					    except KeyError:
 | 
				
			||||||
 | 
					        c['date'] = 0
 | 
				
			||||||
 | 
					    c['text'] = clean(i.get('comment', '') or '')
 | 
				
			||||||
 | 
					    c['comments'] = []
 | 
				
			||||||
 | 
					    return c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def iter_comments(flat_comments):
 | 
				
			||||||
 | 
					    nested_comments = []
 | 
				
			||||||
 | 
					    parent_stack = []
 | 
				
			||||||
 | 
					    for comment in flat_comments:
 | 
				
			||||||
 | 
					        c = make_comment(comment)
 | 
				
			||||||
 | 
					        indent = comment['depth']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if indent == 0:
 | 
				
			||||||
 | 
					            nested_comments.append(c)
 | 
				
			||||||
 | 
					            parent_stack = [c]
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            parent_stack = parent_stack[:indent]
 | 
				
			||||||
 | 
					            p = parent_stack[-1]
 | 
				
			||||||
 | 
					            p['comments'].append(c)
 | 
				
			||||||
 | 
					            parent_stack.append(c)
 | 
				
			||||||
 | 
					    return nested_comments
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def story(ref):
 | 
				
			||||||
 | 
					    r = api(API_ITEM, ref)
 | 
				
			||||||
 | 
					    if not r:
 | 
				
			||||||
 | 
					        logging.info('Bad Lobsters API response.')
 | 
				
			||||||
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    s = {}
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        s['author'] = r['submitter_user']
 | 
				
			||||||
 | 
					        s['author_link'] = SITE_AUTHOR_LINK(s['author'])
 | 
				
			||||||
 | 
					    except KeyError:
 | 
				
			||||||
 | 
					        s['author'] = ''
 | 
				
			||||||
 | 
					        s['author_link'] = ''
 | 
				
			||||||
 | 
					    s['score'] = r.get('score', 0)
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        s['date'] = unix(r['created_at'])
 | 
				
			||||||
 | 
					    except KeyError:
 | 
				
			||||||
 | 
					        s['date'] = 0
 | 
				
			||||||
 | 
					    s['title'] = r.get('title', '')
 | 
				
			||||||
 | 
					    s['link'] = SITE_LINK(ref)
 | 
				
			||||||
 | 
					    s['url'] = r.get('url', '')
 | 
				
			||||||
 | 
					    s['comments'] = iter_comments(r['comments'])
 | 
				
			||||||
 | 
					    s['num_comments'] = r['comment_count']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if s['score'] < 15 and s['num_comments'] < 10:
 | 
				
			||||||
 | 
					        logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
 | 
				
			||||||
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if 'description' in r and r['description']:
 | 
				
			||||||
 | 
					        s['text'] = clean(r['description'] or '')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# scratchpad so I can quickly develop the parser
 | 
				
			||||||
 | 
					if __name__ == '__main__':
 | 
				
			||||||
 | 
					    #print(feed())
 | 
				
			||||||
 | 
					    import json
 | 
				
			||||||
 | 
					    print(json.dumps(story('fzvd1v'), indent=4))
 | 
				
			||||||
 | 
					    #print(json.dumps(story('ixyv5u'), indent=4))
 | 
				
			||||||
@@ -7,8 +7,6 @@ import requests
 | 
				
			|||||||
import time
 | 
					import time
 | 
				
			||||||
from bs4 import BeautifulSoup
 | 
					from bs4 import BeautifulSoup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import settings
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'
 | 
					USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def api(route):
 | 
					def api(route):
 | 
				
			||||||
@@ -29,13 +27,15 @@ def api(route):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def story(ref):
 | 
					def story(ref):
 | 
				
			||||||
    html = api(ref)
 | 
					    html = api(ref)
 | 
				
			||||||
    if not html: return False
 | 
					    if not html:
 | 
				
			||||||
 | 
					        logging.info('Bad http GET response.')
 | 
				
			||||||
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    soup = BeautifulSoup(html, features='html.parser')
 | 
					    soup = BeautifulSoup(html, features='html.parser')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    s = {}
 | 
					    s = {}
 | 
				
			||||||
    s['author'] = 'manual submission'
 | 
					    s['author'] = 'manual submission'
 | 
				
			||||||
    s['author_link'] = 'https://{}'.format(settings.HOSTNAME)
 | 
					    s['author_link'] = 'https://news.t0.vc'
 | 
				
			||||||
    s['score'] = 0
 | 
					    s['score'] = 0
 | 
				
			||||||
    s['date'] = int(time.time())
 | 
					    s['date'] = int(time.time())
 | 
				
			||||||
    s['title'] = str(soup.title.string) if soup.title else ref
 | 
					    s['title'] = str(soup.title.string) if soup.title else ref
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,307 +0,0 @@
 | 
				
			|||||||
import logging
 | 
					 | 
				
			||||||
logging.basicConfig(
 | 
					 | 
				
			||||||
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 | 
					 | 
				
			||||||
        level=logging.DEBUG)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
if __name__ == '__main__':
 | 
					 | 
				
			||||||
    import sys
 | 
					 | 
				
			||||||
    sys.path.insert(0,'.')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import requests
 | 
					 | 
				
			||||||
from datetime import datetime
 | 
					 | 
				
			||||||
from bs4 import BeautifulSoup
 | 
					 | 
				
			||||||
from scrapers import declutter
 | 
					 | 
				
			||||||
import dateutil.parser
 | 
					 | 
				
			||||||
import extruct
 | 
					 | 
				
			||||||
import pytz
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from utils import clean
 | 
					 | 
				
			||||||
import settings
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
tzinfos = {
 | 
					 | 
				
			||||||
    'NZDT': pytz.timezone('Pacific/Auckland'),
 | 
					 | 
				
			||||||
    'NZST': pytz.timezone('Pacific/Auckland')
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'
 | 
					 | 
				
			||||||
#USER_AGENT = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def unix(date_str, tz=None):
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        dt = dateutil.parser.parse(date_str, tzinfos=tzinfos)
 | 
					 | 
				
			||||||
        if tz:
 | 
					 | 
				
			||||||
            dt = pytz.timezone(tz).localize(dt)
 | 
					 | 
				
			||||||
        return int(dt.timestamp())
 | 
					 | 
				
			||||||
    except:
 | 
					 | 
				
			||||||
        pass
 | 
					 | 
				
			||||||
    return 0
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def xml(route, ref=None):
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        headers = {'User-Agent': USER_AGENT, 'X-Forwarded-For': '66.249.66.1'}
 | 
					 | 
				
			||||||
        r = requests.get(route(ref), headers=headers, timeout=5)
 | 
					 | 
				
			||||||
        if r.status_code != 200:
 | 
					 | 
				
			||||||
            raise Exception('Bad response code ' + str(r.status_code))
 | 
					 | 
				
			||||||
        return r.text
 | 
					 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					 | 
				
			||||||
        raise
 | 
					 | 
				
			||||||
    except BaseException as e:
 | 
					 | 
				
			||||||
        logging.error('Problem hitting URL: {}'.format(str(e)))
 | 
					 | 
				
			||||||
        return False
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def parse_extruct(s, data):
 | 
					 | 
				
			||||||
    rdfa_keys = {
 | 
					 | 
				
			||||||
        'title': [
 | 
					 | 
				
			||||||
            'http://ogp.me/ns#title',
 | 
					 | 
				
			||||||
            'https://ogp.me/ns#title',
 | 
					 | 
				
			||||||
        ],
 | 
					 | 
				
			||||||
        'date': [
 | 
					 | 
				
			||||||
            'http://ogp.me/ns/article#modified_time',
 | 
					 | 
				
			||||||
            'https://ogp.me/ns/article#modified_time',
 | 
					 | 
				
			||||||
            'http://ogp.me/ns/article#published_time',
 | 
					 | 
				
			||||||
            'https://ogp.me/ns/article#published_time',
 | 
					 | 
				
			||||||
        ]
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    for rdfa in data['rdfa']:
 | 
					 | 
				
			||||||
        for key, props in rdfa.items():
 | 
					 | 
				
			||||||
            for attribute, properties in rdfa_keys.items():
 | 
					 | 
				
			||||||
                for prop in properties:
 | 
					 | 
				
			||||||
                    if prop in props:
 | 
					 | 
				
			||||||
                        for values in props[prop]:
 | 
					 | 
				
			||||||
                            s[attribute] = values['@value']
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for og in data['opengraph']:
 | 
					 | 
				
			||||||
        titles = list(filter(None, [value if 'og:title' in key else None for key, value in og['properties']]))
 | 
					 | 
				
			||||||
        modified = list(filter(None, [value if 'article:modified_time' in key else None for key, value in og['properties']]))
 | 
					 | 
				
			||||||
        published = list(filter(None, [value if 'article:published_time' in key else None for key, value in og['properties']]))
 | 
					 | 
				
			||||||
        if len(modified):
 | 
					 | 
				
			||||||
            s['date'] = modified[0]
 | 
					 | 
				
			||||||
        if len(published):
 | 
					 | 
				
			||||||
            s['date'] = published[0]
 | 
					 | 
				
			||||||
        if len(titles):
 | 
					 | 
				
			||||||
            s['title'] = titles[0]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for md in data['microdata']:
 | 
					 | 
				
			||||||
        if md['type'] in ['https://schema.org/NewsArticle', 'http://schema.org/NewsArticle']:
 | 
					 | 
				
			||||||
            props = md['properties']
 | 
					 | 
				
			||||||
            s['title'] = props['headline']
 | 
					 | 
				
			||||||
            if props['dateModified']:
 | 
					 | 
				
			||||||
                s['date'] = props['dateModified']
 | 
					 | 
				
			||||||
            if props['datePublished']:
 | 
					 | 
				
			||||||
                s['date'] = props['datePublished']
 | 
					 | 
				
			||||||
            if 'author' in props and props['author']:
 | 
					 | 
				
			||||||
                if 'properties' in props['author']:
 | 
					 | 
				
			||||||
                    s['author'] = props['author']['properties']['name']
 | 
					 | 
				
			||||||
                elif isinstance(props['author'], list):
 | 
					 | 
				
			||||||
                    s['author'] = props['author'][0]['properties']['name']
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for ld in data['json-ld']:
 | 
					 | 
				
			||||||
        if '@type' in ld and ld['@type'] in ['Article', 'NewsArticle']:
 | 
					 | 
				
			||||||
            s['title'] = ld['headline']
 | 
					 | 
				
			||||||
            if ld['dateModified']:
 | 
					 | 
				
			||||||
                s['date'] = ld['dateModified']
 | 
					 | 
				
			||||||
            if ld['datePublished']:
 | 
					 | 
				
			||||||
                s['date'] = ld['datePublished']
 | 
					 | 
				
			||||||
            if 'author' in ld and ld['author']:
 | 
					 | 
				
			||||||
                if 'name' in ld['author']:
 | 
					 | 
				
			||||||
                    s['author'] = ld['author']['name']
 | 
					 | 
				
			||||||
                elif isinstance(ld['author'], list):
 | 
					 | 
				
			||||||
                    s['author'] = ld['author'][0]['name']
 | 
					 | 
				
			||||||
        if '@graph' in ld:
 | 
					 | 
				
			||||||
            for gld in ld['@graph']:
 | 
					 | 
				
			||||||
                if '@type' in gld and gld['@type'] in ['Article', 'NewsArticle']:
 | 
					 | 
				
			||||||
                    s['title'] = gld['headline']
 | 
					 | 
				
			||||||
                    if gld['dateModified']:
 | 
					 | 
				
			||||||
                        s['date'] = gld['dateModified']
 | 
					 | 
				
			||||||
                    if gld['datePublished']:
 | 
					 | 
				
			||||||
                        s['date'] = gld['datePublished']
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return s
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def comment(i):
 | 
					 | 
				
			||||||
    if 'author' not in i:
 | 
					 | 
				
			||||||
        return False
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    c = {}
 | 
					 | 
				
			||||||
    c['author'] = i.get('author', '')
 | 
					 | 
				
			||||||
    c['score'] = i.get('points', 0)
 | 
					 | 
				
			||||||
    c['date'] = unix(i.get('date', 0))
 | 
					 | 
				
			||||||
    c['text'] = clean(i.get('text', '') or '')
 | 
					 | 
				
			||||||
    c['comments'] = [comment(j) for j in i['children']]
 | 
					 | 
				
			||||||
    c['comments'] = list(filter(bool, c['comments']))
 | 
					 | 
				
			||||||
    return c
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def comment_count(i):
 | 
					 | 
				
			||||||
    alive = 1 if i['author'] else 0
 | 
					 | 
				
			||||||
    return sum([comment_count(c) for c in i['comments']]) + alive
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class _Base:
 | 
					 | 
				
			||||||
    def __init__(url, tz=None):
 | 
					 | 
				
			||||||
        self.url = url
 | 
					 | 
				
			||||||
        self.tz = tz
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def feed(self, excludes=None):
 | 
					 | 
				
			||||||
        return []
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def story(self, ref):
 | 
					 | 
				
			||||||
        markup = xml(lambda x: ref)
 | 
					 | 
				
			||||||
        if not markup:
 | 
					 | 
				
			||||||
            return False
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        s = {}
 | 
					 | 
				
			||||||
        s['author_link'] = ''
 | 
					 | 
				
			||||||
        s['score'] = 0
 | 
					 | 
				
			||||||
        s['comments'] = []
 | 
					 | 
				
			||||||
        s['num_comments'] = 0
 | 
					 | 
				
			||||||
        s['link'] = ref
 | 
					 | 
				
			||||||
        s['url'] = ref
 | 
					 | 
				
			||||||
        s['date'] = 0
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        soup = BeautifulSoup(markup, features='html.parser')
 | 
					 | 
				
			||||||
        icon32 = soup.find_all('link', rel="icon", href=True, sizes="32x32")
 | 
					 | 
				
			||||||
        icon16 = soup.find_all('link', rel="icon", href=True, sizes="16x16")
 | 
					 | 
				
			||||||
        favicon = soup.find_all('link', rel="shortcut icon", href=True)
 | 
					 | 
				
			||||||
        others = soup.find_all('link', rel="icon", href=True)
 | 
					 | 
				
			||||||
        icons = icon32 + icon16 + favicon + others
 | 
					 | 
				
			||||||
        base_url = '/'.join(ref.split('/')[:3])
 | 
					 | 
				
			||||||
        icons = list(set([i.get('href') for i in icons]))
 | 
					 | 
				
			||||||
        icons = [i if i.startswith('http') else base_url + i for i in icons]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if icons:
 | 
					 | 
				
			||||||
            s['icon'] = icons[0]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        data = extruct.extract(markup)
 | 
					 | 
				
			||||||
        s = parse_extruct(s, data)
 | 
					 | 
				
			||||||
        if s['date']:
 | 
					 | 
				
			||||||
            s['date'] = unix(s['date'], tz=self.tz)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if 'disqus' in markup:
 | 
					 | 
				
			||||||
            try:
 | 
					 | 
				
			||||||
                s['comments'] = declutter.get_comments(ref)
 | 
					 | 
				
			||||||
                c['comments'] = list(filter(bool, c['comments']))
 | 
					 | 
				
			||||||
                s['num_comments'] = comment_count(s['comments'])
 | 
					 | 
				
			||||||
            except KeyboardInterrupt:
 | 
					 | 
				
			||||||
                raise
 | 
					 | 
				
			||||||
            except:
 | 
					 | 
				
			||||||
                pass
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if not s['date']:
 | 
					 | 
				
			||||||
            return False
 | 
					 | 
				
			||||||
        return s
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_sitemap_date(a):
 | 
					 | 
				
			||||||
    if a.find('lastmod'):
 | 
					 | 
				
			||||||
        return a.find('lastmod').text
 | 
					 | 
				
			||||||
    if a.find('news:publication_date'):
 | 
					 | 
				
			||||||
        return a.find('news:publication_date').text
 | 
					 | 
				
			||||||
    if a.find('ns2:publication_date'):
 | 
					 | 
				
			||||||
        return a.find('ns2:publication_date').text
 | 
					 | 
				
			||||||
    return ''
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class Sitemap(_Base):
 | 
					 | 
				
			||||||
    def __init__(self, url, tz=None):
 | 
					 | 
				
			||||||
        self.tz = tz
 | 
					 | 
				
			||||||
        self.sitemap_url = url
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def feed(self, excludes=None):
 | 
					 | 
				
			||||||
        links = []
 | 
					 | 
				
			||||||
        if isinstance(self.sitemap_url, str):
 | 
					 | 
				
			||||||
            links += self._get_sitemap(self.sitemap_url, excludes)
 | 
					 | 
				
			||||||
        elif isinstance(self.sitemap_url, list):
 | 
					 | 
				
			||||||
            for url in self.sitemap_url:
 | 
					 | 
				
			||||||
                links += self._get_sitemap(url, excludes)
 | 
					 | 
				
			||||||
        return list(set(links))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def _filter_links(self, links, excludes=None):
 | 
					 | 
				
			||||||
        too_old = datetime.now().timestamp() - settings.MAX_STORY_AGE
 | 
					 | 
				
			||||||
        links = list(filter(None, [a if get_sitemap_date(a) else None for a in links]))
 | 
					 | 
				
			||||||
        links = list(filter(None, [a if unix(get_sitemap_date(a)) > too_old else None for a in links]))
 | 
					 | 
				
			||||||
        links.sort(key=lambda a: unix(get_sitemap_date(a)), reverse=True)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        links = [x.find('loc').text for x in links] or []
 | 
					 | 
				
			||||||
        links = list(set(links))
 | 
					 | 
				
			||||||
        if excludes:
 | 
					 | 
				
			||||||
            links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
 | 
					 | 
				
			||||||
        return links
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def _get_sitemap(self, feed_url, excludes=None):
 | 
					 | 
				
			||||||
        markup = xml(lambda x: feed_url)
 | 
					 | 
				
			||||||
        if not markup: return []
 | 
					 | 
				
			||||||
        soup = BeautifulSoup(markup, features='lxml')
 | 
					 | 
				
			||||||
        links = []
 | 
					 | 
				
			||||||
        feed_urls = []
 | 
					 | 
				
			||||||
        if soup.find('sitemapindex'):
 | 
					 | 
				
			||||||
            sitemap = soup.find('sitemapindex').findAll('sitemap')
 | 
					 | 
				
			||||||
            feed_urls = list(filter(None, [a if a.find('loc') else None for a in sitemap]))
 | 
					 | 
				
			||||||
        if soup.find('urlset'):
 | 
					 | 
				
			||||||
            sitemap = soup.find('urlset').findAll('url')
 | 
					 | 
				
			||||||
            links = list(filter(None, [a if a.find('loc') else None for a in sitemap]))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        feed_urls = self._filter_links(feed_urls, excludes)
 | 
					 | 
				
			||||||
        links = self._filter_links(links, excludes)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        for url in feed_urls:
 | 
					 | 
				
			||||||
            links += self._get_sitemap(url, excludes)
 | 
					 | 
				
			||||||
        return list(set(links))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class Category(_Base):
    """Feed source that harvests article links from category listing pages."""

    def __init__(self, url, tz=None):
        """Store the category URL (a string or a list of strings) and `tz`."""
        self.category_url = url
        self.tz = tz

    def _filter_links(self, links, category_url, excludes=None):
        """Return the unique links located under `category_url`.

        The category page itself is dropped, as is any link containing one of
        the `excludes` substrings. Result order is not preserved.
        """
        under_category = [
            link for link in links
            if link.startswith(category_url) and link != category_url
        ]
        unique = list(set(under_category))
        if not excludes:
            return unique
        return [
            link for link in unique
            if not any(fragment in link for fragment in excludes)
        ]

    def _get_category(self, category_url, excludes=None):
        """Fetch one category page and return its filtered article links."""
        # scheme + host, e.g. "https://example.com"
        origin = '/'.join(category_url.split('/')[:3])

        markup = xml(lambda x: category_url)
        if not markup:
            return []

        soup = BeautifulSoup(markup, features='html.parser')

        hrefs = []
        for anchor in soup.find_all('a', href=True):
            href = anchor.get('href')
            # Root-relative links are made absolute against the page's origin.
            hrefs.append(f"{origin}{href}" if href.startswith('/') else href)

        return self._filter_links(hrefs, category_url, excludes)

    def feed(self, excludes=None):
        """Return the unique article links of every configured category."""
        source = self.category_url
        if isinstance(source, str):
            targets = [source]
        elif isinstance(source, list):
            targets = source
        else:
            targets = []

        gathered = []
        for target in targets:
            gathered += self._get_category(target, excludes)
        return list(set(gathered))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# scratchpad so I can quickly develop the parser
 | 
					 | 
				
			||||||
if __name__ == '__main__':
    # Manual smoke test: hits the live sites, so only run it by hand.
    print("Sitemap: The Spinoff")
    site = Sitemap("https://thespinoff.co.nz/sitemap.xml")
    excludes = [
        'thespinoff.co.nz/sitemap-misc.xml',
        'thespinoff.co.nz/sitemap-authors.xml',
        'thespinoff.co.nz/sitemap-tax-category.xml',
    ]
    posts = site.feed(excludes)
    print(posts[:5])
    # Indexing an empty feed would raise IndexError, so check first.
    if posts:
        print(site.story(posts[0]))

    print("Sitemap: Newshub")
    site = Sitemap([
        'https://www.newshub.co.nz/home/politics.gnewssitemap.xml',
        'https://www.newshub.co.nz/home/new-zealand.gnewssitemap.xml',
        'https://www.newshub.co.nz/home/world.gnewssitemap.xml',
        'https://www.newshub.co.nz/home/money.gnewssitemap.xml',
    ])
    posts = site.feed()
    print(posts[:5])
    if posts:
        print(site.story(posts[0]))
        # Previously passed the slice posts[:-1] (a list); every other call
        # hands story() a single ref, so use the last post instead.
        # NOTE(review): story() is defined outside this view - confirm it
        # expects a single ref.
        print(site.story(posts[-1]))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
@@ -32,11 +32,8 @@ def feed():
 | 
				
			|||||||
        return [x.id for x in reddit.subreddit(subs).hot()]
 | 
					        return [x.id for x in reddit.subreddit(subs).hot()]
 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					    except KeyboardInterrupt:
 | 
				
			||||||
        raise
 | 
					        raise
 | 
				
			||||||
    except PRAWException as e:
 | 
					    except BaseException as e:
 | 
				
			||||||
        logging.error('Problem hitting reddit API: {}'.format(str(e)))
 | 
					        logging.critical('Problem hitting reddit API: {}'.format(str(e)))
 | 
				
			||||||
        return []
 | 
					 | 
				
			||||||
    except PrawcoreException as e:
 | 
					 | 
				
			||||||
        logging.error('Problem hitting reddit API: {}'.format(str(e)))
 | 
					 | 
				
			||||||
        return []
 | 
					        return []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def comment(i):
 | 
					def comment(i):
 | 
				
			||||||
@@ -59,7 +56,9 @@ def comment(i):
 | 
				
			|||||||
def story(ref):
 | 
					def story(ref):
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        r = reddit.submission(ref)
 | 
					        r = reddit.submission(ref)
 | 
				
			||||||
        if not r: return False
 | 
					        if not r:
 | 
				
			||||||
 | 
					            logging.info('Bad Reddit API response.')
 | 
				
			||||||
 | 
					            return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        s = {}
 | 
					        s = {}
 | 
				
			||||||
        s['author'] = r.author.name if r.author else '[Deleted]'
 | 
					        s['author'] = r.author.name if r.author else '[Deleted]'
 | 
				
			||||||
@@ -73,7 +72,8 @@ def story(ref):
 | 
				
			|||||||
        s['comments'] = list(filter(bool, s['comments']))
 | 
					        s['comments'] = list(filter(bool, s['comments']))
 | 
				
			||||||
        s['num_comments'] = r.num_comments
 | 
					        s['num_comments'] = r.num_comments
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if s['score'] < settings.REDDIT_SCORE_THRESHOLD and s['num_comments'] < settings.REDDIT_COMMENT_THRESHOLD:
 | 
					        if s['score'] < 25 and s['num_comments'] < 10:
 | 
				
			||||||
 | 
					            logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
 | 
				
			||||||
            return False
 | 
					            return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if r.selftext:
 | 
					        if r.selftext:
 | 
				
			||||||
@@ -84,10 +84,10 @@ def story(ref):
 | 
				
			|||||||
    except KeyboardInterrupt:
 | 
					    except KeyboardInterrupt:
 | 
				
			||||||
        raise
 | 
					        raise
 | 
				
			||||||
    except PRAWException as e:
 | 
					    except PRAWException as e:
 | 
				
			||||||
        logging.error('Problem hitting reddit API: {}'.format(str(e)))
 | 
					        logging.critical('Problem hitting reddit API: {}'.format(str(e)))
 | 
				
			||||||
        return False
 | 
					        return False
 | 
				
			||||||
    except PrawcoreException as e:
 | 
					    except PrawcoreException as e:
 | 
				
			||||||
        logging.error('Problem hitting reddit API: {}'.format(str(e)))
 | 
					        logging.critical('Problem hitting reddit API: {}'.format(str(e)))
 | 
				
			||||||
        return False
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# scratchpad so I can quickly develop the parser
 | 
					# scratchpad so I can quickly develop the parser
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,165 +0,0 @@
 | 
				
			|||||||
import logging
 | 
					 | 
				
			||||||
logging.basicConfig(
 | 
					 | 
				
			||||||
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 | 
					 | 
				
			||||||
        level=logging.DEBUG)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
if __name__ == '__main__':
 | 
					 | 
				
			||||||
    import sys
 | 
					 | 
				
			||||||
    sys.path.insert(0,'.')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import requests
 | 
					 | 
				
			||||||
from datetime import datetime
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from utils import clean
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
SUBSTACK_REFERER = 'https://substack.com'
 | 
					 | 
				
			||||||
SUBSTACK_API_TOP_POSTS = lambda x: "https://substack.com/api/v1/reader/top-posts"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def author_link(author_id, base_url):
    """Return the profile URL of `author_id` on the site at `base_url`.

    Trailing slashes on `base_url` are dropped so the result never contains a
    doubled `//` before `people`.
    """
    return f"{str(base_url).rstrip('/')}/people/{author_id}"
 | 
					 | 
				
			||||||
def api_comments(post_id, base_url):
    """Return the comments API URL for `post_id` on the site at `base_url`.

    Trailing slashes on `base_url` are dropped so the path never starts with
    a doubled `//`.
    """
    return f"{str(base_url).rstrip('/')}/api/v1/post/{post_id}/comments?all_comments=true&sort=best_first"
 | 
					 | 
				
			||||||
def api_stories(x, base_url):
    """Return the archive API URL (newest first, limit 100) for `base_url`.

    `x` is unused; it is accepted so the function can be wrapped as a
    one-argument route for `api`. Trailing slashes on `base_url` are dropped
    so the path never starts with a doubled `//`.
    """
    return f"{str(base_url).rstrip('/')}/api/v1/archive?sort=new&search=&offset=0&limit=100"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def unix(date_str):
    """Convert a `YYYY-MM-DDTHH:MM:SS.ffffffZ` string to a Unix timestamp.

    The trailing `Z` marks the value as UTC, so the parsed datetime is tagged
    as UTC before conversion. A naive datetime would be interpreted in the
    host's local time zone and shift the result by the local offset.

    Returns whole seconds as an int. Raises ValueError when `date_str` does
    not match the expected format.
    """
    # Local import: the module itself only imports `datetime` from datetime.
    from datetime import timezone

    parsed = datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%fZ')
    return int(parsed.replace(tzinfo=timezone.utc).timestamp())
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def api(route, ref=None, referer=None):
    """GET the URL produced by `route(ref)` and return the decoded JSON.

    Two attempts are made: the first with a 10 second timeout, the second
    with 20 seconds. Any failure (including a non-200 status) is logged;
    KeyboardInterrupt is re-raised untouched.

    Returns the parsed JSON, or False when both attempts fail.
    """
    headers = {'Referer': referer} if referer else None

    # (timeout in seconds, log message template) for each attempt, in order.
    attempts = (
        (10, 'Problem hitting Substack API: {}, trying again'),
        (20, 'Problem hitting Substack API: {}'),
    )

    for timeout, message in attempts:
        try:
            r = requests.get(route(ref), headers=headers, timeout=timeout)
            if r.status_code != 200:
                raise Exception('Bad response code ' + str(r.status_code))
            return r.json()
        except KeyboardInterrupt:
            raise
        except BaseException as e:
            logging.error(message.format(str(e)))

    return False
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def comment(i):
    """Convert one Substack comment dict into the internal comment format.

    Returns False when the entry has no `body` key; otherwise a dict with
    `date`, `author`, `score`, `text` and `comments`, where `comments` holds
    the recursively converted children.
    """
    if 'body' not in i:
        return False

    # Tolerate a missing or null `reactions` / `children` field instead of
    # raising AttributeError / KeyError on it.
    reactions = i.get('reactions') or {}
    children = i.get('children') or []

    c = {}
    c['date'] = unix(i.get('date'))
    c['author'] = i.get('name', '')
    c['score'] = reactions.get('❤')
    c['text'] = clean(i.get('body', '') or '')
    # Children without a body convert to False and are dropped.
    c['comments'] = [converted for converted in map(comment, children) if converted]

    return c
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class Publication:
    """Feed source for a single Substack publication."""

    def __init__(self, domain):
        """Remember the publication's base URL; it is also sent as Referer."""
        self.BASE_DOMAIN = domain

    def _public_stories(self):
        """Return archive entries whose audience is "everyone".

        Returns an empty list when the archive request fails.
        """
        stories = api(lambda x: api_stories(x, self.BASE_DOMAIN), referer=self.BASE_DOMAIN)
        if not stories:
            return []
        return [i for i in stories if i.get("audience") == "everyone"]

    def feed(self):
        """Return the ids (as strings) of the publicly readable stories."""
        return [str(i.get("id")) for i in self._public_stories()]

    def story(self, ref):
        """Return the story whose id equals `ref`, or False when not found.

        The archive is fetched again and searched for `ref`. The story's
        comments come from a second request; when that request fails the
        story is still returned, with an empty comment list.
        """
        r = next((i for i in self._public_stories() if str(i.get('id')) == ref), None)
        if not r:
            return False

        s = {}
        s['author'] = ''
        s['author_link'] = ''

        s['date'] = unix(r.get('post_date'))
        # `reactions` may be absent or null; treat that as no score.
        s['score'] = (r.get('reactions') or {}).get('❤')
        s['title'] = r.get('title', '')
        s['link'] = r.get('canonical_url', '')
        s['url'] = r.get('canonical_url', '')

        comments = api(lambda x: api_comments(x, self.BASE_DOMAIN), r.get('id'), referer=self.BASE_DOMAIN)
        # api() returns False once both attempts fail, so calling .get() on
        # the result unconditionally would raise AttributeError.
        raw_comments = (comments.get('comments') if comments else None) or []
        s['comments'] = [c for c in map(comment, raw_comments) if c]
        s['num_comments'] = r.get('comment_count', 0)

        bylines = r.get('publishedBylines') or []
        authors = [a for a in map(self._bylines, bylines) if a]
        if authors:
            # Only the first byline is reported as the author.
            s['author'] = authors[0].get('name')
            s['author_link'] = authors[0].get('link')

        return s

    def _bylines(self, b):
        """Return `{'name', 'link'}` for a byline dict, or None without an id."""
        if 'id' not in b:
            return None
        a = {}
        a['name'] = b.get('name')
        a['link'] = author_link(b.get('id'), self.BASE_DOMAIN)
        return a
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class Top:
    """Feed source backed by the Substack top-posts endpoint."""

    def _public_stories(self):
        """Return top posts whose audience is "everyone".

        Returns an empty list when the request fails.
        """
        stories = api(SUBSTACK_API_TOP_POSTS, referer=SUBSTACK_REFERER)
        if not stories:
            return []
        return [i for i in stories if i.get("audience") == "everyone"]

    def feed(self):
        """Return the ids (as strings) of the publicly readable top posts."""
        return [str(i.get("id")) for i in self._public_stories()]

    def story(self, ref):
        """Return the top post whose id equals `ref`, or False when not found.

        The top-posts list is fetched again and searched for `ref`. Comments
        are requested from the post's own publication (`pub.base_url`); when
        that request fails the story is still returned, with an empty comment
        list.
        """
        r = next((i for i in self._public_stories() if str(i.get('id')) == ref), None)
        if not r:
            return False

        s = {}
        pub = r.get('pub')
        base_url = pub.get('base_url')
        s['author'] = pub.get('author_name')
        s['author_link'] = author_link(pub.get('author_id'), base_url)

        s['date'] = unix(r.get('post_date'))
        s['score'] = r.get('score')
        s['title'] = r.get('title', '')
        s['link'] = r.get('canonical_url', '')
        s['url'] = r.get('canonical_url', '')

        comments = api(lambda x: api_comments(x, base_url), r.get('id'), referer=SUBSTACK_REFERER)
        # api() returns False once both attempts fail, so calling .get() on
        # the result unconditionally would raise AttributeError.
        raw_comments = (comments.get('comments') if comments else None) or []
        s['comments'] = [c for c in map(comment, raw_comments) if c]
        s['num_comments'] = r.get('comment_count', 0)

        return s
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
top = Top()        
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# scratchpad so I can quickly develop the parser
 | 
					 | 
				
			||||||
if __name__ == '__main__':
 | 
					 | 
				
			||||||
    top_posts = top.feed()
 | 
					 | 
				
			||||||
    print(top.story(top_posts[0]))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    webworm = Publication("https://www.webworm.co/")
 | 
					 | 
				
			||||||
    posts = webworm.feed()
 | 
					 | 
				
			||||||
    print(webworm.story(posts[0]))
 | 
					 | 
				
			||||||
@@ -34,7 +34,7 @@ def api(route):
 | 
				
			|||||||
    except KeyboardInterrupt:
 | 
					    except KeyboardInterrupt:
 | 
				
			||||||
        raise
 | 
					        raise
 | 
				
			||||||
    except BaseException as e:
 | 
					    except BaseException as e:
 | 
				
			||||||
        logging.error('Problem hitting tildes website: {}'.format(str(e)))
 | 
					        logging.critical('Problem hitting tildes website: {}'.format(str(e)))
 | 
				
			||||||
        return False
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def feed():
 | 
					def feed():
 | 
				
			||||||
@@ -71,11 +71,15 @@ def story(ref):
 | 
				
			|||||||
        html = api(SITE_LINK(group_lookup[ref], ref))
 | 
					        html = api(SITE_LINK(group_lookup[ref], ref))
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        html = api(API_ITEM(ref))
 | 
					        html = api(API_ITEM(ref))
 | 
				
			||||||
    if not html: return False
 | 
					    if not html:
 | 
				
			||||||
 | 
					        logging.info('Bad Tildes API response.')
 | 
				
			||||||
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    soup = BeautifulSoup(html, features='html.parser')
 | 
					    soup = BeautifulSoup(html, features='html.parser')
 | 
				
			||||||
    a = soup.find('article', class_='topic-full')
 | 
					    a = soup.find('article', class_='topic-full')
 | 
				
			||||||
    if a is None: return False
 | 
					    if a is None:
 | 
				
			||||||
 | 
					        logging.info('Tildes <article> element not found.')
 | 
				
			||||||
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    h = a.find('header')
 | 
					    h = a.find('header')
 | 
				
			||||||
    lu = h.find('a', class_='link-user')
 | 
					    lu = h.find('a', class_='link-user')
 | 
				
			||||||
@@ -83,6 +87,7 @@ def story(ref):
 | 
				
			|||||||
    error = a.find('div', class_='text-error')
 | 
					    error = a.find('div', class_='text-error')
 | 
				
			||||||
    if error:
 | 
					    if error:
 | 
				
			||||||
        if 'deleted' in error.string or 'removed' in error.string:
 | 
					        if 'deleted' in error.string or 'removed' in error.string:
 | 
				
			||||||
 | 
					            logging.info('Article was deleted or removed.')
 | 
				
			||||||
            return False
 | 
					            return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    s = {}
 | 
					    s = {}
 | 
				
			||||||
@@ -102,7 +107,21 @@ def story(ref):
 | 
				
			|||||||
    ch = a.find('header', class_='topic-comments-header')
 | 
					    ch = a.find('header', class_='topic-comments-header')
 | 
				
			||||||
    s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
 | 
					    s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if s['score'] < 8 and s['num_comments'] < 6:
 | 
					    if s['group'].split('.')[0] not in [
 | 
				
			||||||
 | 
					        '~arts',
 | 
				
			||||||
 | 
					        '~comp',
 | 
				
			||||||
 | 
					        '~creative',
 | 
				
			||||||
 | 
					        '~design',
 | 
				
			||||||
 | 
					        '~engineering',
 | 
				
			||||||
 | 
					        '~finance',
 | 
				
			||||||
 | 
					        '~science',
 | 
				
			||||||
 | 
					        '~tech',
 | 
				
			||||||
 | 
					    ]:
 | 
				
			||||||
 | 
					        logging.info('Group ({}) not in whitelist.'.format(s['group']))
 | 
				
			||||||
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if s['score'] < 15 and s['num_comments'] < 10:
 | 
				
			||||||
 | 
					        logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
 | 
				
			||||||
        return False
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    td = a.find('div', class_='topic-full-text')
 | 
					    td = a.find('div', class_='topic-full-text')
 | 
				
			||||||
@@ -113,7 +132,7 @@ def story(ref):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
# scratchpad so I can quickly develop the parser
 | 
					# scratchpad so I can quickly develop the parser
 | 
				
			||||||
if __name__ == '__main__':
 | 
					if __name__ == '__main__':
 | 
				
			||||||
    #print(feed())
 | 
					    print(feed())
 | 
				
			||||||
    #normal = story('gxt')
 | 
					    #normal = story('gxt')
 | 
				
			||||||
    #print(normal)
 | 
					    #print(normal)
 | 
				
			||||||
    #no_comments = story('gxr')
 | 
					    #no_comments = story('gxr')
 | 
				
			||||||
@@ -122,8 +141,8 @@ if __name__ == '__main__':
 | 
				
			|||||||
    #print(self_post)
 | 
					    #print(self_post)
 | 
				
			||||||
    #li_comment = story('gqx')
 | 
					    #li_comment = story('gqx')
 | 
				
			||||||
    #print(li_comment)
 | 
					    #print(li_comment)
 | 
				
			||||||
    broken = story('q4y')
 | 
					    #broken = story('q4y')
 | 
				
			||||||
    print(broken)
 | 
					    #print(broken)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # make sure there's no self-reference
 | 
					    # make sure there's no self-reference
 | 
				
			||||||
    #import copy
 | 
					    #import copy
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -4,7 +4,6 @@ certifi==2020.6.20
 | 
				
			|||||||
chardet==3.0.4
 | 
					chardet==3.0.4
 | 
				
			||||||
click==7.1.2
 | 
					click==7.1.2
 | 
				
			||||||
commonmark==0.9.1
 | 
					commonmark==0.9.1
 | 
				
			||||||
extruct==0.10.0
 | 
					 | 
				
			||||||
Flask==1.1.2
 | 
					Flask==1.1.2
 | 
				
			||||||
Flask-Cors==3.0.8
 | 
					Flask-Cors==3.0.8
 | 
				
			||||||
gevent==20.6.2
 | 
					gevent==20.6.2
 | 
				
			||||||
@@ -12,13 +11,11 @@ greenlet==0.4.16
 | 
				
			|||||||
idna==2.10
 | 
					idna==2.10
 | 
				
			||||||
itsdangerous==1.1.0
 | 
					itsdangerous==1.1.0
 | 
				
			||||||
Jinja2==2.11.2
 | 
					Jinja2==2.11.2
 | 
				
			||||||
lxml==4.6.1
 | 
					 | 
				
			||||||
MarkupSafe==1.1.1
 | 
					MarkupSafe==1.1.1
 | 
				
			||||||
packaging==20.4
 | 
					packaging==20.4
 | 
				
			||||||
praw==6.4.0
 | 
					praw==6.4.0
 | 
				
			||||||
prawcore==1.4.0
 | 
					prawcore==1.4.0
 | 
				
			||||||
pyparsing==2.4.7
 | 
					pyparsing==2.4.7
 | 
				
			||||||
pytz==2020.4
 | 
					 | 
				
			||||||
requests==2.24.0
 | 
					requests==2.24.0
 | 
				
			||||||
six==1.15.0
 | 
					six==1.15.0
 | 
				
			||||||
soupsieve==2.0.1
 | 
					soupsieve==2.0.1
 | 
				
			||||||
@@ -30,4 +27,3 @@ websocket-client==0.57.0
 | 
				
			|||||||
Werkzeug==1.0.1
 | 
					Werkzeug==1.0.1
 | 
				
			||||||
zope.event==4.4
 | 
					zope.event==4.4
 | 
				
			||||||
zope.interface==5.1.0
 | 
					zope.interface==5.1.0
 | 
				
			||||||
python-dateutil==2.8.1
 | 
					 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,41 +0,0 @@
 | 
				
			|||||||
import logging
 | 
					 | 
				
			||||||
logging.basicConfig(
 | 
					 | 
				
			||||||
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 | 
					 | 
				
			||||||
        level=logging.DEBUG)
 | 
					 | 
				
			||||||
import requests
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Endpoint of the local reader server that returns article details.
READ_API = 'http://127.0.0.1:33843/browser/details'
# Endpoint queried by get_comments(). It was defined with a doubled
# underscore (READ_COMMENT__API) while get_comments() reads READ_COMMENT_API,
# so every call failed with a swallowed NameError.
# NOTE(review): the path ends in "commentd" - possibly a typo for "comments";
# confirm against the reader server's routes.
READ_COMMENT_API = 'http://127.0.0.1:33843/browser/commentd'
# Old misspelled name kept as an alias in case anything still imports it.
READ_COMMENT__API = READ_COMMENT_API
# Request timeout in seconds.
TIMEOUT = 60
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_html(url):
 | 
					 | 
				
			||||||
    logging.info(f"Reader Scraper: {url}")
 | 
					 | 
				
			||||||
    details = get_details(url)
 | 
					 | 
				
			||||||
    if not details:
 | 
					 | 
				
			||||||
        return ''
 | 
					 | 
				
			||||||
    return details['content']
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_details(url):
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        r = requests.post(READ_API, data=dict(url=url), timeout=TIMEOUT)
 | 
					 | 
				
			||||||
        if r.status_code != 200:
 | 
					 | 
				
			||||||
            raise Exception('Bad response code ' + str(r.status_code))
 | 
					 | 
				
			||||||
        return r.json()
 | 
					 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					 | 
				
			||||||
        raise
 | 
					 | 
				
			||||||
    except BaseException as e:
 | 
					 | 
				
			||||||
        logging.error('Problem Scraping article: {}'.format(str(e)))
 | 
					 | 
				
			||||||
        return None
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_comments(url):
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        r = requests.post(READ_COMMENT_API, data=dict(url=url), timeout=TIMEOUT)
 | 
					 | 
				
			||||||
        if r.status_code != 200:
 | 
					 | 
				
			||||||
            raise Exception('Bad response code ' + str(r.status_code))
 | 
					 | 
				
			||||||
        return r.json()
 | 
					 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					 | 
				
			||||||
        raise
 | 
					 | 
				
			||||||
    except BaseException as e:
 | 
					 | 
				
			||||||
        logging.error('Problem getting comments for article: {}'.format(str(e)))
 | 
					 | 
				
			||||||
        return None
 | 
					 | 
				
			||||||
@@ -1,41 +0,0 @@
 | 
				
			|||||||
import logging
 | 
					 | 
				
			||||||
logging.basicConfig(
 | 
					 | 
				
			||||||
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 | 
					 | 
				
			||||||
        level=logging.DEBUG)
 | 
					 | 
				
			||||||
import requests
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
DECLUTTER_API = 'https://declutter.1j.nz/details'
 | 
					 | 
				
			||||||
DECLUTTER_COMMENT_API = 'https://declutter.1j.nz/comments'
 | 
					 | 
				
			||||||
TIMEOUT = 30
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_html(url):
 | 
					 | 
				
			||||||
    logging.info(f"Declutter Scraper: {url}")
 | 
					 | 
				
			||||||
    details = get_details(url)
 | 
					 | 
				
			||||||
    if not details:
 | 
					 | 
				
			||||||
        return ''
 | 
					 | 
				
			||||||
    return details['content']
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_details(url):
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        r = requests.post(DECLUTTER_API, data=dict(url=url), timeout=TIMEOUT)
 | 
					 | 
				
			||||||
        if r.status_code != 200:
 | 
					 | 
				
			||||||
            raise Exception('Bad response code ' + str(r.status_code))
 | 
					 | 
				
			||||||
        return r.json()
 | 
					 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					 | 
				
			||||||
        raise
 | 
					 | 
				
			||||||
    except BaseException as e:
 | 
					 | 
				
			||||||
        logging.error('Problem decluttering article: {}'.format(str(e)))
 | 
					 | 
				
			||||||
        return None
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_comments(url):
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        r = requests.post(DECLUTTER_COMMENT_API, data=dict(url=url), timeout=TIMEOUT)
 | 
					 | 
				
			||||||
        if r.status_code != 200:
 | 
					 | 
				
			||||||
            raise Exception('Bad response code ' + str(r.status_code))
 | 
					 | 
				
			||||||
        return r.json()
 | 
					 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					 | 
				
			||||||
        raise
 | 
					 | 
				
			||||||
    except BaseException as e:
 | 
					 | 
				
			||||||
        logging.error('Problem getting comments for article: {}'.format(str(e)))
 | 
					 | 
				
			||||||
        return None
 | 
					 | 
				
			||||||
@@ -1,27 +0,0 @@
 | 
				
			|||||||
import logging
 | 
					 | 
				
			||||||
logging.basicConfig(
 | 
					 | 
				
			||||||
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 | 
					 | 
				
			||||||
        level=logging.DEBUG)
 | 
					 | 
				
			||||||
import requests
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
READ_API = 'http://127.0.0.1:33843/details'
 | 
					 | 
				
			||||||
TIMEOUT = 20
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_html(url):
 | 
					 | 
				
			||||||
    logging.info(f"Local Scraper: {url}")
 | 
					 | 
				
			||||||
    details = get_details(url)
 | 
					 | 
				
			||||||
    if not details:
 | 
					 | 
				
			||||||
        return ''
 | 
					 | 
				
			||||||
    return details['content']
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_details(url):
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        r = requests.post(READ_API, data=dict(url=url), timeout=TIMEOUT)
 | 
					 | 
				
			||||||
        if r.status_code != 200:
 | 
					 | 
				
			||||||
            raise Exception('Bad response code ' + str(r.status_code))
 | 
					 | 
				
			||||||
        return r.json()
 | 
					 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					 | 
				
			||||||
        raise
 | 
					 | 
				
			||||||
    except BaseException as e:
 | 
					 | 
				
			||||||
        logging.error('Problem getting article: {}'.format(str(e)))
 | 
					 | 
				
			||||||
        return None
 | 
					 | 
				
			||||||
@@ -1,37 +0,0 @@
 | 
				
			|||||||
import logging
 | 
					 | 
				
			||||||
logging.basicConfig(
 | 
					 | 
				
			||||||
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 | 
					 | 
				
			||||||
        level=logging.DEBUG)
 | 
					 | 
				
			||||||
import requests
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
OUTLINE_REFERER = 'https://outline.com/'
 | 
					 | 
				
			||||||
OUTLINE_API = 'https://api.outline.com/v3/parse_article'
 | 
					 | 
				
			||||||
TIMEOUT = 20
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_html(url):
 | 
					 | 
				
			||||||
    details = get_details(url)
 | 
					 | 
				
			||||||
    if not details:
 | 
					 | 
				
			||||||
        return ''
 | 
					 | 
				
			||||||
    return details['html']
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_details(url):
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        logging.info(f"Outline Scraper: {url}")
 | 
					 | 
				
			||||||
        params = {'source_url': url}
 | 
					 | 
				
			||||||
        headers = {'Referer': OUTLINE_REFERER}
 | 
					 | 
				
			||||||
        r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=TIMEOUT)
 | 
					 | 
				
			||||||
        if r.status_code == 429:
 | 
					 | 
				
			||||||
            logging.info('Rate limited by outline, sleeping 30s and skipping...')
 | 
					 | 
				
			||||||
            time.sleep(30)
 | 
					 | 
				
			||||||
            return None
 | 
					 | 
				
			||||||
        if r.status_code != 200:
 | 
					 | 
				
			||||||
            raise Exception('Bad response code ' + str(r.status_code))
 | 
					 | 
				
			||||||
        data = r.json()['data']
 | 
					 | 
				
			||||||
        if 'URL is not supported by Outline' in data['html']:
 | 
					 | 
				
			||||||
            raise Exception('URL not supported by Outline')
 | 
					 | 
				
			||||||
        return data
 | 
					 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					 | 
				
			||||||
        raise
 | 
					 | 
				
			||||||
    except BaseException as e:
 | 
					 | 
				
			||||||
        logging.error('Problem outlining article: {}'.format(str(e)))
 | 
					 | 
				
			||||||
        return None
 | 
					 | 
				
			||||||
@@ -1,6 +1,8 @@
 | 
				
			|||||||
import database
 | 
					import database
 | 
				
			||||||
import search
 | 
					import search
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
 | 
					import settings
 | 
				
			||||||
 | 
					import logging
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import json
 | 
					import json
 | 
				
			||||||
import requests
 | 
					import requests
 | 
				
			||||||
@@ -21,7 +23,7 @@ def database_del_story(sid):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def search_del_story(sid):
 | 
					def search_del_story(sid):
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        r = requests.delete(search.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
 | 
					        r = requests.delete(settings.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
 | 
				
			||||||
        if r.status_code != 202:
 | 
					        if r.status_code != 202:
 | 
				
			||||||
            raise Exception('Bad response code ' + str(r.status_code))
 | 
					            raise Exception('Bad response code ' + str(r.status_code))
 | 
				
			||||||
        return r.json()
 | 
					        return r.json()
 | 
				
			||||||
							
								
								
									
										58
									
								
								apiserver/scripts/fix-stories.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								apiserver/scripts/fix-stories.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,58 @@
 | 
				
			|||||||
 | 
					import time
 | 
				
			||||||
 | 
					import json
 | 
				
			||||||
 | 
					import logging
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import feed
 | 
				
			||||||
 | 
					import database
 | 
				
			||||||
 | 
					import search
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					database.init()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def fix_gzip_bug(story_list):
 | 
				
			||||||
 | 
					    FIX_THRESHOLD = 150
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    count = 1
 | 
				
			||||||
 | 
					    for sid in story_list:
 | 
				
			||||||
 | 
					        try:
 | 
				
			||||||
 | 
					            sid = sid[0]
 | 
				
			||||||
 | 
					            story = database.get_story(sid)
 | 
				
			||||||
 | 
					            full_json = json.loads(story.full_json)
 | 
				
			||||||
 | 
					            meta_json = json.loads(story.meta_json)
 | 
				
			||||||
 | 
					            text = full_json.get('text', '')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            count = text.count('<EFBFBD>')
 | 
				
			||||||
 | 
					            if not count: continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            ratio = count / len(text) * 1000
 | 
				
			||||||
 | 
					            print('Bad story:', sid, 'Num ?:', count, 'Ratio:', ratio)
 | 
				
			||||||
 | 
					            if ratio < FIX_THRESHOLD: continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            print('Attempting to fix...')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            valid = feed.update_story(meta_json, is_manual=True)
 | 
				
			||||||
 | 
					            if valid:
 | 
				
			||||||
 | 
					                database.put_story(meta_json)
 | 
				
			||||||
 | 
					                search.put_story(meta_json)
 | 
				
			||||||
 | 
					                print('Success')
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                print('Story was not valid')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            time.sleep(3)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        except KeyboardInterrupt:
 | 
				
			||||||
 | 
					            raise
 | 
				
			||||||
 | 
					        except BaseException as e:
 | 
				
			||||||
 | 
					            logging.exception(e)
 | 
				
			||||||
 | 
					            breakpoint()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == '__main__':
 | 
				
			||||||
 | 
					    num_stories = database.count_stories()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    print('Fix {} stories?'.format(num_stories))
 | 
				
			||||||
 | 
					    print('Press ENTER to continue, ctrl-c to cancel')
 | 
				
			||||||
 | 
					    input()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    story_list = database.get_story_list()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fix_gzip_bug(story_list)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										62
									
								
								apiserver/scripts/reindex.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										62
									
								
								apiserver/scripts/reindex.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,62 @@
 | 
				
			|||||||
 | 
					import logging
 | 
				
			||||||
 | 
					logging.basicConfig(
 | 
				
			||||||
 | 
					        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 | 
				
			||||||
 | 
					        level=logging.INFO)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import database
 | 
				
			||||||
 | 
					from sqlalchemy import select
 | 
				
			||||||
 | 
					import search
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					import json
 | 
				
			||||||
 | 
					import requests
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					database.init()
 | 
				
			||||||
 | 
					search.init()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					BATCH_SIZE = 5000
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def put_stories(stories):
 | 
				
			||||||
 | 
					    return search.meili_api(requests.post, 'indexes/qotnews/documents', stories)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_update(update_id):
 | 
				
			||||||
 | 
					    return search.meili_api(requests.get, 'tasks/{}'.format(update_id))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == '__main__':
 | 
				
			||||||
 | 
					    num_stories = database.count_stories()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    print('Reindex {} stories?'.format(num_stories))
 | 
				
			||||||
 | 
					    print('Press ENTER to continue, ctrl-c to cancel')
 | 
				
			||||||
 | 
					    input()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    story_list = database.get_story_list()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    count = 1
 | 
				
			||||||
 | 
					    while len(story_list):
 | 
				
			||||||
 | 
					        stories = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for _ in range(BATCH_SIZE):
 | 
				
			||||||
 | 
					            try:
 | 
				
			||||||
 | 
					                sid = story_list.pop()
 | 
				
			||||||
 | 
					            except IndexError:
 | 
				
			||||||
 | 
					                break
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            story = database.get_story(sid)
 | 
				
			||||||
 | 
					            print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
 | 
				
			||||||
 | 
					            story_obj = json.loads(story.meta_json)
 | 
				
			||||||
 | 
					            stories.append(story_obj)
 | 
				
			||||||
 | 
					            count += 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        res = put_stories(stories)
 | 
				
			||||||
 | 
					        update_id = res['uid']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        print('Waiting for processing', end='')
 | 
				
			||||||
 | 
					        while get_update(update_id)['status'] != 'succeeded':
 | 
				
			||||||
 | 
					            time.sleep(0.5)
 | 
				
			||||||
 | 
					            print('.', end='', flush=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        print()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    print('Done.')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										23
									
								
								apiserver/scripts/tests.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								apiserver/scripts/tests.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,23 @@
 | 
				
			|||||||
 | 
					import time
 | 
				
			||||||
 | 
					import requests
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_search_api():
 | 
				
			||||||
 | 
					    num_tests = 100
 | 
				
			||||||
 | 
					    total_time = 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for i in range(num_tests):
 | 
				
			||||||
 | 
					        start = time.time()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        res = requests.get('http://127.0.0.1:33842/api/search?q=iphone')
 | 
				
			||||||
 | 
					        res.raise_for_status()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        duration = time.time() - start
 | 
				
			||||||
 | 
					        total_time += duration
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    avg_time = total_time / num_tests
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    print('Average search time:', avg_time)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == '__main__':
 | 
				
			||||||
 | 
					    test_search_api()
 | 
				
			||||||
@@ -4,83 +4,62 @@ logging.basicConfig(
 | 
				
			|||||||
        level=logging.DEBUG)
 | 
					        level=logging.DEBUG)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import requests
 | 
					import requests
 | 
				
			||||||
 | 
					import settings
 | 
				
			||||||
 | 
					
 | 
				
			||||||
MEILI_URL = 'http://127.0.0.1:7700/'
 | 
					SEARCH_ENABLED = bool(settings.MEILI_URL)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def meili_api(method, route, json=None, params=None, parse_json=True):
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        r = method(settings.MEILI_URL + route, json=json, params=params, timeout=4)
 | 
				
			||||||
 | 
					        if r.status_code > 299:
 | 
				
			||||||
 | 
					            raise Exception('Bad response code ' + str(r.status_code))
 | 
				
			||||||
 | 
					        if parse_json:
 | 
				
			||||||
 | 
					            return r.json()
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            r.encoding = 'utf-8'
 | 
				
			||||||
 | 
					            return r.text
 | 
				
			||||||
 | 
					    except KeyboardInterrupt:
 | 
				
			||||||
 | 
					        raise
 | 
				
			||||||
 | 
					    except BaseException as e:
 | 
				
			||||||
 | 
					        logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e))
 | 
				
			||||||
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def create_index():
 | 
					def create_index():
 | 
				
			||||||
    try:
 | 
					    json = dict(uid='qotnews', primaryKey='id')
 | 
				
			||||||
        json = dict(name='qotnews', uid='qotnews')
 | 
					    return meili_api(requests.post, 'indexes', json=json)
 | 
				
			||||||
        r = requests.post(MEILI_URL + 'indexes', json=json, timeout=2)
 | 
					 | 
				
			||||||
        if r.status_code != 201:
 | 
					 | 
				
			||||||
            raise Exception('Bad response code ' + str(r.status_code))
 | 
					 | 
				
			||||||
        return r.json()
 | 
					 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					 | 
				
			||||||
        raise
 | 
					 | 
				
			||||||
    except BaseException as e:
 | 
					 | 
				
			||||||
        logging.error('Problem creating MeiliSearch index: {}'.format(str(e)))
 | 
					 | 
				
			||||||
        return False
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
def update_rankings():
 | 
					def update_rankings():
 | 
				
			||||||
    try:
 | 
					    json = ['typo', 'words', 'proximity', 'date:desc', 'exactness']
 | 
				
			||||||
        json = ['typo', 'words', 'proximity', 'attribute', 'desc(date)', 'wordsPosition', 'exactness']
 | 
					    return meili_api(requests.post, 'indexes/qotnews/settings/ranking-rules', json=json)
 | 
				
			||||||
        r = requests.post(MEILI_URL + 'indexes/qotnews/settings/ranking-rules', json=json, timeout=2)
 | 
					 | 
				
			||||||
        if r.status_code != 202:
 | 
					 | 
				
			||||||
            raise Exception('Bad response code ' + str(r.status_code))
 | 
					 | 
				
			||||||
        return r.json()
 | 
					 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					 | 
				
			||||||
        raise
 | 
					 | 
				
			||||||
    except BaseException as e:
 | 
					 | 
				
			||||||
        logging.error('Problem setting MeiliSearch ranking rules: {}'.format(str(e)))
 | 
					 | 
				
			||||||
        return False
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
def update_attributes():
 | 
					def update_attributes():
 | 
				
			||||||
    try:
 | 
					    json = ['title', 'url', 'author']
 | 
				
			||||||
        json = ['title', 'url', 'author', 'link', 'id', 'source']
 | 
					    r = meili_api(requests.post, 'indexes/qotnews/settings/searchable-attributes', json=json)
 | 
				
			||||||
        r = requests.post(MEILI_URL + 'indexes/qotnews/settings/searchable-attributes', json=json, timeout=2)
 | 
					    json = ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments']
 | 
				
			||||||
        if r.status_code != 202:
 | 
					    r = meili_api(requests.post, 'indexes/qotnews/settings/displayed-attributes', json=json)
 | 
				
			||||||
            raise Exception('Bad response code ' + str(r.status_code))
 | 
					    return r
 | 
				
			||||||
        requests.delete(MEILI_URL + 'indexes/qotnews/settings/displayed-attributes', timeout=2)
 | 
					 | 
				
			||||||
        return r.json()
 | 
					 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					 | 
				
			||||||
        raise
 | 
					 | 
				
			||||||
    except BaseException as e:
 | 
					 | 
				
			||||||
        logging.error('Problem setting MeiliSearch searchable attributes: {}'.format(str(e)))
 | 
					 | 
				
			||||||
        return False
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
def init():
 | 
					def init():
 | 
				
			||||||
    create_index()
 | 
					    if not SEARCH_ENABLED:
 | 
				
			||||||
 | 
					        logging.info('Search is not enabled, skipping init.')
 | 
				
			||||||
 | 
					        return
 | 
				
			||||||
 | 
					    print(create_index())
 | 
				
			||||||
    update_rankings()
 | 
					    update_rankings()
 | 
				
			||||||
    update_attributes()
 | 
					    update_attributes()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def put_story(story):
 | 
					def put_story(story):
 | 
				
			||||||
    story = story.copy()
 | 
					    if not SEARCH_ENABLED: return
 | 
				
			||||||
    story.pop('text', None)
 | 
					    return meili_api(requests.post, 'indexes/qotnews/documents', [story])
 | 
				
			||||||
    story.pop('comments', None)
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        r = requests.post(MEILI_URL + 'indexes/qotnews/documents', json=[story], timeout=2)
 | 
					 | 
				
			||||||
        if r.status_code != 202:
 | 
					 | 
				
			||||||
            raise Exception('Bad response code ' + str(r.status_code))
 | 
					 | 
				
			||||||
        return r.json()
 | 
					 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					 | 
				
			||||||
        raise
 | 
					 | 
				
			||||||
    except BaseException as e:
 | 
					 | 
				
			||||||
        logging.error('Problem putting MeiliSearch story: {}'.format(str(e)))
 | 
					 | 
				
			||||||
        return False
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
def search(q):
 | 
					def search(q):
 | 
				
			||||||
    try:
 | 
					    if not SEARCH_ENABLED: return []
 | 
				
			||||||
        params = dict(q=q, limit=250)
 | 
					    params = dict(q=q, limit=settings.FEED_LENGTH)
 | 
				
			||||||
        r = requests.get(MEILI_URL + 'indexes/qotnews/search', params=params, timeout=2)
 | 
					    r = meili_api(requests.get, 'indexes/qotnews/search', params=params, parse_json=False)
 | 
				
			||||||
        if r.status_code != 200:
 | 
					    return r
 | 
				
			||||||
            raise Exception('Bad response code ' + str(r.status_code))
 | 
					 | 
				
			||||||
        return r.json()['hits']
 | 
					 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					 | 
				
			||||||
        raise
 | 
					 | 
				
			||||||
    except BaseException as e:
 | 
					 | 
				
			||||||
        logging.error('Problem searching MeiliSearch: {}'.format(str(e)))
 | 
					 | 
				
			||||||
        return False
 | 
					 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
if __name__ == '__main__':
 | 
					if __name__ == '__main__':
 | 
				
			||||||
    create_index()
 | 
					    init()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    print(search('the'))
 | 
					    print(update_rankings())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    print(search('facebook'))
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -28,6 +28,8 @@ from flask_cors import CORS
 | 
				
			|||||||
database.init()
 | 
					database.init()
 | 
				
			||||||
search.init()
 | 
					search.init()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					news_index = 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def new_id():
 | 
					def new_id():
 | 
				
			||||||
    nid = gen_rand_id()
 | 
					    nid = gen_rand_id()
 | 
				
			||||||
    while database.get_story(nid):
 | 
					    while database.get_story(nid):
 | 
				
			||||||
@@ -40,8 +42,11 @@ cors = CORS(flask_app)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
@flask_app.route('/api')
 | 
					@flask_app.route('/api')
 | 
				
			||||||
def api():
 | 
					def api():
 | 
				
			||||||
    stories = database.get_stories(settings.MAX_STORY_AGE)
 | 
					    skip = request.args.get('skip', 0)
 | 
				
			||||||
    res = Response(json.dumps({"stories": stories}))
 | 
					    limit = request.args.get('limit', settings.FEED_LENGTH)
 | 
				
			||||||
 | 
					    stories = database.get_stories(limit, skip)
 | 
				
			||||||
 | 
					    # hacky nested json
 | 
				
			||||||
 | 
					    res = Response('{"stories":[' + ','.join(stories) + ']}')
 | 
				
			||||||
    res.headers['content-type'] = 'application/json'
 | 
					    res.headers['content-type'] = 'application/json'
 | 
				
			||||||
    return res
 | 
					    return res
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -51,8 +56,10 @@ def apisearch():
 | 
				
			|||||||
    if len(q) >= 3:
 | 
					    if len(q) >= 3:
 | 
				
			||||||
        results = search.search(q)
 | 
					        results = search.search(q)
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        results = []
 | 
					        results = '[]'
 | 
				
			||||||
    return dict(results=results)
 | 
					    res = Response(results)
 | 
				
			||||||
 | 
					    res.headers['content-type'] = 'application/json'
 | 
				
			||||||
 | 
					    return res
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@flask_app.route('/api/submit', methods=['POST'], strict_slashes=False)
 | 
					@flask_app.route('/api/submit', methods=['POST'], strict_slashes=False)
 | 
				
			||||||
def submit():
 | 
					def submit():
 | 
				
			||||||
@@ -60,6 +67,8 @@ def submit():
 | 
				
			|||||||
        url = request.form['url']
 | 
					        url = request.form['url']
 | 
				
			||||||
        nid = new_id()
 | 
					        nid = new_id()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        logging.info('Manual submission: ' + url)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        parse = urlparse(url)
 | 
					        parse = urlparse(url)
 | 
				
			||||||
        if 'news.ycombinator.com' in parse.hostname:
 | 
					        if 'news.ycombinator.com' in parse.hostname:
 | 
				
			||||||
            source = 'hackernews'
 | 
					            source = 'hackernews'
 | 
				
			||||||
@@ -67,10 +76,13 @@ def submit():
 | 
				
			|||||||
        elif 'tildes.net' in parse.hostname and '~' in url:
 | 
					        elif 'tildes.net' in parse.hostname and '~' in url:
 | 
				
			||||||
            source = 'tildes'
 | 
					            source = 'tildes'
 | 
				
			||||||
            ref = parse.path.split('/')[2]
 | 
					            ref = parse.path.split('/')[2]
 | 
				
			||||||
 | 
					        elif 'lobste.rs' in parse.hostname and '/s/' in url:
 | 
				
			||||||
 | 
					            source = 'lobsters'
 | 
				
			||||||
 | 
					            ref = parse.path.split('/')[2]
 | 
				
			||||||
        elif 'reddit.com' in parse.hostname and 'comments' in url:
 | 
					        elif 'reddit.com' in parse.hostname and 'comments' in url:
 | 
				
			||||||
            source = 'reddit'
 | 
					            source = 'reddit'
 | 
				
			||||||
            ref = parse.path.split('/')[4]
 | 
					            ref = parse.path.split('/')[4]
 | 
				
			||||||
        elif settings.HOSTNAME in parse.hostname:
 | 
					        elif 'news.t0.vc' in parse.hostname:
 | 
				
			||||||
            raise Exception('Invalid article')
 | 
					            raise Exception('Invalid article')
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            source = 'manual'
 | 
					            source = 'manual'
 | 
				
			||||||
@@ -99,9 +111,8 @@ def submit():
 | 
				
			|||||||
def story(sid):
 | 
					def story(sid):
 | 
				
			||||||
    story = database.get_story(sid)
 | 
					    story = database.get_story(sid)
 | 
				
			||||||
    if story:
 | 
					    if story:
 | 
				
			||||||
        related = database.get_stories_by_url(story.meta['url'])
 | 
					        # hacky nested json
 | 
				
			||||||
        related = [r.meta for r in related]
 | 
					        res = Response('{"story":' + story.full_json + '}')
 | 
				
			||||||
        res = Response(json.dumps({"story": story.data, "related": related}))
 | 
					 | 
				
			||||||
        res.headers['content-type'] = 'application/json'
 | 
					        res.headers['content-type'] = 'application/json'
 | 
				
			||||||
        return res
 | 
					        return res
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
@@ -111,9 +122,11 @@ def story(sid):
 | 
				
			|||||||
@flask_app.route('/search')
 | 
					@flask_app.route('/search')
 | 
				
			||||||
def index():
 | 
					def index():
 | 
				
			||||||
    return render_template('index.html',
 | 
					    return render_template('index.html',
 | 
				
			||||||
            title='Feed',
 | 
					        title='QotNews',
 | 
				
			||||||
            url=settings.HOSTNAME,
 | 
					        url='news.t0.vc',
 | 
				
			||||||
            description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')
 | 
					        description='Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode',
 | 
				
			||||||
 | 
					        robots='index',
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@flask_app.route('/<sid>', strict_slashes=False)
 | 
					@flask_app.route('/<sid>', strict_slashes=False)
 | 
				
			||||||
@flask_app.route('/<sid>/c', strict_slashes=False)
 | 
					@flask_app.route('/<sid>/c', strict_slashes=False)
 | 
				
			||||||
@@ -125,7 +138,7 @@ def static_story(sid):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    story = database.get_story(sid)
 | 
					    story = database.get_story(sid)
 | 
				
			||||||
    if not story: return abort(404)
 | 
					    if not story: return abort(404)
 | 
				
			||||||
    story = story.data
 | 
					    story = json.loads(story.full_json)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    score = story['score']
 | 
					    score = story['score']
 | 
				
			||||||
    num_comments = story['num_comments']
 | 
					    num_comments = story['num_comments']
 | 
				
			||||||
@@ -138,30 +151,45 @@ def static_story(sid):
 | 
				
			|||||||
    url = url.replace('www.', '')
 | 
					    url = url.replace('www.', '')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return render_template('index.html',
 | 
					    return render_template('index.html',
 | 
				
			||||||
            title=story['title'],
 | 
					        title=story['title'] + ' | QotNews',
 | 
				
			||||||
        url=url,
 | 
					        url=url,
 | 
				
			||||||
            description=description)
 | 
					        description=description,
 | 
				
			||||||
 | 
					        robots='noindex',
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
http_server = WSGIServer(('', 33842), flask_app)
 | 
					http_server = WSGIServer(('', 33842), flask_app)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def _add_new_refs():
 | 
					def feed_thread():
 | 
				
			||||||
    for ref, source in feed.get_list():
 | 
					    global news_index
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        while True:
 | 
				
			||||||
 | 
					            # onboard new stories
 | 
				
			||||||
 | 
					            if news_index == 0:
 | 
				
			||||||
 | 
					                for ref, source in feed.list():
 | 
				
			||||||
                    if database.get_story_by_ref(ref):
 | 
					                    if database.get_story_by_ref(ref):
 | 
				
			||||||
                        continue
 | 
					                        continue
 | 
				
			||||||
                    try:
 | 
					                    try:
 | 
				
			||||||
                        nid = new_id()
 | 
					                        nid = new_id()
 | 
				
			||||||
 | 
					                        logging.info('Adding ref: {}, id: {}, source: {}'.format(ref, nid, source))
 | 
				
			||||||
                        database.put_ref(ref, nid, source)
 | 
					                        database.put_ref(ref, nid, source)
 | 
				
			||||||
            logging.info('Added ref ' + ref)
 | 
					 | 
				
			||||||
                    except database.IntegrityError:
 | 
					                    except database.IntegrityError:
 | 
				
			||||||
 | 
					                        logging.info('Already have ID / ref, skipping.')
 | 
				
			||||||
                        continue
 | 
					                        continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def _update_current_story(item):
 | 
					            ref_list = database.get_reflist(settings.FEED_LENGTH)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            # update current stories
 | 
				
			||||||
 | 
					            if news_index < len(ref_list):
 | 
				
			||||||
 | 
					                item = ref_list[news_index]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                try:
 | 
					                try:
 | 
				
			||||||
        story = database.get_story(item['sid']).data
 | 
					                    story_json = database.get_story(item['sid']).full_json
 | 
				
			||||||
 | 
					                    story = json.loads(story_json)
 | 
				
			||||||
                except AttributeError:
 | 
					                except AttributeError:
 | 
				
			||||||
                    story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
 | 
					                    story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    logging.info('Updating story: {}'.format(str(story['ref'])))
 | 
					                logging.info('Updating {} story: {}, index: {}'.format(story['source'], story['ref'], news_index))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                valid = feed.update_story(story)
 | 
					                valid = feed.update_story(story)
 | 
				
			||||||
                if valid:
 | 
					                if valid:
 | 
				
			||||||
@@ -170,33 +198,24 @@ def _update_current_story(item):
 | 
				
			|||||||
                else:
 | 
					                else:
 | 
				
			||||||
                    database.del_ref(item['ref'])
 | 
					                    database.del_ref(item['ref'])
 | 
				
			||||||
                    logging.info('Removed ref {}'.format(item['ref']))
 | 
					                    logging.info('Removed ref {}'.format(item['ref']))
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
def feed_thread():
 | 
					                logging.info('Skipping index: ' + str(news_index))
 | 
				
			||||||
    ref_list = []
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        while True:
 | 
					 | 
				
			||||||
            # onboard new stories
 | 
					 | 
				
			||||||
            if not len(ref_list):
 | 
					 | 
				
			||||||
                _add_new_refs()
 | 
					 | 
				
			||||||
                ref_list = database.get_reflist()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            # update current stories
 | 
					 | 
				
			||||||
            if len(ref_list):
 | 
					 | 
				
			||||||
                item = ref_list.pop(0)
 | 
					 | 
				
			||||||
                _update_current_story(item)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
            gevent.sleep(6)
 | 
					            gevent.sleep(6)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            news_index += 1
 | 
				
			||||||
 | 
					            if news_index == settings.FEED_LENGTH: news_index = 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					    except KeyboardInterrupt:
 | 
				
			||||||
        logging.info('Ending feed thread...')
 | 
					        logging.info('Ending feed thread...')
 | 
				
			||||||
    except ValueError as e:
 | 
					    except ValueError as e:
 | 
				
			||||||
        logging.error('feed_thread error: {} {}'.format(e.__class__.__name__, e))
 | 
					        logging.critical('feed_thread error: {} {}'.format(e.__class__.__name__, e))
 | 
				
			||||||
        http_server.stop()
 | 
					        http_server.stop()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
print('Starting Feed thread...')
 | 
					logging.info('Starting Feed thread...')
 | 
				
			||||||
gevent.spawn(feed_thread)
 | 
					gevent.spawn(feed_thread)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
print('Starting HTTP thread...')
 | 
					logging.info('Starting HTTP thread...')
 | 
				
			||||||
try:
 | 
					try:
 | 
				
			||||||
    http_server.serve_forever()
 | 
					    http_server.serve_forever()
 | 
				
			||||||
except KeyboardInterrupt:
 | 
					except KeyboardInterrupt:
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,29 +1,23 @@
 | 
				
			|||||||
# QotNews settings
 | 
					# QotNews settings
 | 
				
			||||||
# edit this file and save it as settings.py
 | 
					# edit this file and save it as settings.py
 | 
				
			||||||
 | 
					
 | 
				
			||||||
HOSTNAME = 'news.t0.vc'
 | 
					 | 
				
			||||||
MAX_STORY_AGE = 3*24*60*60
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Feed Lengths
 | 
					# Feed Lengths
 | 
				
			||||||
# Number of top items from each site to pull
 | 
					# Number of top items from each site to pull
 | 
				
			||||||
# set to 0 to disable that site
 | 
					# set to 0 to disable that site
 | 
				
			||||||
 | 
					FEED_LENGTH = 75
 | 
				
			||||||
NUM_HACKERNEWS = 15
 | 
					NUM_HACKERNEWS = 15
 | 
				
			||||||
NUM_REDDIT = 10
 | 
					NUM_LOBSTERS = 10
 | 
				
			||||||
 | 
					NUM_REDDIT = 15
 | 
				
			||||||
NUM_TILDES = 5
 | 
					NUM_TILDES = 5
 | 
				
			||||||
NUM_SUBSTACK = 10
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
SITEMAP = {}
 | 
					# Meilisearch server URL
 | 
				
			||||||
# SITEMAP['nzherald'] = { 'url': "https://www.nzherald.co.nz/arcio/news-sitemap/", 'count': 10},
 | 
					# Leave blank if not using search
 | 
				
			||||||
# SITEMAP['stuff'] = { 'url': "https://www.stuff.co.nz/sitemap.xml", 'count': 10},
 | 
					#MEILI_URL = 'http://127.0.0.1:7700/'
 | 
				
			||||||
 | 
					MEILI_URL = ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
SUBSTACK = {}
 | 
					# Readerserver URL
 | 
				
			||||||
# SUBSTACK['webworm'] = { 'url': "https://www.webworm.co", 'count': 10},
 | 
					# Leave blank if not using, but that defeats the whole point
 | 
				
			||||||
# SUBSTACK['the bulletin'] = { 'url': "https://thespinoff.substack.com", 'count': 10},
 | 
					READER_URL = 'http://127.0.0.1:33843/'
 | 
				
			||||||
 | 
					 | 
				
			||||||
CATEGORY = {}
 | 
					 | 
				
			||||||
# CATEGORY['rnz national'] = { 'url': "https://www.rnz.co.nz/news/national", 'count': 10},
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
SCRAPERS = ['browser', 'declutter', 'outline', 'local']
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Reddit account info
 | 
					# Reddit account info
 | 
				
			||||||
# leave blank if not using Reddit
 | 
					# leave blank if not using Reddit
 | 
				
			||||||
@@ -31,10 +25,6 @@ REDDIT_CLIENT_ID = ''
 | 
				
			|||||||
REDDIT_CLIENT_SECRET = ''
 | 
					REDDIT_CLIENT_SECRET = ''
 | 
				
			||||||
REDDIT_USER_AGENT = ''
 | 
					REDDIT_USER_AGENT = ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Minimum points or number of comments before including a thread:
 | 
					 | 
				
			||||||
REDDIT_COMMENT_THRESHOLD = 10
 | 
					 | 
				
			||||||
REDDIT_SCORE_THRESHOLD = 25
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
SUBREDDITS = [
 | 
					SUBREDDITS = [
 | 
				
			||||||
    'Economics',
 | 
					    'Economics',
 | 
				
			||||||
    'AcademicPhilosophy',
 | 
					    'AcademicPhilosophy',
 | 
				
			||||||
@@ -43,13 +33,9 @@ SUBREDDITS = [
 | 
				
			|||||||
    'HistoryofIdeas',
 | 
					    'HistoryofIdeas',
 | 
				
			||||||
    'LaymanJournals',
 | 
					    'LaymanJournals',
 | 
				
			||||||
    'PhilosophyofScience',
 | 
					    'PhilosophyofScience',
 | 
				
			||||||
    'PoliticsPDFs',
 | 
					 | 
				
			||||||
    'Scholar',
 | 
					 | 
				
			||||||
    'StateOfTheUnion',
 | 
					    'StateOfTheUnion',
 | 
				
			||||||
    'TheAgora',
 | 
					    'TheAgora',
 | 
				
			||||||
    'TrueFilm',
 | 
					 | 
				
			||||||
    'TrueReddit',
 | 
					    'TrueReddit',
 | 
				
			||||||
    'UniversityofReddit',
 | 
					 | 
				
			||||||
    'culturalstudies',
 | 
					    'culturalstudies',
 | 
				
			||||||
    'hardscience',
 | 
					    'hardscience',
 | 
				
			||||||
    'indepthsports',
 | 
					    'indepthsports',
 | 
				
			||||||
@@ -58,4 +44,7 @@ SUBREDDITS = [
 | 
				
			|||||||
    'neurophilosophy',
 | 
					    'neurophilosophy',
 | 
				
			||||||
    'resilientcommunities',
 | 
					    'resilientcommunities',
 | 
				
			||||||
    'worldevents',
 | 
					    'worldevents',
 | 
				
			||||||
 | 
					    'StallmanWasRight',
 | 
				
			||||||
 | 
					    'EverythingScience',
 | 
				
			||||||
 | 
					    'longevity',
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -8,6 +8,14 @@ import string
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
from bleach.sanitizer import Cleaner
 | 
					from bleach.sanitizer import Cleaner
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def alert_tanner(message):
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        logging.info('Alerting Tanner: ' + message)
 | 
				
			||||||
 | 
					        params = dict(qotnews=message)
 | 
				
			||||||
 | 
					        requests.get('https://tbot.tannercollin.com/message', params=params, timeout=4)
 | 
				
			||||||
 | 
					    except BaseException as e:
 | 
				
			||||||
 | 
					        logging.error('Problem alerting Tanner: ' + str(e))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def gen_rand_id():
 | 
					def gen_rand_id():
 | 
				
			||||||
    return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))
 | 
					    return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,29 +1,53 @@
 | 
				
			|||||||
const port = 33843;
 | 
					 | 
				
			||||||
const express = require('express');
 | 
					const express = require('express');
 | 
				
			||||||
const app = express();
 | 
					const app = express();
 | 
				
			||||||
const simple = require('./scraper/simple');
 | 
					const port = 33843;
 | 
				
			||||||
const browser = require('./scraper/browser');
 | 
					
 | 
				
			||||||
 | 
					const request = require('request');
 | 
				
			||||||
 | 
					const JSDOM = require('jsdom').JSDOM;
 | 
				
			||||||
 | 
					const { Readability } = require('readability');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
app.use(express.urlencoded({ extended: true }));
 | 
					app.use(express.urlencoded({ extended: true }));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
app.get('/', (req, res) => {
 | 
					app.get('/', (req, res) => {
 | 
				
			||||||
	const routes = ['/', '/details', '/browser', '/browser/details', '/browser/comments'];
 | 
						res.send('<form method="POST" accept-charset="UTF-8"><input name="url"><button type="submit">SUBMIT</button></form>');
 | 
				
			||||||
 | 
					});
 | 
				
			||||||
	const html = routes.map(route => `
 | 
					
 | 
				
			||||||
	<form method="POST" action="${route}" accept-charset="UTF-8">
 | 
					const requestCallback = (url, res) => (error, response, body) => {
 | 
				
			||||||
		<fieldset>
 | 
						if (!error && response.statusCode == 200) {
 | 
				
			||||||
			<legend>route: POST ${route}</legend>
 | 
							console.log('Response OK.');
 | 
				
			||||||
			<input name="url">
 | 
					
 | 
				
			||||||
			<button type="submit">SUBMIT</button>
 | 
							const doc = new JSDOM(body, {url: url});
 | 
				
			||||||
		</fieldset>
 | 
							const reader = new Readability(doc.window.document);
 | 
				
			||||||
	</form>`).join('<hr />');
 | 
							const article = reader.parse();
 | 
				
			||||||
	res.send(html);
 | 
					
 | 
				
			||||||
 | 
							if (article && article.content) {
 | 
				
			||||||
 | 
								res.send(article.content);
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								res.sendStatus(404);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							console.log('Response error:', error ? error.toString() : response.statusCode);
 | 
				
			||||||
 | 
							res.sendStatus(response ? response.statusCode : 404);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					app.post('/', (req, res) => {
 | 
				
			||||||
 | 
						const url = req.body.url;
 | 
				
			||||||
 | 
						const requestOptions = {
 | 
				
			||||||
 | 
							url: url,
 | 
				
			||||||
 | 
							gzip: true,
 | 
				
			||||||
 | 
							//headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'},
 | 
				
			||||||
 | 
							//headers: {'User-Agent': 'Twitterbot/1.0'},
 | 
				
			||||||
 | 
							headers: {
 | 
				
			||||||
 | 
								'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0',
 | 
				
			||||||
 | 
								'X-Forwarded-For': '66.249.66.1',
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
						};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						console.log('Parse request for:', url);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						request(requestOptions, requestCallback(url, res));
 | 
				
			||||||
});
 | 
					});
 | 
				
			||||||
app.post('/', simple.scrape);
 | 
					 | 
				
			||||||
app.post('/details', simple.details);
 | 
					 | 
				
			||||||
app.post('/browser', browser.scrape);
 | 
					 | 
				
			||||||
app.post('/browser/details', browser.details);
 | 
					 | 
				
			||||||
app.post('/browser/comments', browser.comments);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
app.listen(port, () => {
 | 
					app.listen(port, () => {
 | 
				
			||||||
	console.log(`Example app listening on port ${port}!`);
 | 
						console.log(`Example app listening on port ${port}!`);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -4,12 +4,10 @@
 | 
				
			|||||||
  "main": "main.js",
 | 
					  "main": "main.js",
 | 
				
			||||||
  "license": "MIT",
 | 
					  "license": "MIT",
 | 
				
			||||||
  "dependencies": {
 | 
					  "dependencies": {
 | 
				
			||||||
    "@mozilla/readability": "^0.3.0",
 | 
					 | 
				
			||||||
    "dompurify": "^1.0.11",
 | 
					    "dompurify": "^1.0.11",
 | 
				
			||||||
    "express": "^4.17.1",
 | 
					    "express": "^4.17.1",
 | 
				
			||||||
    "jsdom": "^15.1.1",
 | 
					    "jsdom": "^15.1.1",
 | 
				
			||||||
    "node-fetch": "^2.6.1",
 | 
					    "readability": "https://github.com/mozilla/readability",
 | 
				
			||||||
    "playwright": "^1.5.2",
 | 
					 | 
				
			||||||
    "request": "^2.88.0"
 | 
					    "request": "^2.88.0"
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,45 +0,0 @@
 | 
				
			|||||||
const { firefox } = require("playwright");
 | 
					 | 
				
			||||||
const { JSDOM } = require("jsdom");
 | 
					 | 
				
			||||||
const { Readability } = require("@mozilla/readability");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
const { getUserAgent } = require('../../utils/user-agent');
 | 
					 | 
				
			||||||
const { blockedRegexes, matchUrlDomain } = require("../../utils/sites");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
module.exports.getDetails = async (url) => {
 | 
					 | 
				
			||||||
	const { userAgent, headers } = getUserAgent(url);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	const browser = await firefox.launch({ args: [], headless: true });
 | 
					 | 
				
			||||||
	const tab = await browser.newPage({
 | 
					 | 
				
			||||||
		extraHTTPHeaders: headers,
 | 
					 | 
				
			||||||
		userAgent,
 | 
					 | 
				
			||||||
		viewport: { width: 2000, height: 10000 },
 | 
					 | 
				
			||||||
	});
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	try {
 | 
					 | 
				
			||||||
		await tab.route(/.*/, (route) => {
 | 
					 | 
				
			||||||
			const routeUrl = route.request().url();
 | 
					 | 
				
			||||||
			const blockedDomains = Object.keys(blockedRegexes);
 | 
					 | 
				
			||||||
			const domain = matchUrlDomain(blockedDomains, routeUrl);
 | 
					 | 
				
			||||||
			if (domain && routeUrl.match(blockedRegexes[domain])) {
 | 
					 | 
				
			||||||
				return route.abort();
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
			return route.continue();
 | 
					 | 
				
			||||||
		});
 | 
					 | 
				
			||||||
		await tab.addInitScript({ path: "scraper/browser/scripts/bypass-paywalls-chrome/src/js/contentScript.js" });
 | 
					 | 
				
			||||||
		await tab.addInitScript({ path: "scraper/browser/scripts/cosmetic-filters.js" });
 | 
					 | 
				
			||||||
		await tab.addInitScript({ path: "scraper/browser/scripts/fix-relative-links.js" });
 | 
					 | 
				
			||||||
		await tab.goto(url, { timeout: 60000, waitUntil: "domcontentloaded" });
 | 
					 | 
				
			||||||
		await tab.waitForTimeout(2000);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		const body = await tab.content();
 | 
					 | 
				
			||||||
		const doc = new JSDOM(body, { url });
 | 
					 | 
				
			||||||
		const reader = new Readability(doc.window.document);
 | 
					 | 
				
			||||||
		const article = reader.parse();
 | 
					 | 
				
			||||||
		return article;
 | 
					 | 
				
			||||||
	} catch (e) {
 | 
					 | 
				
			||||||
		throw e;
 | 
					 | 
				
			||||||
	} finally {
 | 
					 | 
				
			||||||
		await tab.close();
 | 
					 | 
				
			||||||
		await browser.close();
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
@@ -1,34 +0,0 @@
 | 
				
			|||||||
const { JSDOM } = require("jsdom");
 | 
					 | 
				
			||||||
const { firefox } = require("playwright");
 | 
					 | 
				
			||||||
const { getUserAgent } = require('../../utils/user-agent');
 | 
					 | 
				
			||||||
const { disqusThread } = require('../../utils/disqus-thread');
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
const DISQUS_EMBED = 'https://disqus.com/embed/comments/';
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
module.exports.getComments = async (url) => {
 | 
					 | 
				
			||||||
	const { userAgent, headers } = getUserAgent(url);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	const browser = await firefox.launch({ args: [], headless: true });
 | 
					 | 
				
			||||||
	const tab = await browser.newPage({
 | 
					 | 
				
			||||||
		extraHTTPHeaders: headers,
 | 
					 | 
				
			||||||
		userAgent,
 | 
					 | 
				
			||||||
		viewport: { width: 2000, height: 10000 },
 | 
					 | 
				
			||||||
	});
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	try {
 | 
					 | 
				
			||||||
		await tab.goto(url, { timeout: 60000, waitUntil: "domcontentloaded" });
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		const response = await tab.waitForResponse(response => response.url().includes(DISQUS_EMBED));
 | 
					 | 
				
			||||||
		const text = await response.text();
 | 
					 | 
				
			||||||
		const dom = new JSDOM(text, response.url());
 | 
					 | 
				
			||||||
		const script = dom.window.document.querySelector('#disqus-threadData')
 | 
					 | 
				
			||||||
		const data = JSON.parse(script.innerHTML);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		return disqusThread(data);
 | 
					 | 
				
			||||||
	} catch (e) {
 | 
					 | 
				
			||||||
		throw e;
 | 
					 | 
				
			||||||
	} finally {
 | 
					 | 
				
			||||||
		await tab.close();
 | 
					 | 
				
			||||||
		await browser.close();
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
@@ -1,40 +0,0 @@
 | 
				
			|||||||
const { getDetails } = require('./_browser');
 | 
					 | 
				
			||||||
const { getComments } = require('./_comments');
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
module.exports.scrape = async (req, res) => {
 | 
					 | 
				
			||||||
	try {
 | 
					 | 
				
			||||||
		const article = await getDetails(req.body.url);
 | 
					 | 
				
			||||||
		if (!article || !article.content) {
 | 
					 | 
				
			||||||
			throw new Error('failed to get details.');
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		return res.send(article.content);
 | 
					 | 
				
			||||||
	} catch (e) {
 | 
					 | 
				
			||||||
		return res.sendStatus(500);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
module.exports.details = async (req, res) => {
 | 
					 | 
				
			||||||
	try {
 | 
					 | 
				
			||||||
		const article = await getDetails(req.body.url);
 | 
					 | 
				
			||||||
		if (!article) {
 | 
					 | 
				
			||||||
			throw new Error('failed to get details.');
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		return res.send(article);
 | 
					 | 
				
			||||||
	} catch (e) {
 | 
					 | 
				
			||||||
		console.log(e);
 | 
					 | 
				
			||||||
		return res.sendStatus(500);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
module.exports.comments = async (req, res) => {
 | 
					 | 
				
			||||||
	try {
 | 
					 | 
				
			||||||
		const comments = await getComments(req.body.url);
 | 
					 | 
				
			||||||
		if (!comments) {
 | 
					 | 
				
			||||||
			throw new Error('failed to get comments.');
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		return res.send(comments);
 | 
					 | 
				
			||||||
	} catch (e) {
 | 
					 | 
				
			||||||
		console.log(e);
 | 
					 | 
				
			||||||
		return res.sendStatus(500);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
 Submodule readerserver/scraper/browser/scripts/bypass-paywalls-chrome deleted from 44f3d1b114
									
								
							@@ -1,96 +0,0 @@
 | 
				
			|||||||
(function () {
 | 
					 | 
				
			||||||
	removeHiddenElements();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (matchDomain("stuff.co.nz")) {
 | 
					 | 
				
			||||||
		removeSelectors([
 | 
					 | 
				
			||||||
			".support-brief-container",
 | 
					 | 
				
			||||||
			'[class*="donation-in-"]',
 | 
					 | 
				
			||||||
			".sics-component__sharebar",
 | 
					 | 
				
			||||||
			".breaking-news-pointer",
 | 
					 | 
				
			||||||
			".bigbyline-container",
 | 
					 | 
				
			||||||
			[
 | 
					 | 
				
			||||||
				".sics-component__html-injector.sics-component__story__paragraph",
 | 
					 | 
				
			||||||
				"READ MORE:",
 | 
					 | 
				
			||||||
			],
 | 
					 | 
				
			||||||
		]);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	if (matchDomain("nzherald.co.nz")) {
 | 
					 | 
				
			||||||
		removeSelectors([
 | 
					 | 
				
			||||||
			"[href$='#commenting-widget']",
 | 
					 | 
				
			||||||
			".related-articles",
 | 
					 | 
				
			||||||
			".article__print-button",
 | 
					 | 
				
			||||||
			".share-bar",
 | 
					 | 
				
			||||||
			".c-suggest-links.read-more-links",
 | 
					 | 
				
			||||||
			".website-of-year",
 | 
					 | 
				
			||||||
			".meta-data",
 | 
					 | 
				
			||||||
			".article__kicker",
 | 
					 | 
				
			||||||
			".author__image",
 | 
					 | 
				
			||||||
		]);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	if (matchDomain(["rnz.co.nz", "radionz.co.nz"])) {
 | 
					 | 
				
			||||||
		removeSelectors([".c-advert-app", ".c-sub-nav"]);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	if (matchDomain(["newsroom.co.nz"])) {
 | 
					 | 
				
			||||||
		removeSelectors([".article_content__section", ".bio"]);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	if (matchDomain(["newshub.co.nz"])) {
 | 
					 | 
				
			||||||
		removeSelectors([".c-ArticleHeading-authorPicture", ".relatedarticles"]);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	if (matchDomain(["tvnz.co.nz"])) {
 | 
					 | 
				
			||||||
		removeSelectors([".signup-container container"]);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	function matchDomain(domains) {
 | 
					 | 
				
			||||||
		const hostname = window.location.hostname;
 | 
					 | 
				
			||||||
		if (typeof domains === "string") {
 | 
					 | 
				
			||||||
			domains = [domains];
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		return domains.some(
 | 
					 | 
				
			||||||
			(domain) => hostname === domain || hostname.endsWith("." + domain)
 | 
					 | 
				
			||||||
		);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	function removeDOMElement(...elements) {
 | 
					 | 
				
			||||||
		for (const element of elements) {
 | 
					 | 
				
			||||||
			if (element) {
 | 
					 | 
				
			||||||
				element.remove();
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	function pageContains(selector, text) {
 | 
					 | 
				
			||||||
		const elements = document.querySelectorAll(selector);
 | 
					 | 
				
			||||||
		return Array.prototype.filter.call(elements, function (element) {
 | 
					 | 
				
			||||||
			return RegExp(text).test(element.textContent);
 | 
					 | 
				
			||||||
		});
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	function removeHiddenElements() {
 | 
					 | 
				
			||||||
		window.setTimeout(function () {
 | 
					 | 
				
			||||||
			const selector = "*:not(script):not(head):not(meta):not(link):not(style)";
 | 
					 | 
				
			||||||
			Array.from(document.querySelectorAll(selector))
 | 
					 | 
				
			||||||
				.filter((element) => {
 | 
					 | 
				
			||||||
					const computed = getComputedStyle(element);
 | 
					 | 
				
			||||||
					const displayNone = computed["display"] === "none";
 | 
					 | 
				
			||||||
					const visibilityHidden = computed["visibility"] === "hidden";
 | 
					 | 
				
			||||||
					return displayNone || visibilityHidden;
 | 
					 | 
				
			||||||
				})
 | 
					 | 
				
			||||||
				.forEach((element) => element && element.remove());
 | 
					 | 
				
			||||||
		}, 1000);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	function removeSelectors(selectors) {
 | 
					 | 
				
			||||||
		window.setTimeout(function () {
 | 
					 | 
				
			||||||
			const elements = selectors.flatMap((s) => {
 | 
					 | 
				
			||||||
				if (typeof s === "string") {
 | 
					 | 
				
			||||||
					return Array.from(document.querySelectorAll(s));
 | 
					 | 
				
			||||||
				}
 | 
					 | 
				
			||||||
				if (s && s.constructor.name === "Array") {
 | 
					 | 
				
			||||||
					return pageContains(...s);
 | 
					 | 
				
			||||||
				}
 | 
					 | 
				
			||||||
				return undefined;
 | 
					 | 
				
			||||||
			});
 | 
					 | 
				
			||||||
			removeDOMElement(...elements);
 | 
					 | 
				
			||||||
		}, 1000);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
})();
 | 
					 | 
				
			||||||
@@ -1,14 +0,0 @@
 | 
				
			|||||||
(function () {
 | 
					 | 
				
			||||||
	const { host, protocol } = window.location;
 | 
					 | 
				
			||||||
	const url = `${protocol}//${host}`;
 | 
					 | 
				
			||||||
	[
 | 
					 | 
				
			||||||
		['[src^="/"]', 'src'],
 | 
					 | 
				
			||||||
		['[href^="/"]', 'href']
 | 
					 | 
				
			||||||
	].forEach(([selector, attribute]) => {
 | 
					 | 
				
			||||||
		Array.from(document.querySelectorAll(selector))
 | 
					 | 
				
			||||||
			.filter(e => e.attributes[attribute] && /^\/[^\/]/.test(e.attributes[attribute].value))
 | 
					 | 
				
			||||||
			.forEach((e) => {
 | 
					 | 
				
			||||||
				e.attributes[attribute].value = `${url}${e.attributes[attribute].value}`;
 | 
					 | 
				
			||||||
			});
 | 
					 | 
				
			||||||
	});
 | 
					 | 
				
			||||||
})();
 | 
					 | 
				
			||||||
@@ -1,59 +0,0 @@
 | 
				
			|||||||
const fetch = require('node-fetch');
 | 
					 | 
				
			||||||
const { JSDOM } = require('jsdom');
 | 
					 | 
				
			||||||
const { Readability } = require('@mozilla/readability');
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
const { getUserAgent } = require('../utils/user-agent');
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
const extract = (url, body) => {
 | 
					 | 
				
			||||||
	const doc = new JSDOM(body, { url: url });
 | 
					 | 
				
			||||||
	const reader = new Readability(doc.window.document);
 | 
					 | 
				
			||||||
	return reader.parse();
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
module.exports.scrape = async (req, res) => {
 | 
					 | 
				
			||||||
	try {
 | 
					 | 
				
			||||||
		const { userAgent, headers } = getUserAgent(req.body.url);
 | 
					 | 
				
			||||||
		const response = await fetch(req.body.url, {
 | 
					 | 
				
			||||||
			headers: {
 | 
					 | 
				
			||||||
				...headers,
 | 
					 | 
				
			||||||
				'User-Agent': userAgent
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
		});
 | 
					 | 
				
			||||||
		if (!response.ok) {
 | 
					 | 
				
			||||||
			return res.sendStatus(response.statusCode);
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		const html = await response.text();
 | 
					 | 
				
			||||||
		const article = await extract(req.body.url, html);
 | 
					 | 
				
			||||||
		if (article && article.content) {
 | 
					 | 
				
			||||||
			return res.send(article.content);
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		return res.sendStatus(404);
 | 
					 | 
				
			||||||
	} catch (e) {
 | 
					 | 
				
			||||||
		console.error(e);
 | 
					 | 
				
			||||||
		return res.sendStatus(500);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
module.exports.details = async (req, res) => {
 | 
					 | 
				
			||||||
	try {
 | 
					 | 
				
			||||||
		const { userAgent, headers } = getUserAgent(req.body.url);
 | 
					 | 
				
			||||||
		const response = await fetch(req.body.url, {
 | 
					 | 
				
			||||||
			headers: {
 | 
					 | 
				
			||||||
				...headers,
 | 
					 | 
				
			||||||
				'User-Agent': userAgent
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
		});
 | 
					 | 
				
			||||||
		if (!response.ok) {
 | 
					 | 
				
			||||||
			return res.sendStatus(response.statusCode);
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		const html = await response.text();
 | 
					 | 
				
			||||||
		const article = await extract(req.body.url, html);
 | 
					 | 
				
			||||||
		if (article) {
 | 
					 | 
				
			||||||
			return res.send(article);
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		return res.sendStatus(404);
 | 
					 | 
				
			||||||
	} catch (e) {
 | 
					 | 
				
			||||||
		console.error(e);
 | 
					 | 
				
			||||||
		return res.sendStatus(500);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
@@ -1,11 +0,0 @@
 | 
				
			|||||||
const googleBotUserAgent = 'Googlebot/2.1 (+http://www.google.com/bot.html)';
 | 
					 | 
				
			||||||
const googleBotIp = '66.249.66.1';
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
module.exports.googleBot = {
 | 
					 | 
				
			||||||
	userAgent: googleBotUserAgent,
 | 
					 | 
				
			||||||
	ip: googleBotIp,
 | 
					 | 
				
			||||||
	headers: {
 | 
					 | 
				
			||||||
		'User-Agent': googleBotUserAgent,
 | 
					 | 
				
			||||||
		'X-Forwarded-For': googleBotIp,
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -1,21 +0,0 @@
 | 
				
			|||||||
module.exports.disqusThread = data => {
 | 
					 | 
				
			||||||
	const comments = data.response.posts.reduce((c, post) => ({
 | 
					 | 
				
			||||||
		...c,
 | 
					 | 
				
			||||||
		[post.id.toString()]: {
 | 
					 | 
				
			||||||
			author: post.author.name,
 | 
					 | 
				
			||||||
			authorLink: post.author.profileUrl,
 | 
					 | 
				
			||||||
			date: post.createdAt,
 | 
					 | 
				
			||||||
			text: post.raw_message,
 | 
					 | 
				
			||||||
			score: post.points,
 | 
					 | 
				
			||||||
			children: [],
 | 
					 | 
				
			||||||
			id: post.id.toString(),
 | 
					 | 
				
			||||||
			parent: (post.parent || '').toString(),
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}), {});
 | 
					 | 
				
			||||||
	Object.keys(comments).filter(id => !!comments[id].parent).forEach(id => {
 | 
					 | 
				
			||||||
		const comment = comments[id];
 | 
					 | 
				
			||||||
		comments[comment.parent].children.push(comment);
 | 
					 | 
				
			||||||
	});
 | 
					 | 
				
			||||||
	const parents = Object.keys(comments).filter(id => comments[id].parent).map(id => comments[id]);
 | 
					 | 
				
			||||||
	return parents;
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
@@ -1,98 +0,0 @@
 | 
				
			|||||||
module.exports.blockedRegexes = {
 | 
					 | 
				
			||||||
	"adweek.com": /.+\.lightboxcdn\.com\/.+/,
 | 
					 | 
				
			||||||
	"afr.com": /afr\.com\/assets\/vendorsReactRedux_client.+\.js/,
 | 
					 | 
				
			||||||
	"businessinsider.com": /(.+\.tinypass\.com\/.+|cdn\.onesignal\.com\/sdks\/.+\.js)/,
 | 
					 | 
				
			||||||
	"chicagotribune.com": /.+:\/\/.+\.tribdss\.com\//,
 | 
					 | 
				
			||||||
	"economist.com": /(.+\.tinypass\.com\/.+|economist\.com\/engassets\/_next\/static\/chunks\/framework.+\.js)/,
 | 
					 | 
				
			||||||
	"editorialedomani.it": /(js\.pelcro\.com\/.+|editorialedomani.it\/pelcro\.js)/,
 | 
					 | 
				
			||||||
	"foreignpolicy.com": /.+\.tinypass\.com\/.+/,
 | 
					 | 
				
			||||||
	"fortune.com": /.+\.tinypass\.com\/.+/,
 | 
					 | 
				
			||||||
	"haaretz.co.il": /haaretz\.co\.il\/htz\/js\/inter\.js/,
 | 
					 | 
				
			||||||
	"haaretz.com": /haaretz\.com\/hdc\/web\/js\/minified\/header-scripts-int.js.+/,
 | 
					 | 
				
			||||||
	"inquirer.com": /.+\.tinypass\.com\/.+/,
 | 
					 | 
				
			||||||
	"lastampa.it": /.+\.repstatic\.it\/minify\/sites\/lastampa\/.+\/config\.cache\.php\?name=social_js/,
 | 
					 | 
				
			||||||
	"lrb.co.uk": /.+\.tinypass\.com\/.+/,
 | 
					 | 
				
			||||||
	"nzherald.co.nz": /(.+nzherald\.co\.nz\/.+\/subs\/p\.js|.+nzherald\.co\.nz\/.+\/react\.js|.+nzherald\.co\.nz\/.+\/appear\.js|.+nzherald\.co\.nz\/.+\/tracking\/.+|.+nzherald\.co\.nz\/.+\/default\.js|.+\/newsbarscript\.js)/,
 | 
					 | 
				
			||||||
	"medscape.com": /.+\.medscapestatic\.com\/.*medscape-library\.js/,
 | 
					 | 
				
			||||||
	"interest.co.nz": /(.+\.presspatron\.com.+|.+interest\.co\.nz.+pp-ablock-banner\.js)/,
 | 
					 | 
				
			||||||
	"repubblica.it": /scripts\.repubblica\.it\/pw\/pw\.js.+/,
 | 
					 | 
				
			||||||
	"spectator.co.uk": /.+\.tinypass\.com\/.+/,
 | 
					 | 
				
			||||||
	"spectator.com.au": /.+\.tinypass\.com\/.+/,
 | 
					 | 
				
			||||||
	"telegraph.co.uk": /.+telegraph\.co\.uk.+martech.+/,
 | 
					 | 
				
			||||||
	"thecourier.com.au": /.+cdn-au\.piano\.io\/api\/tinypass.+\.js/,
 | 
					 | 
				
			||||||
	"thenation.com": /thenation\.com\/.+\/paywall-script\.php/,
 | 
					 | 
				
			||||||
	"thenational.scot": /(.+\.tinypass\.com\/.+|.+thenational\.scot.+omniture\.js|.+thenational\.scot.+responsive-sync.+)/,
 | 
					 | 
				
			||||||
	"thewrap.com": /thewrap\.com\/.+\/wallkit\.js/,
 | 
					 | 
				
			||||||
	"wsj.com": /cdn\.ampproject\.org\/v\d\/amp-access-.+\.js/,
 | 
					 | 
				
			||||||
	"historyextra.com": /.+\.evolok\.net\/.+\/authorize\/.+/,
 | 
					 | 
				
			||||||
	"barrons.com": /cdn\.ampproject\.org\/v\d\/amp-access-.+\.js/,
 | 
					 | 
				
			||||||
	"irishtimes.com": /cdn\.ampproject\.org\/v\d\/amp-access-.+\.js/,
 | 
					 | 
				
			||||||
	"elmercurio.com": /(merreader\.emol\.cl\/assets\/js\/merPramV2.js|staticmer\.emol\.cl\/js\/inversiones\/PramModal.+\.js)/,
 | 
					 | 
				
			||||||
	"sloanreview.mit.edu": /(.+\.tinypass\.com\/.+|.+\.netdna-ssl\.com\/wp-content\/themes\/smr\/assets\/js\/libs\/welcome-ad\.js)/,
 | 
					 | 
				
			||||||
	"latercera.com": /.+\.cxense\.com\/+/,
 | 
					 | 
				
			||||||
	"lesechos.fr": /.+\.tinypass\.com\/.+/,
 | 
					 | 
				
			||||||
	"washingtonpost.com": /.+\.washingtonpost\.com\/.+\/pwapi-proxy\.min\.js/,
 | 
					 | 
				
			||||||
	"thehindu.com": /ajax\.cloudflare\.com\/cdn-cgi\/scripts\/.+\/cloudflare-static\/rocket-loader\.min\.js/,
 | 
					 | 
				
			||||||
	"technologyreview.com": /.+\.blueconic\.net\/.+/,
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
module.exports.useGoogleBotSites = [
 | 
					 | 
				
			||||||
	"adelaidenow.com.au",
 | 
					 | 
				
			||||||
	"barrons.com",
 | 
					 | 
				
			||||||
	"couriermail.com.au",
 | 
					 | 
				
			||||||
	"dailytelegraph.com.au",
 | 
					 | 
				
			||||||
	"fd.nl",
 | 
					 | 
				
			||||||
	"genomeweb.com",
 | 
					 | 
				
			||||||
	"haaretz.co.il",
 | 
					 | 
				
			||||||
	"haaretz.com",
 | 
					 | 
				
			||||||
	"heraldsun.com.au",
 | 
					 | 
				
			||||||
	"mexiconewsdaily.com",
 | 
					 | 
				
			||||||
	"ntnews.com.au",
 | 
					 | 
				
			||||||
	"quora.com",
 | 
					 | 
				
			||||||
	"seekingalpha.com",
 | 
					 | 
				
			||||||
	"telegraph.co.uk",
 | 
					 | 
				
			||||||
	"theaustralian.com.au",
 | 
					 | 
				
			||||||
	"themarker.com",
 | 
					 | 
				
			||||||
	"themercury.com.au",
 | 
					 | 
				
			||||||
	"thenational.scot",
 | 
					 | 
				
			||||||
	"thetimes.co.uk",
 | 
					 | 
				
			||||||
	"wsj.com",
 | 
					 | 
				
			||||||
	"kansascity.com",
 | 
					 | 
				
			||||||
	"republic.ru",
 | 
					 | 
				
			||||||
	"nzz.ch",
 | 
					 | 
				
			||||||
	"handelsblatt.com",
 | 
					 | 
				
			||||||
	"washingtonpost.com",
 | 
					 | 
				
			||||||
	"df.cl",
 | 
					 | 
				
			||||||
];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/**
 * Find the first domain that `hostname` equals or is a subdomain of.
 *
 * @param {string|string[]} domains - One domain or a list of domains.
 * @param {string} hostname - Hostname to test.
 * @returns {string|false} The matching domain, or `false` when nothing
 *   matches or `hostname` is not a string.
 */
function matchDomain(domains, hostname) {
	// A null/undefined/non-string hostname cannot match anything; return
	// false rather than throwing on `.endsWith`.
	if (typeof hostname !== "string") {
		return false;
	}
	const candidates = typeof domains === "string" ? [domains] : domains;
	const matched = candidates.find(
		(domain) => hostname === domain || hostname.endsWith("." + domain)
	);
	return matched || false;
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/**
 * Match a URL against one or more domains by first reducing it to a host
 * with `urlHost`, then delegating to `matchDomain`.
 *
 * @param {string|string[]} domains - One domain or a list of domains.
 * @param {string} url - URL to test.
 * @returns {string|false} Whatever `matchDomain` returns for the host.
 */
function matchUrlDomain(domains, url) {
	const host = urlHost(url);
	return matchDomain(domains, host);
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/**
 * Extract the hostname from an http(s) URL.
 *
 * @param {string} url - Candidate URL.
 * @returns {*} The hostname when `url` starts with "http" and parses;
 *   otherwise `url` itself, unchanged (a parse failure is logged).
 */
function urlHost(url) {
	const looksLikeHttp = Boolean(url) && url.startsWith("http");
	if (!looksLikeHttp) {
		return url;
	}

	try {
		const parsed = new URL(url);
		return parsed.hostname;
	} catch (e) {
		console.log(`url not valid: ${url} error: ${e}`);
		return url;
	}
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// Public helpers of this module.
Object.assign(module.exports, {
	matchDomain,
	matchUrlDomain,
	urlHost,
});
 | 
					 | 
				
			||||||
@@ -1,18 +0,0 @@
 | 
				
			|||||||
const { googleBot } = require('./constants');
 | 
					 | 
				
			||||||
const { matchUrlDomain, useGoogleBotSites } = require("./sites");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/**
 * Choose user-agent overrides for a URL.
 *
 * @param {string} url - URL about to be requested.
 * @returns {object} `{}` when the URL's domain is not in
 *   `useGoogleBotSites`; otherwise `{ userAgent, headers }` holding the
 *   Googlebot user agent and an `X-Forwarded-For` header set to its IP.
 */
module.exports.getUserAgent = (url) => {
	const isGoogleBotSite = (site) =>
		typeof site === "string" && matchUrlDomain(site, url);

	if (useGoogleBotSites.some(isGoogleBotSite)) {
		return {
			userAgent: googleBot.userAgent,
			headers: {
				"X-Forwarded-For": googleBot.ip
			}
		};
	}

	return {};
};
 | 
					 | 
				
			||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -3,7 +3,7 @@
 | 
				
			|||||||
Download MeiliSearch with:
 | 
					Download MeiliSearch with:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
wget https://github.com/meilisearch/MeiliSearch/releases/download/v0.11.1/meilisearch-linux-amd64
 | 
					wget https://github.com/meilisearch/meilisearch/releases/download/v0.27.0/meilisearch-linux-amd64
 | 
				
			||||||
chmod +x meilisearch-linux-amd64
 | 
					chmod +x meilisearch-linux-amd64
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -8,6 +8,8 @@
 | 
				
			|||||||
			content="{{ description }}"
 | 
								content="{{ description }}"
 | 
				
			||||||
		/>
 | 
							/>
 | 
				
			||||||
		<meta content="{{ url }}" name="og:site_name">
 | 
							<meta content="{{ url }}" name="og:site_name">
 | 
				
			||||||
 | 
							<meta name="robots" content="{{ robots }}">
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
 | 
							<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
 | 
				
			||||||
		<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
 | 
							<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
 | 
				
			||||||
@@ -26,7 +28,7 @@
 | 
				
			|||||||
			work correctly both with client-side routing and a non-root public URL.
 | 
								work correctly both with client-side routing and a non-root public URL.
 | 
				
			||||||
			Learn how to configure a non-root public URL by running `npm run build`.
 | 
								Learn how to configure a non-root public URL by running `npm run build`.
 | 
				
			||||||
		-->
 | 
							-->
 | 
				
			||||||
		<title>{{ title }} - QotNews</title>
 | 
							<title>{{ title }}</title>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		<style>
 | 
							<style>
 | 
				
			||||||
			html {
 | 
								html {
 | 
				
			||||||
@@ -37,13 +39,23 @@
 | 
				
			|||||||
			}
 | 
								}
 | 
				
			||||||
			.nojs {
 | 
								.nojs {
 | 
				
			||||||
				color: white;
 | 
									color: white;
 | 
				
			||||||
 | 
									max-width: 32rem;
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		</style>
 | 
							</style>
 | 
				
			||||||
	</head>
 | 
						</head>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	<body>
 | 
						<body>
 | 
				
			||||||
		<div class="nojs">
 | 
							<div class="nojs">
 | 
				
			||||||
			<noscript>You need to enable JavaScript to run this app.</noscript>
 | 
								<noscript>
 | 
				
			||||||
 | 
									You need to enable JavaScript to run this app because it's written in React.
 | 
				
			||||||
 | 
									I was planning on writing a server-side version, but I've become distracted
 | 
				
			||||||
 | 
									by other projects -- sorry!
 | 
				
			||||||
 | 
									<br/>
 | 
				
			||||||
 | 
									I originally wrote this for myself, and of course I whitelist JavaScript on
 | 
				
			||||||
 | 
									all my own domains.
 | 
				
			||||||
 | 
									<br/><br/>
 | 
				
			||||||
 | 
									Alternatively, try activex.news.t0.vc for an ActiveX™ version.
 | 
				
			||||||
 | 
								</noscript>
 | 
				
			||||||
		</div>
 | 
							</div>
 | 
				
			||||||
		<div id="root"></div>
 | 
							<div id="root"></div>
 | 
				
			||||||
		<!--
 | 
							<!--
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -3,16 +3,17 @@ import { BrowserRouter as Router, Route, Link, Switch } from 'react-router-dom';
 | 
				
			|||||||
import localForage from 'localforage';
 | 
					import localForage from 'localforage';
 | 
				
			||||||
import './Style-light.css';
 | 
					import './Style-light.css';
 | 
				
			||||||
import './Style-dark.css';
 | 
					import './Style-dark.css';
 | 
				
			||||||
 | 
					import './Style-black.css';
 | 
				
			||||||
 | 
					import './Style-red.css';
 | 
				
			||||||
import './fonts/Fonts.css';
 | 
					import './fonts/Fonts.css';
 | 
				
			||||||
import { ForwardDot } from './utils.js';
 | 
					import { BackwardDot, ForwardDot } from './utils.js';
 | 
				
			||||||
 | 
					import Feed from './Feed.js';
 | 
				
			||||||
 | 
					import Article from './Article.js';
 | 
				
			||||||
 | 
					import Comments from './Comments.js';
 | 
				
			||||||
import Search from './Search.js';
 | 
					import Search from './Search.js';
 | 
				
			||||||
import Submit from './Submit.js';
 | 
					import Submit from './Submit.js';
 | 
				
			||||||
 | 
					import Results from './Results.js';
 | 
				
			||||||
import ScrollToTop from './ScrollToTop.js';
 | 
					import ScrollToTop from './ScrollToTop.js';
 | 
				
			||||||
import Feed from './pages/Feed.js';
 | 
					 | 
				
			||||||
import Article from './pages/Article.js';
 | 
					 | 
				
			||||||
import Comments from './pages/Comments.js';
 | 
					 | 
				
			||||||
import Results from './pages/Results.js';
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
class App extends React.Component {
 | 
					class App extends React.Component {
 | 
				
			||||||
	constructor(props) {
 | 
						constructor(props) {
 | 
				
			||||||
@@ -39,6 +40,16 @@ class App extends React.Component {
 | 
				
			|||||||
		localStorage.setItem('theme', 'dark');
 | 
							localStorage.setItem('theme', 'dark');
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						black() {
 | 
				
			||||||
 | 
							this.setState({ theme: 'black' });
 | 
				
			||||||
 | 
							localStorage.setItem('theme', 'black');
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						red() {
 | 
				
			||||||
 | 
							this.setState({ theme: 'red' });
 | 
				
			||||||
 | 
							localStorage.setItem('theme', 'red');
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	componentDidMount() {
 | 
						componentDidMount() {
 | 
				
			||||||
		if (!this.cache.length) {
 | 
							if (!this.cache.length) {
 | 
				
			||||||
			localForage.iterate((value, key) => {
 | 
								localForage.iterate((value, key) => {
 | 
				
			||||||
@@ -48,22 +59,61 @@ class App extends React.Component {
 | 
				
			|||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						goFullScreen() {
 | 
				
			||||||
 | 
							if ('wakeLock' in navigator) {
 | 
				
			||||||
 | 
								navigator.wakeLock.request('screen');
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							document.body.requestFullscreen({ navigationUI: 'hide' }).then(() => {
 | 
				
			||||||
 | 
								window.addEventListener('resize', () => this.forceUpdate());
 | 
				
			||||||
 | 
								this.forceUpdate();
 | 
				
			||||||
 | 
							});
 | 
				
			||||||
 | 
						};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						exitFullScreen() {
 | 
				
			||||||
 | 
							document.exitFullscreen().then(() => {
 | 
				
			||||||
 | 
								this.forceUpdate();
 | 
				
			||||||
 | 
							});
 | 
				
			||||||
 | 
						};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	render() {
 | 
						render() {
 | 
				
			||||||
		const theme = this.state.theme;
 | 
							const theme = this.state.theme;
 | 
				
			||||||
		document.body.style.backgroundColor = theme === 'dark' ? '#000' : '#eeeeee';
 | 
					
 | 
				
			||||||
 | 
							if (theme === 'dark') {
 | 
				
			||||||
 | 
								document.body.style.backgroundColor = '#1a1a1a';
 | 
				
			||||||
 | 
							} else if (theme === 'black') {
 | 
				
			||||||
 | 
								document.body.style.backgroundColor = '#000';
 | 
				
			||||||
 | 
							} else if (theme === 'red') {
 | 
				
			||||||
 | 
								document.body.style.backgroundColor = '#000';
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								document.body.style.backgroundColor = '#eeeeee';
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							const fullScreenAvailable = document.fullscreenEnabled ||
 | 
				
			||||||
 | 
								document.mozFullscreenEnabled ||
 | 
				
			||||||
 | 
								document.webkitFullscreenEnabled ||
 | 
				
			||||||
 | 
								document.msFullscreenEnabled;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		return (
 | 
							return (
 | 
				
			||||||
			<div className={theme}>
 | 
								<div className={theme}>
 | 
				
			||||||
				<Router>
 | 
									<Router>
 | 
				
			||||||
					<div className='container menu'>
 | 
										<div className='container menu'>
 | 
				
			||||||
						<p>
 | 
											<p>
 | 
				
			||||||
							<Link to='/'>QotNews - Feed</Link>
 | 
												<Link to='/'>QotNews</Link>
 | 
				
			||||||
							<span className='theme'>Theme: <a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a></span>
 | 
					
 | 
				
			||||||
 | 
												<span className='theme'><a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a> - <a href='#' onClick={() => this.black()}>Black</a> - <a href='#' onClick={() => this.red()}>Red</a></span>
 | 
				
			||||||
							<br />
 | 
												<br />
 | 
				
			||||||
							<span className='slogan'>Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode.</span>
 | 
												<span className='slogan'>Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.</span>
 | 
				
			||||||
						</p>
 | 
											</p>
 | 
				
			||||||
						<Route path='/(|search)' component={Search} />
 | 
											<Route path='/(|search)' component={Search} />
 | 
				
			||||||
						<Route path='/(|search)' component={Submit} />
 | 
											<Route path='/(|search)' component={Submit} />
 | 
				
			||||||
 | 
											{fullScreenAvailable &&
 | 
				
			||||||
 | 
												<Route path='/(|search)' render={() => !document.fullscreenElement ?
 | 
				
			||||||
 | 
													<button className='fullscreen' onClick={() => this.goFullScreen()}>Enter Fullscreen</button>
 | 
				
			||||||
 | 
												:
 | 
				
			||||||
 | 
													<button className='fullscreen' onClick={() => this.exitFullScreen()}>Exit Fullscreen</button>
 | 
				
			||||||
 | 
												} />
 | 
				
			||||||
 | 
											}
 | 
				
			||||||
					</div>
 | 
										</div>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
					<Route path='/' exact render={(props) => <Feed {...props} updateCache={this.updateCache} />} />
 | 
										<Route path='/' exact render={(props) => <Feed {...props} updateCache={this.updateCache} />} />
 | 
				
			||||||
@@ -73,6 +123,7 @@ class App extends React.Component {
 | 
				
			|||||||
					</Switch>
 | 
										</Switch>
 | 
				
			||||||
					<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} />} />
 | 
										<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} />} />
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
										<BackwardDot />
 | 
				
			||||||
					<ForwardDot />
 | 
										<ForwardDot />
 | 
				
			||||||
 | 
					
 | 
				
			||||||
					<ScrollToTop />
 | 
										<ScrollToTop />
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,7 +1,7 @@
 | 
				
			|||||||
import React from 'react';
 | 
					import React from 'react';
 | 
				
			||||||
import { Helmet } from 'react-helmet';
 | 
					import { Helmet } from 'react-helmet';
 | 
				
			||||||
import localForage from 'localforage';
 | 
					import localForage from 'localforage';
 | 
				
			||||||
import { sourceLink, infoLine, ToggleDot } from '../utils.js';
 | 
					import { sourceLink, infoLine, ToggleDot } from './utils.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Article extends React.Component {
 | 
					class Article extends React.Component {
 | 
				
			||||||
	constructor(props) {
 | 
						constructor(props) {
 | 
				
			||||||
@@ -45,7 +45,7 @@ class Article extends React.Component {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pConvert = (n) => {
 | 
						pConvert = (n) => {
 | 
				
			||||||
		this.setState({ pConv: [...this.state.pConv, n] });
 | 
							this.setState({ pConv: [...this.state.pConv, n]});
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	render() {
 | 
						render() {
 | 
				
			||||||
@@ -67,7 +67,8 @@ class Article extends React.Component {
 | 
				
			|||||||
				{story ?
 | 
									{story ?
 | 
				
			||||||
					<div className='article'>
 | 
										<div className='article'>
 | 
				
			||||||
						<Helmet>
 | 
											<Helmet>
 | 
				
			||||||
							<title>{story.title} - QotNews</title>
 | 
												<title>{story.title} | QotNews</title>
 | 
				
			||||||
 | 
												<meta name="robots" content="noindex" />
 | 
				
			||||||
						</Helmet>
 | 
											</Helmet>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
						<h1>{story.title}</h1>
 | 
											<h1>{story.title}</h1>
 | 
				
			||||||
@@ -4,7 +4,7 @@ import { HashLink } from 'react-router-hash-link';
 | 
				
			|||||||
import { Helmet } from 'react-helmet';
 | 
					import { Helmet } from 'react-helmet';
 | 
				
			||||||
import moment from 'moment';
 | 
					import moment from 'moment';
 | 
				
			||||||
import localForage from 'localforage';
 | 
					import localForage from 'localforage';
 | 
				
			||||||
import { infoLine, ToggleDot } from '../utils.js';
 | 
					import { infoLine, ToggleDot } from './utils.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Article extends React.Component {
 | 
					class Article extends React.Component {
 | 
				
			||||||
	constructor(props) {
 | 
						constructor(props) {
 | 
				
			||||||
@@ -72,7 +72,7 @@ class Article extends React.Component {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	displayComment(story, c, level) {
 | 
						displayComment(story, c, level) {
 | 
				
			||||||
		const cid = c.author + c.date;
 | 
							const cid = c.author+c.date;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		const collapsed = this.state.collapsed.includes(cid);
 | 
							const collapsed = this.state.collapsed.includes(cid);
 | 
				
			||||||
		const expanded = this.state.expanded.includes(cid);
 | 
							const expanded = this.state.expanded.includes(cid);
 | 
				
			||||||
@@ -85,21 +85,18 @@ class Article extends React.Component {
 | 
				
			|||||||
				<div className='info'>
 | 
									<div className='info'>
 | 
				
			||||||
					<p>
 | 
										<p>
 | 
				
			||||||
						{c.author === story.author ? '[OP]' : ''} {c.author || '[Deleted]'}
 | 
											{c.author === story.author ? '[OP]' : ''} {c.author || '[Deleted]'}
 | 
				
			||||||
						{' '} | <HashLink to={'#' + cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
 | 
											{' '} | <HashLink to={'#'+cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
						{hasChildren && (
 | 
											{hidden || hasChildren &&
 | 
				
			||||||
							hidden ?
 | 
					 | 
				
			||||||
								<span className='collapser expander pointer' onClick={() => this.expandComment(cid)}>+</span>
 | 
					 | 
				
			||||||
								:
 | 
					 | 
				
			||||||
							<span className='collapser pointer' onClick={() => this.collapseComment(cid)}>–</span>
 | 
												<span className='collapser pointer' onClick={() => this.collapseComment(cid)}>–</span>
 | 
				
			||||||
						)}
 | 
											}
 | 
				
			||||||
					</p>
 | 
										</p>
 | 
				
			||||||
				</div>
 | 
									</div>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				<div className={collapsed ? 'text hidden' : 'text'}  dangerouslySetInnerHTML={{ __html: c.text }} />
 | 
									<div className={collapsed ? 'text hidden' : 'text'}  dangerouslySetInnerHTML={{ __html: c.text }} />
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				{hidden && hasChildren ?
 | 
									{hidden && hasChildren ?
 | 
				
			||||||
					<div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c) - 1} more]</div>
 | 
										<div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c)-1} more]</div>
 | 
				
			||||||
				:
 | 
									:
 | 
				
			||||||
					c.comments.map(i => this.displayComment(story, i, level + 1))
 | 
										c.comments.map(i => this.displayComment(story, i, level + 1))
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
@@ -118,7 +115,8 @@ class Article extends React.Component {
 | 
				
			|||||||
				{story ?
 | 
									{story ?
 | 
				
			||||||
					<div className='article'>
 | 
										<div className='article'>
 | 
				
			||||||
						<Helmet>
 | 
											<Helmet>
 | 
				
			||||||
							<title>{story.title} - QotNews Comments</title>
 | 
												<title>{story.title} | QotNews</title>
 | 
				
			||||||
 | 
												<meta name="robots" content="noindex" />
 | 
				
			||||||
						</Helmet>
 | 
											</Helmet>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
						<h1>{story.title}</h1>
 | 
											<h1>{story.title}</h1>
 | 
				
			||||||
@@ -1,7 +1,8 @@
 | 
				
			|||||||
import React from 'react';
 | 
					import React from 'react';
 | 
				
			||||||
 | 
					import { Link } from 'react-router-dom';
 | 
				
			||||||
import { Helmet } from 'react-helmet';
 | 
					import { Helmet } from 'react-helmet';
 | 
				
			||||||
import localForage from 'localforage';
 | 
					import localForage from 'localforage';
 | 
				
			||||||
import { StoryItem } from '../components/StoryItem.js';
 | 
					import { sourceLink, infoLine, logos } from './utils.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Feed extends React.Component {
 | 
					class Feed extends React.Component {
 | 
				
			||||||
	constructor(props) {
 | 
						constructor(props) {
 | 
				
			||||||
@@ -21,20 +22,19 @@ class Feed extends React.Component {
 | 
				
			|||||||
					const updated = !this.state.stories || this.state.stories[0].id !== result.stories[0].id;
 | 
										const updated = !this.state.stories || this.state.stories[0].id !== result.stories[0].id;
 | 
				
			||||||
					console.log('updated:', updated);
 | 
										console.log('updated:', updated);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
					const { stories } = result;
 | 
										this.setState({ stories: result.stories });
 | 
				
			||||||
					this.setState({ stories });
 | 
										localStorage.setItem('stories', JSON.stringify(result.stories));
 | 
				
			||||||
					localStorage.setItem('stories', JSON.stringify(stories));
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
					if (updated) {
 | 
										if (updated) {
 | 
				
			||||||
						localForage.clear();
 | 
											localForage.clear();
 | 
				
			||||||
						stories.forEach((x, i) => {
 | 
											result.stories.forEach((x, i) => {
 | 
				
			||||||
							fetch('/api/' + x.id)
 | 
												fetch('/api/' + x.id)
 | 
				
			||||||
								.then(res => res.json())
 | 
													.then(res => res.json())
 | 
				
			||||||
								.then(({ story }) => {
 | 
													.then(result => {
 | 
				
			||||||
									localForage.setItem(x.id, story)
 | 
														localForage.setItem(x.id, result.story)
 | 
				
			||||||
										.then(console.log('preloaded', x.id, x.title));
 | 
															.then(console.log('preloaded', x.id, x.title));
 | 
				
			||||||
									this.props.updateCache(x.id, story);
 | 
														this.props.updateCache(x.id, result.story);
 | 
				
			||||||
								}, error => { }
 | 
													}, error => {}
 | 
				
			||||||
							);
 | 
												);
 | 
				
			||||||
						});
 | 
											});
 | 
				
			||||||
					}
 | 
										}
 | 
				
			||||||
@@ -52,10 +52,31 @@ class Feed extends React.Component {
 | 
				
			|||||||
		return (
 | 
							return (
 | 
				
			||||||
			<div className='container'>
 | 
								<div className='container'>
 | 
				
			||||||
				<Helmet>
 | 
									<Helmet>
 | 
				
			||||||
					<title>Feed - QotNews</title>
 | 
										<title>QotNews</title>
 | 
				
			||||||
 | 
										<meta name="robots" content="index" />
 | 
				
			||||||
				</Helmet>
 | 
									</Helmet>
 | 
				
			||||||
				{error && <p>Connection error?</p>}
 | 
									{error && <p>Connection error?</p>}
 | 
				
			||||||
				{stories ? stories.map(story => <StoryItem story={story}></StoryItem>) : <p>loading...</p>}
 | 
									{stories ?
 | 
				
			||||||
 | 
										<div>
 | 
				
			||||||
 | 
											{stories.map(x =>
 | 
				
			||||||
 | 
												<div className='item' key={x.id}>
 | 
				
			||||||
 | 
													<div className='title'>
 | 
				
			||||||
 | 
														<Link className='link' to={'/' + x.id}>
 | 
				
			||||||
 | 
															<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
 | 
				
			||||||
 | 
														</Link>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
														<span className='source'>
 | 
				
			||||||
 | 
															({sourceLink(x)})
 | 
				
			||||||
 | 
														</span>
 | 
				
			||||||
 | 
													</div>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
													{infoLine(x)}
 | 
				
			||||||
 | 
												</div>
 | 
				
			||||||
 | 
											)}
 | 
				
			||||||
 | 
										</div>
 | 
				
			||||||
 | 
									:
 | 
				
			||||||
 | 
										<p>loading...</p>
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
			</div>
 | 
								</div>
 | 
				
			||||||
		);
 | 
							);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
@@ -1,7 +1,8 @@
 | 
				
			|||||||
import React from 'react';
 | 
					import React from 'react';
 | 
				
			||||||
 | 
					import { Link } from 'react-router-dom';
 | 
				
			||||||
import { Helmet } from 'react-helmet';
 | 
					import { Helmet } from 'react-helmet';
 | 
				
			||||||
 | 
					import { sourceLink, infoLine, logos } from './utils.js';
 | 
				
			||||||
import AbortController from 'abort-controller';
 | 
					import AbortController from 'abort-controller';
 | 
				
			||||||
import { StoryItem } from '../components/StoryItem.js';
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Results extends React.Component {
 | 
					class Results extends React.Component {
 | 
				
			||||||
	constructor(props) {
 | 
						constructor(props) {
 | 
				
			||||||
@@ -28,7 +29,7 @@ class Results extends React.Component {
 | 
				
			|||||||
			.then(res => res.json())
 | 
								.then(res => res.json())
 | 
				
			||||||
			.then(
 | 
								.then(
 | 
				
			||||||
				(result) => {
 | 
									(result) => {
 | 
				
			||||||
					this.setState({ stories: result.results });
 | 
										this.setState({ stories: result.hits });
 | 
				
			||||||
				},
 | 
									},
 | 
				
			||||||
				(error) => {
 | 
									(error) => {
 | 
				
			||||||
					if (error.message !== 'The operation was aborted. ') {
 | 
										if (error.message !== 'The operation was aborted. ') {
 | 
				
			||||||
@@ -55,14 +56,32 @@ class Results extends React.Component {
 | 
				
			|||||||
		return (
 | 
							return (
 | 
				
			||||||
			<div className='container'>
 | 
								<div className='container'>
 | 
				
			||||||
				<Helmet>
 | 
									<Helmet>
 | 
				
			||||||
					<title>Feed - QotNews</title>
 | 
										<title>Search Results | QotNews</title>
 | 
				
			||||||
				</Helmet>
 | 
									</Helmet>
 | 
				
			||||||
				{error && <p>Connection error?</p>}
 | 
									{error && <p>Connection error?</p>}
 | 
				
			||||||
				{stories ?
 | 
									{stories ?
 | 
				
			||||||
					<>
 | 
										<>
 | 
				
			||||||
						<p>Search results:</p>
 | 
											<p>Search results:</p>
 | 
				
			||||||
						<div className='comment lined'>
 | 
											<div className='comment lined'>
 | 
				
			||||||
							{stories ? stories.map(story => <StoryItem story={story}></StoryItem>) : <p>loading...</p>}
 | 
												{stories.length ?
 | 
				
			||||||
 | 
													stories.map(x =>
 | 
				
			||||||
 | 
														<div className='item' key={x.id}>
 | 
				
			||||||
 | 
															<div className='title'>
 | 
				
			||||||
 | 
																<Link className='link' to={'/' + x.id}>
 | 
				
			||||||
 | 
																	<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
 | 
				
			||||||
 | 
																</Link>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
																<span className='source'>
 | 
				
			||||||
 | 
																	({sourceLink(x)})
 | 
				
			||||||
 | 
																</span>
 | 
				
			||||||
 | 
															</div>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
															{infoLine(x)}
 | 
				
			||||||
 | 
														</div>
 | 
				
			||||||
 | 
													)
 | 
				
			||||||
 | 
												:
 | 
				
			||||||
 | 
													<p>none</p>
 | 
				
			||||||
 | 
												}
 | 
				
			||||||
						</div>
 | 
											</div>
 | 
				
			||||||
					</>
 | 
										</>
 | 
				
			||||||
				:
 | 
									:
 | 
				
			||||||
@@ -15,6 +15,7 @@ class ScrollToTop extends React.Component {
 | 
				
			|||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		window.scrollTo(0, 0);
 | 
							window.scrollTo(0, 0);
 | 
				
			||||||
 | 
							document.body.scrollTop = 0;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	render() {
 | 
						render() {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -37,7 +37,7 @@ class Search extends Component {
 | 
				
			|||||||
			<span className='search'>
 | 
								<span className='search'>
 | 
				
			||||||
				<form onSubmit={this.searchAgain}>
 | 
									<form onSubmit={this.searchAgain}>
 | 
				
			||||||
					<input
 | 
										<input
 | 
				
			||||||
						placeholder='Search... (fixed)'
 | 
											placeholder='Search...'
 | 
				
			||||||
						value={search}
 | 
											value={search}
 | 
				
			||||||
						onChange={this.searchArticles}
 | 
											onChange={this.searchArticles}
 | 
				
			||||||
						ref={this.inputRef}
 | 
											ref={this.inputRef}
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										68
									
								
								webclient/src/Style-black.css
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								webclient/src/Style-black.css
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,68 @@
 | 
				
			|||||||
 | 
					.black {
 | 
				
			||||||
 | 
						color: #ddd;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.black a {
 | 
				
			||||||
 | 
						color: #ddd;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.black input {
 | 
				
			||||||
 | 
						color: #ddd;
 | 
				
			||||||
 | 
						border: 1px solid #828282;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.black button {
 | 
				
			||||||
 | 
						background-color: #444444;
 | 
				
			||||||
 | 
						border-color: #bbb;
 | 
				
			||||||
 | 
						color: #ddd;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.black .item {
 | 
				
			||||||
 | 
						color: #828282;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.black .item .source-logo {
 | 
				
			||||||
 | 
						filter: grayscale(1);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.black .item a {
 | 
				
			||||||
 | 
						color: #828282;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.black .item a.link {
 | 
				
			||||||
 | 
						color: #ddd;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					.black .item a.link:visited {
 | 
				
			||||||
 | 
						color: #828282;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.black .item .info a.hot {
 | 
				
			||||||
 | 
						color: #cccccc;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.black .article a {
 | 
				
			||||||
 | 
						border-bottom: 1px solid #aaaaaa;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.black .article u {
 | 
				
			||||||
 | 
						border-bottom: 1px solid #aaaaaa;
 | 
				
			||||||
 | 
						text-decoration: none;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.black .story-text video,
 | 
				
			||||||
 | 
					.black .story-text img {
 | 
				
			||||||
 | 
						filter: brightness(50%);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.black .article .info {
 | 
				
			||||||
 | 
						color: #828282;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.black .article .info a {
 | 
				
			||||||
 | 
						border-bottom: none;
 | 
				
			||||||
 | 
						color: #828282;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.black .comment.lined {
 | 
				
			||||||
 | 
						border-left: 1px solid #444444;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -11,12 +11,14 @@
 | 
				
			|||||||
	border: 1px solid #828282;
 | 
						border: 1px solid #828282;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.dark .item {
 | 
					.dark button {
 | 
				
			||||||
	color: #828282;
 | 
						background-color: #444444;
 | 
				
			||||||
 | 
						border-color: #bbb;
 | 
				
			||||||
 | 
						color: #ddd;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.dark .item .source-logo {
 | 
					.dark .item {
 | 
				
			||||||
	filter: grayscale(1);
 | 
						color: #828282;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.dark .item a {
 | 
					.dark .item a {
 | 
				
			||||||
@@ -43,6 +45,7 @@
 | 
				
			|||||||
	text-decoration: none;
 | 
						text-decoration: none;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.dark .story-text video,
 | 
				
			||||||
.dark .story-text img {
 | 
					.dark .story-text img {
 | 
				
			||||||
	filter: brightness(50%);
 | 
						filter: brightness(50%);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -2,9 +2,30 @@ body {
 | 
				
			|||||||
	text-rendering: optimizeLegibility;
 | 
						text-rendering: optimizeLegibility;
 | 
				
			||||||
	font: 1rem/1.3 sans-serif;
 | 
						font: 1rem/1.3 sans-serif;
 | 
				
			||||||
	color: #000000;
 | 
						color: #000000;
 | 
				
			||||||
	margin-bottom: 100vh;
 | 
					 | 
				
			||||||
	word-break: break-word;
 | 
						word-break: break-word;
 | 
				
			||||||
	font-kerning: normal;
 | 
						font-kerning: normal;
 | 
				
			||||||
 | 
						margin: 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					::backdrop {
 | 
				
			||||||
 | 
						background-color: rgba(0,0,0,0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					body:fullscreen {
 | 
				
			||||||
 | 
						overflow-y: scroll !important;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					body:-ms-fullscreen {
 | 
				
			||||||
 | 
						overflow-y: scroll !important;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					body:-webkit-full-screen {
 | 
				
			||||||
 | 
						overflow-y: scroll !important;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					body:-moz-full-screen {
 | 
				
			||||||
 | 
						overflow-y: scroll !important;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#root {
 | 
				
			||||||
 | 
						margin: 8px 8px 100vh 8px !important;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
a {
 | 
					a {
 | 
				
			||||||
@@ -22,6 +43,12 @@ input {
 | 
				
			|||||||
	border-radius: 4px;
 | 
						border-radius: 4px;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.fullscreen {
 | 
				
			||||||
 | 
						margin: 0.25rem;
 | 
				
			||||||
 | 
						padding: 0.25rem;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pre {
 | 
					pre {
 | 
				
			||||||
	overflow: auto;
 | 
						overflow: auto;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -185,16 +212,20 @@ span.source {
 | 
				
			|||||||
	cursor: pointer;
 | 
						cursor: pointer;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.toggleDot {
 | 
					.dot {
 | 
				
			||||||
 | 
						cursor: pointer;
 | 
				
			||||||
	position: fixed;
 | 
						position: fixed;
 | 
				
			||||||
	bottom: 1rem;
 | 
					 | 
				
			||||||
	left: 1rem;
 | 
					 | 
				
			||||||
	height: 3rem;
 | 
						height: 3rem;
 | 
				
			||||||
	width: 3rem;
 | 
						width: 3rem;
 | 
				
			||||||
	background-color: #828282;
 | 
						background-color: #828282;
 | 
				
			||||||
	border-radius: 50%;
 | 
						border-radius: 50%;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.toggleDot {
 | 
				
			||||||
 | 
						bottom: 1rem;
 | 
				
			||||||
 | 
						left: 1rem;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.toggleDot .button {
 | 
					.toggleDot .button {
 | 
				
			||||||
	font: 2rem/1 'icomoon';
 | 
						font: 2rem/1 'icomoon';
 | 
				
			||||||
	position: relative;
 | 
						position: relative;
 | 
				
			||||||
@@ -203,21 +234,27 @@ span.source {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.forwardDot {
 | 
					.forwardDot {
 | 
				
			||||||
	cursor: pointer;
 | 
					 | 
				
			||||||
	position: fixed;
 | 
					 | 
				
			||||||
	bottom: 1rem;
 | 
						bottom: 1rem;
 | 
				
			||||||
	right: 1rem;
 | 
						right: 1rem;
 | 
				
			||||||
	height: 3rem;
 | 
					 | 
				
			||||||
	width: 3rem;
 | 
					 | 
				
			||||||
	background-color: #828282;
 | 
					 | 
				
			||||||
	border-radius: 50%;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.forwardDot .button {
 | 
					.forwardDot .button {
 | 
				
			||||||
	font: 2.5rem/1 'icomoon';
 | 
						font: 2rem/1 'icomoon';
 | 
				
			||||||
	position: relative;
 | 
						position: relative;
 | 
				
			||||||
	top: 0.25rem;
 | 
						top: 0.5rem;
 | 
				
			||||||
	left: 0.3rem;
 | 
						left: 0.5rem;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.backwardDot {
 | 
				
			||||||
 | 
						bottom: 1rem;
 | 
				
			||||||
 | 
						right: 5rem;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.backwardDot .button {
 | 
				
			||||||
 | 
						font: 2rem/1 'icomoon';
 | 
				
			||||||
 | 
						position: relative;
 | 
				
			||||||
 | 
						top: 0.5rem;
 | 
				
			||||||
 | 
						left: 0.5rem;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.search form {
 | 
					.search form {
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										82
									
								
								webclient/src/Style-red.css
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										82
									
								
								webclient/src/Style-red.css
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,82 @@
 | 
				
			|||||||
 | 
					.red {
 | 
				
			||||||
 | 
						color: #b00;
 | 
				
			||||||
 | 
						scrollbar-color: #b00 #440000;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.red a {
 | 
				
			||||||
 | 
						color: #b00;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.red input {
 | 
				
			||||||
 | 
						color: #b00;
 | 
				
			||||||
 | 
						border: 1px solid #690000;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.red input::placeholder {
 | 
				
			||||||
 | 
						color: #690000;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.red hr {
 | 
				
			||||||
 | 
						background-color: #690000;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.red button {
 | 
				
			||||||
 | 
						background-color: #440000;
 | 
				
			||||||
 | 
						border-color: #b00;
 | 
				
			||||||
 | 
						color: #b00;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.red .item,
 | 
				
			||||||
 | 
					.red .slogan {
 | 
				
			||||||
 | 
						color: #690000;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.red .item .source-logo {
 | 
				
			||||||
 | 
						display: none;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.red .item a {
 | 
				
			||||||
 | 
						color: #690000;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.red .item a.link {
 | 
				
			||||||
 | 
						color: #b00;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					.red .item a.link:visited {
 | 
				
			||||||
 | 
						color: #690000;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.red .item .info a.hot {
 | 
				
			||||||
 | 
						color: #cc0000;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.red .article a {
 | 
				
			||||||
 | 
						border-bottom: 1px solid #aa0000;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.red .article u {
 | 
				
			||||||
 | 
						border-bottom: 1px solid #aa0000;
 | 
				
			||||||
 | 
						text-decoration: none;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.red .story-text video,
 | 
				
			||||||
 | 
					.red .story-text img {
 | 
				
			||||||
 | 
						filter: grayscale(100%) brightness(20%) sepia(100%) hue-rotate(-50deg) saturate(600%) contrast(0.8);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.red .article .info {
 | 
				
			||||||
 | 
						color: #690000;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.red .article .info a {
 | 
				
			||||||
 | 
						border-bottom: none;
 | 
				
			||||||
 | 
						color: #690000;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.red .comment.lined {
 | 
				
			||||||
 | 
						border-left: 1px solid #440000;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.red .dot {
 | 
				
			||||||
 | 
						background-color: #440000;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -41,7 +41,7 @@ class Submit extends Component {
 | 
				
			|||||||
			<span className='search'>
 | 
								<span className='search'>
 | 
				
			||||||
				<form onSubmit={this.submitArticle}>
 | 
									<form onSubmit={this.submitArticle}>
 | 
				
			||||||
					<input
 | 
										<input
 | 
				
			||||||
						placeholder='Submit Article'
 | 
											placeholder='Submit URL'
 | 
				
			||||||
						ref={this.inputRef}
 | 
											ref={this.inputRef}
 | 
				
			||||||
					/>
 | 
										/>
 | 
				
			||||||
				</form>
 | 
									</form>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,34 +0,0 @@
 | 
				
			|||||||
import React from "react";
 | 
					 | 
				
			||||||
import { Link } from "react-router-dom";
 | 
					 | 
				
			||||||
import { sourceLink, infoLine, getLogoUrl } from "../utils.js";
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
export class StoryItem extends React.Component {
 | 
					 | 
				
			||||||
	constructor(props) {
 | 
					 | 
				
			||||||
		super(props);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	render() {
 | 
					 | 
				
			||||||
		const story = this.props.story;
 | 
					 | 
				
			||||||
		const { id, title } = story;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		return (
 | 
					 | 
				
			||||||
			<div className="item" key={id}>
 | 
					 | 
				
			||||||
				<div className="title">
 | 
					 | 
				
			||||||
					<Link className="link" to={"/" + id}>
 | 
					 | 
				
			||||||
						<img
 | 
					 | 
				
			||||||
							className="source-logo"
 | 
					 | 
				
			||||||
							src={getLogoUrl(story)}
 | 
					 | 
				
			||||||
							alt="source logo"
 | 
					 | 
				
			||||||
						/>
 | 
					 | 
				
			||||||
						{" "}
 | 
					 | 
				
			||||||
						{title}
 | 
					 | 
				
			||||||
					</Link>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
					<span className="source">({sourceLink(story)})</span>
 | 
					 | 
				
			||||||
				</div>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
				{infoLine(story)}
 | 
					 | 
				
			||||||
			</div>
 | 
					 | 
				
			||||||
		);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
										
											Binary file not shown.
										
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
		Reference in New Issue
	
	Block a user