Compare commits
54 Commits
master
...
9cee370a25
Author | SHA1 | Date | |
---|---|---|---|
|
9cee370a25 | ||
|
5efc6ef2d3 | ||
|
4ec50e20cb | ||
|
c1b7877f4b | ||
|
7b8cbfc9b9 | ||
|
bfa4108a8e | ||
|
0bd0d40a31 | ||
|
4e04595415 | ||
|
006db2960c | ||
|
1f063f0dac | ||
|
1658346aa9 | ||
|
2dbc702b40 | ||
|
1c4764e67d | ||
|
ee49d2021e | ||
|
c391c50ab1 | ||
|
095f0d549a | ||
|
c21c71667e | ||
|
c3a2c91a11 | ||
|
0f39446a61 | ||
|
351059aab1 | ||
|
4488e2c292 | ||
|
afda5b635c | ||
|
0fc1a44d2b | ||
|
9fff1b9e46 | ||
|
16b59f6c67 | ||
|
939f4775a7 | ||
|
9bfc6fc6fa | ||
|
6ea9844d00 | ||
|
1318259d3d | ||
|
98a0c2257c | ||
|
e6976db25d | ||
|
9edc8b7cca | ||
|
33e21e7f30 | ||
|
892a99eca6 | ||
|
d718d05a04 | ||
|
d1795eb1b8 | ||
|
9f4ff4acf0 | ||
|
db6aad84ec | ||
|
29f8a8b8cc | ||
|
abf8589e02 | ||
|
b759f46582 | ||
|
736cdc8576 | ||
|
244d416f6e | ||
|
5f98a2e76a | ||
|
0567cdfd9b | ||
|
4f90671cec | ||
|
e63a1456a5 | ||
|
76f1d57702 | ||
|
de80389ed0 | ||
|
4e64cf682a | ||
|
c5fe5d25a0 | ||
|
283a2b1545 | ||
|
0d6a86ace2 | ||
|
f23bf628e0 |
1
apiserver/.gitignore
vendored
1
apiserver/.gitignore
vendored
@@ -109,5 +109,4 @@ settings.py
|
|||||||
data.db
|
data.db
|
||||||
data.db.bak
|
data.db.bak
|
||||||
data/archive/*
|
data/archive/*
|
||||||
data/backup/*
|
|
||||||
qotnews.sqlite
|
qotnews.sqlite
|
||||||
|
@@ -1,11 +1,11 @@
|
|||||||
import json
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
|
from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
|
||||||
from sqlalchemy.ext.declarative import declarative_base
|
from sqlalchemy.ext.declarative import declarative_base
|
||||||
from sqlalchemy.orm import sessionmaker
|
from sqlalchemy.orm import sessionmaker
|
||||||
from sqlalchemy.exc import IntegrityError
|
from sqlalchemy.exc import IntegrityError
|
||||||
|
from sqlalchemy.types import JSON
|
||||||
|
|
||||||
engine = create_engine('sqlite:///data/qotnews.sqlite', connect_args={'timeout': 360})
|
engine = create_engine('sqlite:///data/qotnews.sqlite')
|
||||||
Session = sessionmaker(bind=engine)
|
Session = sessionmaker(bind=engine)
|
||||||
|
|
||||||
Base = declarative_base()
|
Base = declarative_base()
|
||||||
@@ -15,8 +15,8 @@ class Story(Base):
|
|||||||
|
|
||||||
sid = Column(String(16), primary_key=True)
|
sid = Column(String(16), primary_key=True)
|
||||||
ref = Column(String(16), unique=True)
|
ref = Column(String(16), unique=True)
|
||||||
meta_json = Column(String)
|
meta = Column(JSON)
|
||||||
full_json = Column(String)
|
data = Column(JSON)
|
||||||
title = Column(String)
|
title = Column(String)
|
||||||
|
|
||||||
class Reflist(Base):
|
class Reflist(Base):
|
||||||
@@ -36,19 +36,21 @@ def get_story(sid):
|
|||||||
|
|
||||||
def put_story(story):
|
def put_story(story):
|
||||||
story = story.copy()
|
story = story.copy()
|
||||||
full_json = json.dumps(story)
|
data = {}
|
||||||
|
data.update(story)
|
||||||
|
|
||||||
story.pop('text', None)
|
meta = {}
|
||||||
story.pop('comments', None)
|
meta.update(story)
|
||||||
meta_json = json.dumps(story)
|
meta.pop('text', None)
|
||||||
|
meta.pop('comments', None)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
session = Session()
|
session = Session()
|
||||||
s = Story(
|
s = Story(
|
||||||
sid=story['id'],
|
sid=story['id'],
|
||||||
ref=story['ref'],
|
ref=story['ref'],
|
||||||
full_json=full_json,
|
data=data,
|
||||||
meta_json=meta_json,
|
meta=meta,
|
||||||
title=story.get('title', None),
|
title=story.get('title', None),
|
||||||
)
|
)
|
||||||
session.merge(s)
|
session.merge(s)
|
||||||
@@ -63,19 +65,26 @@ def get_story_by_ref(ref):
|
|||||||
session = Session()
|
session = Session()
|
||||||
return session.query(Story).filter(Story.ref==ref).first()
|
return session.query(Story).filter(Story.ref==ref).first()
|
||||||
|
|
||||||
def get_reflist(amount):
|
def get_stories_by_url(url):
|
||||||
session = Session()
|
session = Session()
|
||||||
q = session.query(Reflist).order_by(Reflist.rid.desc()).limit(amount)
|
return session.query(Story).\
|
||||||
|
filter(Story.title != None).\
|
||||||
|
filter(Story.meta['url'].as_string() == url).\
|
||||||
|
order_by(Story.meta['date'].desc())
|
||||||
|
|
||||||
|
def get_reflist():
|
||||||
|
session = Session()
|
||||||
|
q = session.query(Reflist).order_by(Reflist.rid.desc())
|
||||||
return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]
|
return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]
|
||||||
|
|
||||||
def get_stories(amount, skip=0):
|
def get_stories(maxage=60*60*24*2):
|
||||||
|
time = datetime.now().timestamp() - maxage
|
||||||
session = Session()
|
session = Session()
|
||||||
q = session.query(Reflist, Story.meta_json).\
|
q = session.query(Reflist, Story.meta).\
|
||||||
order_by(Reflist.rid.desc()).\
|
|
||||||
join(Story).\
|
join(Story).\
|
||||||
filter(Story.title != None).\
|
filter(Story.title != None).\
|
||||||
offset(skip).\
|
filter(Story.meta['date'] > time).\
|
||||||
limit(amount)
|
order_by(Story.meta['date'].desc())
|
||||||
return [x[1] for x in q]
|
return [x[1] for x in q]
|
||||||
|
|
||||||
def put_ref(ref, sid, source):
|
def put_ref(ref, sid, source):
|
||||||
@@ -101,22 +110,7 @@ def del_ref(ref):
|
|||||||
finally:
|
finally:
|
||||||
session.close()
|
session.close()
|
||||||
|
|
||||||
def count_stories():
|
|
||||||
try:
|
|
||||||
session = Session()
|
|
||||||
return session.query(Story).count()
|
|
||||||
finally:
|
|
||||||
session.close()
|
|
||||||
|
|
||||||
def get_story_list():
|
|
||||||
try:
|
|
||||||
session = Session()
|
|
||||||
return session.query(Story.sid).all()
|
|
||||||
finally:
|
|
||||||
session.close()
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
init()
|
init()
|
||||||
|
|
||||||
#print(get_story_by_ref('hgi3sy'))
|
print(get_story_by_ref('hgi3sy'))
|
||||||
print(len(get_reflist(99999)))
|
|
||||||
|
@@ -1,8 +1,6 @@
|
|||||||
import database
|
import database
|
||||||
import search
|
import search
|
||||||
import sys
|
import sys
|
||||||
import settings
|
|
||||||
import logging
|
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import requests
|
import requests
|
||||||
@@ -23,7 +21,7 @@ def database_del_story(sid):
|
|||||||
|
|
||||||
def search_del_story(sid):
|
def search_del_story(sid):
|
||||||
try:
|
try:
|
||||||
r = requests.delete(settings.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
|
r = requests.delete(search.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
|
||||||
if r.status_code != 202:
|
if r.status_code != 202:
|
||||||
raise Exception('Bad response code ' + str(r.status_code))
|
raise Exception('Bad response code ' + str(r.status_code))
|
||||||
return r.json()
|
return r.json()
|
@@ -6,84 +6,117 @@ logging.basicConfig(
|
|||||||
import requests
|
import requests
|
||||||
import time
|
import time
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
import itertools
|
||||||
|
|
||||||
import settings
|
import settings
|
||||||
from feeds import hackernews, reddit, tildes, manual, lobsters
|
from feeds import hackernews, reddit, tildes, substack, manual, news
|
||||||
import utils
|
from scrapers import outline, declutter, local
|
||||||
|
|
||||||
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com', 'sec.gov']
|
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
|
||||||
TWO_DAYS = 60*60*24*2
|
|
||||||
|
|
||||||
def list():
|
substacks = {}
|
||||||
feed = []
|
for key, value in settings.SUBSTACK.items():
|
||||||
|
substacks[key] = substack.Publication(value['url'])
|
||||||
|
categories = {}
|
||||||
|
for key, value in settings.CATEGORY.items():
|
||||||
|
categories[key] = news.Category(value['url'], value.get('tz'))
|
||||||
|
sitemaps = {}
|
||||||
|
for key, value in settings.SITEMAP.items():
|
||||||
|
sitemaps[key] = news.Sitemap(value['url'], value.get('tz'))
|
||||||
|
|
||||||
|
def get_list():
|
||||||
|
feeds = {}
|
||||||
|
|
||||||
if settings.NUM_HACKERNEWS:
|
if settings.NUM_HACKERNEWS:
|
||||||
feed += [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
|
feeds['hackernews'] = [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
|
||||||
|
|
||||||
if settings.NUM_LOBSTERS:
|
|
||||||
feed += [(x, 'lobsters') for x in lobsters.feed()[:settings.NUM_LOBSTERS]]
|
|
||||||
|
|
||||||
if settings.NUM_REDDIT:
|
if settings.NUM_REDDIT:
|
||||||
feed += [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
|
feeds['reddit'] = [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
|
||||||
|
|
||||||
if settings.NUM_TILDES:
|
if settings.NUM_TILDES:
|
||||||
feed += [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
|
feeds['tildes'] = [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
|
||||||
|
|
||||||
|
if settings.NUM_SUBSTACK:
|
||||||
|
feeds['substack'] = [(x, 'substack') for x in substack.top.feed()[:settings.NUM_SUBSTACK]]
|
||||||
|
|
||||||
|
for key, publication in substacks.items():
|
||||||
|
count = settings.SUBSTACK[key]['count']
|
||||||
|
feeds[key] = [(x, key) for x in publication.feed()[:count]]
|
||||||
|
|
||||||
|
for key, sites in categories.items():
|
||||||
|
count = settings.CATEGORY[key].get('count') or 0
|
||||||
|
excludes = settings.CATEGORY[key].get('excludes')
|
||||||
|
tz = settings.CATEGORY[key].get('tz')
|
||||||
|
feeds[key] = [(x, key) for x in sites.feed(excludes)[:count]]
|
||||||
|
|
||||||
|
for key, sites in sitemaps.items():
|
||||||
|
count = settings.SITEMAP[key].get('count') or 0
|
||||||
|
excludes = settings.SITEMAP[key].get('excludes')
|
||||||
|
feeds[key] = [(x, key) for x in sites.feed(excludes)[:count]]
|
||||||
|
|
||||||
|
values = feeds.values()
|
||||||
|
feed = itertools.chain.from_iterable(itertools.zip_longest(*values, fillvalue=None))
|
||||||
|
feed = list(filter(None, feed))
|
||||||
return feed
|
return feed
|
||||||
|
|
||||||
def get_article(url):
|
def get_article(url):
|
||||||
if not settings.READER_URL:
|
scrapers = {
|
||||||
logging.info('Readerserver not configured, aborting.')
|
'declutter': declutter,
|
||||||
return ''
|
'outline': outline,
|
||||||
|
'local': local,
|
||||||
|
}
|
||||||
|
available = settings.SCRAPERS or ['local']
|
||||||
|
if 'local' not in available:
|
||||||
|
available += ['local']
|
||||||
|
|
||||||
if url.startswith('https://twitter.com'):
|
for scraper in available:
|
||||||
logging.info('Replacing twitter.com url with nitter.net')
|
if scraper not in scrapers.keys():
|
||||||
url = url.replace('twitter.com', 'nitter.net')
|
continue
|
||||||
|
try:
|
||||||
try:
|
html = scrapers[scraper].get_html(url)
|
||||||
r = requests.post(settings.READER_URL, data=dict(url=url), timeout=20)
|
if html:
|
||||||
if r.status_code != 200:
|
return html
|
||||||
raise Exception('Bad response code ' + str(r.status_code))
|
except KeyboardInterrupt:
|
||||||
return r.text
|
raise
|
||||||
except KeyboardInterrupt:
|
except:
|
||||||
raise
|
pass
|
||||||
except BaseException as e:
|
return ''
|
||||||
logging.error('Problem getting article: {}'.format(str(e)))
|
|
||||||
return ''
|
|
||||||
|
|
||||||
def get_content_type(url):
|
def get_content_type(url):
|
||||||
try:
|
|
||||||
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
|
|
||||||
return requests.get(url, headers=headers, timeout=5).headers['content-type']
|
|
||||||
except:
|
|
||||||
return ''
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
headers = {
|
headers = {
|
||||||
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
|
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
|
||||||
'X-Forwarded-For': '66.249.66.1',
|
'X-Forwarded-For': '66.249.66.1',
|
||||||
}
|
}
|
||||||
return requests.get(url, headers=headers, timeout=10).headers['content-type']
|
return requests.get(url, headers=headers, timeout=5).headers['content-type']
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
|
||||||
|
return requests.get(url, headers=headers, timeout=10).headers['content-type']
|
||||||
|
except:
|
||||||
|
return ''
|
||||||
|
|
||||||
def update_story(story, is_manual=False):
|
def update_story(story, is_manual=False):
|
||||||
res = {}
|
res = {}
|
||||||
|
|
||||||
try:
|
if story['source'] == 'hackernews':
|
||||||
if story['source'] == 'hackernews':
|
res = hackernews.story(story['ref'])
|
||||||
res = hackernews.story(story['ref'])
|
elif story['source'] == 'reddit':
|
||||||
elif story['source'] == 'lobsters':
|
res = reddit.story(story['ref'])
|
||||||
res = lobsters.story(story['ref'])
|
elif story['source'] == 'tildes':
|
||||||
elif story['source'] == 'reddit':
|
res = tildes.story(story['ref'])
|
||||||
res = reddit.story(story['ref'])
|
elif story['source'] == 'substack':
|
||||||
elif story['source'] == 'tildes':
|
res = substack.top.story(story['ref'])
|
||||||
res = tildes.story(story['ref'])
|
elif story['source'] in categories.keys():
|
||||||
elif story['source'] == 'manual':
|
res = categories[story['source']].story(story['ref'])
|
||||||
res = manual.story(story['ref'])
|
elif story['source'] in sitemaps.keys():
|
||||||
except BaseException as e:
|
res = sitemaps[story['source']].story(story['ref'])
|
||||||
utils.alert_tanner('Problem updating {} story, ref {}: {}'.format(story['source'], story['ref'], str(e)))
|
elif story['source'] in substacks.keys():
|
||||||
logging.exception(e)
|
res = substacks[story['source']].story(story['ref'])
|
||||||
return False
|
elif story['source'] == 'manual':
|
||||||
|
res = manual.story(story['ref'])
|
||||||
|
|
||||||
if res:
|
if res:
|
||||||
story.update(res) # join dicts
|
story.update(res) # join dicts
|
||||||
@@ -91,8 +124,8 @@ def update_story(story, is_manual=False):
|
|||||||
logging.info('Story not ready yet')
|
logging.info('Story not ready yet')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if story['date'] and not is_manual and story['date'] + TWO_DAYS < time.time():
|
if story['date'] and not is_manual and story['date'] + settings.MAX_STORY_AGE < time.time():
|
||||||
logging.info('Story too old, removing. Date: {}'.format(story['date']))
|
logging.info('Story too old, removing')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if story.get('url', '') and not story.get('text', ''):
|
if story.get('url', '') and not story.get('text', ''):
|
||||||
@@ -106,12 +139,6 @@ def update_story(story, is_manual=False):
|
|||||||
logging.info(story['url'])
|
logging.info(story['url'])
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if 'trump' in story['title'].lower() or 'musk' in story['title'].lower():
|
|
||||||
logging.info('Trump / Musk story, skipping')
|
|
||||||
logging.info(story['url'])
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
logging.info('Getting article ' + story['url'])
|
logging.info('Getting article ' + story['url'])
|
||||||
story['text'] = get_article(story['url'])
|
story['text'] = get_article(story['url'])
|
||||||
if not story['text']: return False
|
if not story['text']: return False
|
||||||
@@ -129,7 +156,7 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
#print(get_article('https://www.bloomberg.com/news/articles/2019-09-23/xi-s-communists-under-pressure-as-high-prices-hit-china-workers'))
|
#print(get_article('https://www.bloomberg.com/news/articles/2019-09-23/xi-s-communists-under-pressure-as-high-prices-hit-china-workers'))
|
||||||
|
|
||||||
a = get_content_type('https://tefkos.comminfo.rutgers.edu/Courses/e530/Readings/Beal%202008%20full%20text%20searching.pdf')
|
a = get_article('https://blog.joinmastodon.org/2019/10/mastodon-3.0/')
|
||||||
print(a)
|
print(a)
|
||||||
|
|
||||||
print('done')
|
print('done')
|
||||||
|
@@ -12,8 +12,7 @@ import requests
|
|||||||
from utils import clean
|
from utils import clean
|
||||||
|
|
||||||
API_TOPSTORIES = lambda x: 'https://hacker-news.firebaseio.com/v0/topstories.json'
|
API_TOPSTORIES = lambda x: 'https://hacker-news.firebaseio.com/v0/topstories.json'
|
||||||
ALG_API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
|
API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
|
||||||
BHN_API_ITEM = lambda x : 'https://api.hnpwa.com/v0/item/{}.json'.format(x)
|
|
||||||
|
|
||||||
SITE_LINK = lambda x : 'https://news.ycombinator.com/item?id={}'.format(x)
|
SITE_LINK = lambda x : 'https://news.ycombinator.com/item?id={}'.format(x)
|
||||||
SITE_AUTHOR_LINK = lambda x : 'https://news.ycombinator.com/user?id={}'.format(x)
|
SITE_AUTHOR_LINK = lambda x : 'https://news.ycombinator.com/user?id={}'.format(x)
|
||||||
@@ -43,7 +42,7 @@ def api(route, ref=None):
|
|||||||
def feed():
|
def feed():
|
||||||
return [str(x) for x in api(API_TOPSTORIES) or []]
|
return [str(x) for x in api(API_TOPSTORIES) or []]
|
||||||
|
|
||||||
def alg_comment(i):
|
def comment(i):
|
||||||
if 'author' not in i:
|
if 'author' not in i:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -52,25 +51,21 @@ def alg_comment(i):
|
|||||||
c['score'] = i.get('points', 0)
|
c['score'] = i.get('points', 0)
|
||||||
c['date'] = i.get('created_at_i', 0)
|
c['date'] = i.get('created_at_i', 0)
|
||||||
c['text'] = clean(i.get('text', '') or '')
|
c['text'] = clean(i.get('text', '') or '')
|
||||||
c['comments'] = [alg_comment(j) for j in i['children']]
|
c['comments'] = [comment(j) for j in i['children']]
|
||||||
c['comments'] = list(filter(bool, c['comments']))
|
c['comments'] = list(filter(bool, c['comments']))
|
||||||
return c
|
return c
|
||||||
|
|
||||||
def alg_comment_count(i):
|
def comment_count(i):
|
||||||
alive = 1 if i['author'] else 0
|
alive = 1 if i['author'] else 0
|
||||||
return sum([alg_comment_count(c) for c in i['comments']]) + alive
|
return sum([comment_count(c) for c in i['comments']]) + alive
|
||||||
|
|
||||||
def alg_story(ref):
|
def story(ref):
|
||||||
r = api(ALG_API_ITEM, ref)
|
r = api(API_ITEM, ref)
|
||||||
if not r:
|
if not r: return False
|
||||||
logging.info('Bad Algolia Hackernews API response.')
|
|
||||||
return None
|
|
||||||
|
|
||||||
if 'deleted' in r:
|
if 'deleted' in r:
|
||||||
logging.info('Story was deleted.')
|
|
||||||
return False
|
return False
|
||||||
elif r.get('type', '') != 'story':
|
elif r.get('type', '') != 'story':
|
||||||
logging.info('Type "{}" is not "story".'.format(r.get('type', '')))
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
s = {}
|
s = {}
|
||||||
@@ -81,85 +76,17 @@ def alg_story(ref):
|
|||||||
s['title'] = r.get('title', '')
|
s['title'] = r.get('title', '')
|
||||||
s['link'] = SITE_LINK(ref)
|
s['link'] = SITE_LINK(ref)
|
||||||
s['url'] = r.get('url', '')
|
s['url'] = r.get('url', '')
|
||||||
s['comments'] = [alg_comment(i) for i in r['children']]
|
s['comments'] = [comment(i) for i in r['children']]
|
||||||
s['comments'] = list(filter(bool, s['comments']))
|
s['comments'] = list(filter(bool, s['comments']))
|
||||||
s['num_comments'] = alg_comment_count(s) - 1
|
s['num_comments'] = comment_count(s) - 1
|
||||||
|
|
||||||
if 'text' in r and r['text']:
|
if 'text' in r and r['text']:
|
||||||
s['text'] = clean(r['text'] or '')
|
s['text'] = clean(r['text'] or '')
|
||||||
|
|
||||||
return s
|
return s
|
||||||
|
|
||||||
def bhn_comment(i):
|
|
||||||
if 'user' not in i:
|
|
||||||
return False
|
|
||||||
|
|
||||||
c = {}
|
|
||||||
c['author'] = i.get('user', '')
|
|
||||||
c['score'] = 0 # Not present?
|
|
||||||
c['date'] = i.get('time', 0)
|
|
||||||
c['text'] = clean(i.get('content', '') or '')
|
|
||||||
c['comments'] = [bhn_comment(j) for j in i['comments']]
|
|
||||||
c['comments'] = list(filter(bool, c['comments']))
|
|
||||||
return c
|
|
||||||
|
|
||||||
def bhn_story(ref):
|
|
||||||
r = api(BHN_API_ITEM, ref)
|
|
||||||
if not r:
|
|
||||||
logging.info('Bad BetterHN Hackernews API response.')
|
|
||||||
return None
|
|
||||||
|
|
||||||
if 'deleted' in r: # TODO: verify
|
|
||||||
logging.info('Story was deleted.')
|
|
||||||
return False
|
|
||||||
elif r.get('dead', False):
|
|
||||||
logging.info('Story was deleted.')
|
|
||||||
return False
|
|
||||||
elif r.get('type', '') != 'link':
|
|
||||||
logging.info('Type "{}" is not "link".'.format(r.get('type', '')))
|
|
||||||
return False
|
|
||||||
|
|
||||||
s = {}
|
|
||||||
s['author'] = r.get('user', '')
|
|
||||||
s['author_link'] = SITE_AUTHOR_LINK(r.get('user', ''))
|
|
||||||
s['score'] = r.get('points', 0)
|
|
||||||
s['date'] = r.get('time', 0)
|
|
||||||
s['title'] = r.get('title', '')
|
|
||||||
s['link'] = SITE_LINK(ref)
|
|
||||||
s['url'] = r.get('url', '')
|
|
||||||
if s['url'].startswith('item'):
|
|
||||||
s['url'] = SITE_LINK(ref)
|
|
||||||
s['comments'] = [bhn_comment(i) for i in r['comments']]
|
|
||||||
s['comments'] = list(filter(bool, s['comments']))
|
|
||||||
s['num_comments'] = r.get('comments_count', 0)
|
|
||||||
|
|
||||||
if 'content' in r and r['content']:
|
|
||||||
s['text'] = clean(r['content'] or '')
|
|
||||||
|
|
||||||
return s
|
|
||||||
|
|
||||||
def story(ref):
|
|
||||||
s = alg_story(ref)
|
|
||||||
if s is None:
|
|
||||||
s = bhn_story(ref)
|
|
||||||
if not s:
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
if s['score'] < 25 and s['num_comments'] < 10:
|
|
||||||
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
|
|
||||||
return False
|
|
||||||
|
|
||||||
return s
|
|
||||||
|
|
||||||
# scratchpad so I can quickly develop the parser
|
# scratchpad so I can quickly develop the parser
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(feed())
|
print(feed())
|
||||||
#print(story(20763961))
|
#print(story(20763961))
|
||||||
#print(story(20802050))
|
#print(story(20802050))
|
||||||
|
|
||||||
#print(story(42899834)) # type "job"
|
|
||||||
#print(story(42900076)) # Ask HN
|
|
||||||
#print(story(42898201)) # Show HN
|
|
||||||
#print(story(42899703)) # normal
|
|
||||||
print(story(42902678)) # bad title?
|
|
||||||
|
@@ -1,120 +0,0 @@
|
|||||||
import logging
|
|
||||||
logging.basicConfig(
|
|
||||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
|
||||||
level=logging.DEBUG)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
import sys
|
|
||||||
sys.path.insert(0,'.')
|
|
||||||
|
|
||||||
import requests
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from utils import clean
|
|
||||||
|
|
||||||
API_HOTTEST = lambda x: 'https://lobste.rs/hottest.json'
|
|
||||||
API_ITEM = lambda x : 'https://lobste.rs/s/{}.json'.format(x)
|
|
||||||
|
|
||||||
SITE_LINK = lambda x : 'https://lobste.rs/s/{}'.format(x)
|
|
||||||
SITE_AUTHOR_LINK = lambda x : 'https://lobste.rs/u/{}'.format(x)
|
|
||||||
|
|
||||||
def api(route, ref=None):
|
|
||||||
try:
|
|
||||||
r = requests.get(route(ref), timeout=5)
|
|
||||||
if r.status_code != 200:
|
|
||||||
raise Exception('Bad response code ' + str(r.status_code))
|
|
||||||
return r.json()
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
raise
|
|
||||||
except BaseException as e:
|
|
||||||
logging.error('Problem hitting lobsters API: {}, trying again'.format(str(e)))
|
|
||||||
|
|
||||||
try:
|
|
||||||
r = requests.get(route(ref), timeout=15)
|
|
||||||
if r.status_code != 200:
|
|
||||||
raise Exception('Bad response code ' + str(r.status_code))
|
|
||||||
return r.json()
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
raise
|
|
||||||
except BaseException as e:
|
|
||||||
logging.error('Problem hitting lobsters API: {}'.format(str(e)))
|
|
||||||
return False
|
|
||||||
|
|
||||||
def feed():
|
|
||||||
return [x['short_id'] for x in api(API_HOTTEST) or []]
|
|
||||||
|
|
||||||
def unix(date_str):
|
|
||||||
date_str = date_str.replace(':', '')
|
|
||||||
return int(datetime.strptime(date_str, '%Y-%m-%dT%H%M%S.%f%z').timestamp())
|
|
||||||
|
|
||||||
def make_comment(i):
|
|
||||||
c = {}
|
|
||||||
try:
|
|
||||||
c['author'] = i['commenting_user']
|
|
||||||
except KeyError:
|
|
||||||
c['author'] = ''
|
|
||||||
c['score'] = i.get('score', 0)
|
|
||||||
try:
|
|
||||||
c['date'] = unix(i['created_at'])
|
|
||||||
except KeyError:
|
|
||||||
c['date'] = 0
|
|
||||||
c['text'] = clean(i.get('comment', '') or '')
|
|
||||||
c['comments'] = []
|
|
||||||
return c
|
|
||||||
|
|
||||||
def iter_comments(flat_comments):
|
|
||||||
nested_comments = []
|
|
||||||
parent_stack = []
|
|
||||||
for comment in flat_comments:
|
|
||||||
c = make_comment(comment)
|
|
||||||
indent = comment['depth']
|
|
||||||
|
|
||||||
if indent == 0:
|
|
||||||
nested_comments.append(c)
|
|
||||||
parent_stack = [c]
|
|
||||||
else:
|
|
||||||
parent_stack = parent_stack[:indent]
|
|
||||||
p = parent_stack[-1]
|
|
||||||
p['comments'].append(c)
|
|
||||||
parent_stack.append(c)
|
|
||||||
return nested_comments
|
|
||||||
|
|
||||||
def story(ref):
|
|
||||||
r = api(API_ITEM, ref)
|
|
||||||
if not r:
|
|
||||||
logging.info('Bad Lobsters API response.')
|
|
||||||
return False
|
|
||||||
|
|
||||||
s = {}
|
|
||||||
try:
|
|
||||||
s['author'] = r['submitter_user']
|
|
||||||
s['author_link'] = SITE_AUTHOR_LINK(s['author'])
|
|
||||||
except KeyError:
|
|
||||||
s['author'] = ''
|
|
||||||
s['author_link'] = ''
|
|
||||||
s['score'] = r.get('score', 0)
|
|
||||||
try:
|
|
||||||
s['date'] = unix(r['created_at'])
|
|
||||||
except KeyError:
|
|
||||||
s['date'] = 0
|
|
||||||
s['title'] = r.get('title', '')
|
|
||||||
s['link'] = SITE_LINK(ref)
|
|
||||||
s['url'] = r.get('url', '')
|
|
||||||
s['comments'] = iter_comments(r['comments'])
|
|
||||||
s['num_comments'] = r['comment_count']
|
|
||||||
|
|
||||||
if s['score'] < 15 and s['num_comments'] < 10:
|
|
||||||
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
|
|
||||||
return False
|
|
||||||
|
|
||||||
if 'description' in r and r['description']:
|
|
||||||
s['text'] = clean(r['description'] or '')
|
|
||||||
|
|
||||||
return s
|
|
||||||
|
|
||||||
# scratchpad so I can quickly develop the parser
|
|
||||||
if __name__ == '__main__':
|
|
||||||
#print(feed())
|
|
||||||
import json
|
|
||||||
print(json.dumps(story('fzvd1v'), indent=4))
|
|
||||||
#print(json.dumps(story('ixyv5u'), indent=4))
|
|
@@ -27,9 +27,7 @@ def api(route):
|
|||||||
|
|
||||||
def story(ref):
|
def story(ref):
|
||||||
html = api(ref)
|
html = api(ref)
|
||||||
if not html:
|
if not html: return False
|
||||||
logging.info('Bad http GET response.')
|
|
||||||
return False
|
|
||||||
|
|
||||||
soup = BeautifulSoup(html, features='html.parser')
|
soup = BeautifulSoup(html, features='html.parser')
|
||||||
|
|
||||||
|
231
apiserver/feeds/news.py
Normal file
231
apiserver/feeds/news.py
Normal file
@@ -0,0 +1,231 @@
|
|||||||
|
import logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
level=logging.DEBUG)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0,'.')
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from datetime import datetime
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from scrapers import declutter
|
||||||
|
import dateutil.parser
|
||||||
|
import extruct
|
||||||
|
import pytz
|
||||||
|
|
||||||
|
from utils import clean
|
||||||
|
|
||||||
|
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'
|
||||||
|
#USER_AGENT = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
|
||||||
|
|
||||||
|
def unix(date_str, tz=None):
|
||||||
|
try:
|
||||||
|
dt = dateutil.parser.parse(date_str)
|
||||||
|
if tz:
|
||||||
|
dt = pytz.timezone(tz).localize(dt)
|
||||||
|
return int(dt.timestamp())
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def xml(route, ref=None):
|
||||||
|
try:
|
||||||
|
headers = {'User-Agent': USER_AGENT, 'X-Forwarded-For': '66.249.66.1'}
|
||||||
|
r = requests.get(route(ref), headers=headers, timeout=5)
|
||||||
|
if r.status_code != 200:
|
||||||
|
raise Exception('Bad response code ' + str(r.status_code))
|
||||||
|
return r.text
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
raise
|
||||||
|
except BaseException as e:
|
||||||
|
logging.error('Problem hitting URL: {}'.format(str(e)))
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def parse_extruct(s, data):
|
||||||
|
for rdfa in data['rdfa']:
|
||||||
|
for key, props in rdfa.items():
|
||||||
|
if 'http://ogp.me/ns#title' in props:
|
||||||
|
for values in props['http://ogp.me/ns#title']:
|
||||||
|
s['title'] = values['@value']
|
||||||
|
if 'http://ogp.me/ns/article#modified_time' in props:
|
||||||
|
for values in props['http://ogp.me/ns/article#modified_time']:
|
||||||
|
s['date'] = values['@value']
|
||||||
|
if 'http://ogp.me/ns/article#published_time' in props:
|
||||||
|
for values in props['http://ogp.me/ns/article#published_time']:
|
||||||
|
s['date'] = values['@value']
|
||||||
|
|
||||||
|
for og in data['opengraph']:
|
||||||
|
titles = list(filter(None, [value if 'og:title' in key else None for key, value in og['properties']]))
|
||||||
|
modified = list(filter(None, [value if 'article:modified_time' in key else None for key, value in og['properties']]))
|
||||||
|
published = list(filter(None, [value if 'article:published_time' in key else None for key, value in og['properties']]))
|
||||||
|
if len(modified):
|
||||||
|
s['date'] = modified[0]
|
||||||
|
if len(published):
|
||||||
|
s['date'] = published[0]
|
||||||
|
if len(titles):
|
||||||
|
s['title'] = titles[0]
|
||||||
|
|
||||||
|
for md in data['microdata']:
|
||||||
|
if md['type'] == 'https://schema.org/NewsArticle':
|
||||||
|
props = md['properties']
|
||||||
|
s['title'] = props['headline']
|
||||||
|
if props['dateModified']:
|
||||||
|
s['date'] = props['dateModified']
|
||||||
|
if props['datePublished']:
|
||||||
|
s['date'] = props['datePublished']
|
||||||
|
if 'author' in props and props['author']:
|
||||||
|
s['author'] = props['author']['properties']['name']
|
||||||
|
|
||||||
|
for ld in data['json-ld']:
|
||||||
|
if '@type' in ld and ld['@type'] in ['Article', 'NewsArticle']:
|
||||||
|
s['title'] = ld['headline']
|
||||||
|
if ld['dateModified']:
|
||||||
|
s['date'] = ld['dateModified']
|
||||||
|
if ld['datePublished']:
|
||||||
|
s['date'] = ld['datePublished']
|
||||||
|
if 'author' in ld and ld['author']:
|
||||||
|
s['author'] = ld['author']['name']
|
||||||
|
if '@graph' in ld:
|
||||||
|
for gld in ld['@graph']:
|
||||||
|
if '@type' in gld and gld['@type'] in ['Article', 'NewsArticle']:
|
||||||
|
s['title'] = gld['headline']
|
||||||
|
if gld['dateModified']:
|
||||||
|
s['date'] = gld['dateModified']
|
||||||
|
if gld['datePublished']:
|
||||||
|
s['date'] = gld['datePublished']
|
||||||
|
|
||||||
|
return s
|
||||||
|
|
||||||
|
def comment(i):
|
||||||
|
if 'author' not in i:
|
||||||
|
return False
|
||||||
|
|
||||||
|
c = {}
|
||||||
|
c['author'] = i.get('author', '')
|
||||||
|
c['score'] = i.get('points', 0)
|
||||||
|
c['date'] = unix(i.get('date', 0))
|
||||||
|
c['text'] = clean(i.get('text', '') or '')
|
||||||
|
c['comments'] = [comment(j) for j in i['children']]
|
||||||
|
c['comments'] = list(filter(bool, c['comments']))
|
||||||
|
return c
|
||||||
|
|
||||||
|
def comment_count(i):
|
||||||
|
alive = 1 if i['author'] else 0
|
||||||
|
return sum([comment_count(c) for c in i['comments']]) + alive
|
||||||
|
|
||||||
|
class _Base:
|
||||||
|
def __init__(url, tz=None):
|
||||||
|
self.url = url
|
||||||
|
self.tz = tz
|
||||||
|
|
||||||
|
def feed(self, excludes=None):
|
||||||
|
return []
|
||||||
|
|
||||||
|
def story(self, ref):
|
||||||
|
markup = xml(lambda x: ref)
|
||||||
|
if not markup:
|
||||||
|
return False
|
||||||
|
|
||||||
|
s = {}
|
||||||
|
s['author_link'] = ''
|
||||||
|
s['score'] = 0
|
||||||
|
s['comments'] = []
|
||||||
|
s['num_comments'] = 0
|
||||||
|
s['link'] = ref
|
||||||
|
s['url'] = ref
|
||||||
|
s['date'] = 0
|
||||||
|
|
||||||
|
data = extruct.extract(markup)
|
||||||
|
s = parse_extruct(s, data)
|
||||||
|
if s['date']:
|
||||||
|
s['date'] = unix(s['date'], tz=self.tz)
|
||||||
|
|
||||||
|
if 'disqus' in markup:
|
||||||
|
try:
|
||||||
|
s['comments'] = declutter.get_comments(ref)
|
||||||
|
c['comments'] = list(filter(bool, c['comments']))
|
||||||
|
s['num_comments'] = comment_count(s['comments'])
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
raise
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if not s['date']:
|
||||||
|
return False
|
||||||
|
return s
|
||||||
|
|
||||||
|
def get_sitemap_date(a):
    """Return the date text of a sitemap <url> entry, or '' if it has none.

    The first tag found wins, checked in this order: 'lastmod',
    'news:publication_date', 'ns2:publication_date'.
    """
    for tag_name in ('lastmod', 'news:publication_date', 'ns2:publication_date'):
        node = a.find(tag_name)
        if node:
            return node.text
    return ''
|
||||||
|
|
||||||
|
class Sitemap(_Base):
    """Story source backed by an XML sitemap."""

    def __init__(self, url, tz=None):
        self.tz = tz
        self.sitemap_url = url

    def feed(self, excludes=None):
        """Return article URLs from the sitemap, newest first.

        Entries without a <loc> or without any date tag are ignored. When
        `excludes` is given, links containing any of its substrings are
        dropped. Returns [] when the sitemap cannot be fetched or has no
        <urlset> element.
        """
        markup = xml(lambda x: self.sitemap_url)
        if not markup:
            return []

        soup = BeautifulSoup(markup, features='lxml')
        urlset = soup.find('urlset')
        if urlset is None:
            # e.g. a sitemap index or an error page: nothing to list.
            return []

        entries = [
            a for a in urlset.findAll('url')
            if a.find('loc') and get_sitemap_date(a)
        ]
        entries.sort(key=lambda a: unix(get_sitemap_date(a)), reverse=True)

        # De-duplicate while keeping the newest-first order; the previous
        # list(set(...)) threw away the sort that was just performed.
        links = list(dict.fromkeys(a.find('loc').text for a in entries))

        if excludes:
            links = [
                link for link in links
                if link and not any(e in link for e in excludes)
            ]
        return links
|
||||||
|
|
||||||
|
|
||||||
|
class Category(_Base):
    """Story source backed by a category listing page."""

    def __init__(self, url, tz=None):
        self.tz = tz
        self.category_url = url
        # Scheme and host: the first three '/'-separated parts of the URL.
        url_parts = url.split('/')
        self.base_url = '/'.join(url_parts[:3])

    def feed(self, excludes=None):
        """Return the unique article links found under the category URL.

        Relative hrefs are prefixed with the site base URL. Only links that
        start with the category URL, and are not the category URL itself,
        are kept. `excludes` is an optional list of substrings; links
        containing any of them are removed.
        """
        markup = xml(lambda x: self.category_url)
        if not markup:
            return []

        soup = BeautifulSoup(markup, features='html.parser')
        candidates = []
        for anchor in soup.find_all('a', href=True):
            href = anchor.get('href')
            if href.startswith('/'):
                href = f"{self.base_url}{href}"
            if href and href.startswith(self.category_url) and href != self.category_url:
                candidates.append(href)

        links = list(set(candidates))
        if excludes:
            links = [
                link for link in links
                if link and not any(e in link for e in excludes)
            ]
        return links
|
||||||
|
|
||||||
|
|
||||||
|
# scratchpad so I can quickly develop the parser
|
||||||
|
if __name__ == '__main__':
    # Each scratch run prints a label, the first five refs of the feed and
    # the parsed first story.
    scratch_runs = (
        ("Sitemap: Stuff", Sitemap, "https://www.stuff.co.nz/sitemap/news/sitemap.xml"),
        ("Category: RadioNZ Te Ao Māori", Category, "https://www.rnz.co.nz/news/te-manu-korihi/"),
        ("Sitemap: Newsroom", Sitemap, "https://www.newsroom.co.nz/sitemap.xml"),
    )
    for label, source_class, source_url in scratch_runs:
        print(label)
        site = source_class(source_url)
        posts = site.feed()
        print(posts[:5])
        print(site.story(posts[0]))
|
||||||
|
|
@@ -32,8 +32,11 @@ def feed():
|
|||||||
return [x.id for x in reddit.subreddit(subs).hot()]
|
return [x.id for x in reddit.subreddit(subs).hot()]
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
raise
|
raise
|
||||||
except BaseException as e:
|
except PRAWException as e:
|
||||||
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
|
logging.error('Problem hitting reddit API: {}'.format(str(e)))
|
||||||
|
return []
|
||||||
|
except PrawcoreException as e:
|
||||||
|
logging.error('Problem hitting reddit API: {}'.format(str(e)))
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def comment(i):
|
def comment(i):
|
||||||
@@ -56,9 +59,7 @@ def comment(i):
|
|||||||
def story(ref):
|
def story(ref):
|
||||||
try:
|
try:
|
||||||
r = reddit.submission(ref)
|
r = reddit.submission(ref)
|
||||||
if not r:
|
if not r: return False
|
||||||
logging.info('Bad Reddit API response.')
|
|
||||||
return False
|
|
||||||
|
|
||||||
s = {}
|
s = {}
|
||||||
s['author'] = r.author.name if r.author else '[Deleted]'
|
s['author'] = r.author.name if r.author else '[Deleted]'
|
||||||
@@ -73,7 +74,6 @@ def story(ref):
|
|||||||
s['num_comments'] = r.num_comments
|
s['num_comments'] = r.num_comments
|
||||||
|
|
||||||
if s['score'] < 25 and s['num_comments'] < 10:
|
if s['score'] < 25 and s['num_comments'] < 10:
|
||||||
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if r.selftext:
|
if r.selftext:
|
||||||
@@ -84,10 +84,10 @@ def story(ref):
|
|||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
raise
|
raise
|
||||||
except PRAWException as e:
|
except PRAWException as e:
|
||||||
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
|
logging.error('Problem hitting reddit API: {}'.format(str(e)))
|
||||||
return False
|
return False
|
||||||
except PrawcoreException as e:
|
except PrawcoreException as e:
|
||||||
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
|
logging.error('Problem hitting reddit API: {}'.format(str(e)))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# scratchpad so I can quickly develop the parser
|
# scratchpad so I can quickly develop the parser
|
||||||
|
165
apiserver/feeds/substack.py
Normal file
165
apiserver/feeds/substack.py
Normal file
@@ -0,0 +1,165 @@
|
|||||||
|
import logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
level=logging.DEBUG)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0,'.')
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from utils import clean
|
||||||
|
|
||||||
|
SUBSTACK_REFERER = 'https://substack.com'
|
||||||
|
SUBSTACK_API_TOP_POSTS = lambda x: "https://substack.com/api/v1/reader/top-posts"
|
||||||
|
|
||||||
|
def author_link(author_id, base_url):
    """Return the profile URL of `author_id` on the publication at `base_url`.

    A trailing slash on `base_url` is ignored so the result never contains
    a double slash before 'people'.
    """
    return f"{str(base_url).rstrip('/')}/people/{author_id}"
|
||||||
|
def api_comments(post_id, base_url):
    """Return the comments API URL for `post_id` on the publication at `base_url`.

    A trailing slash on `base_url` is ignored so the path never starts
    with a double slash.
    """
    return f"{str(base_url).rstrip('/')}/api/v1/post/{post_id}/comments?all_comments=true&sort=best_first"
|
||||||
|
def api_stories(x, base_url):
    """Return the archive API URL (newest 100 posts) for the publication at `base_url`.

    `x` is unused; it exists so the function fits the `route(ref)` calling
    convention used by `api()`. A trailing slash on `base_url` is ignored.
    """
    return f"{str(base_url).rstrip('/')}/api/v1/archive?sort=new&search=&offset=0&limit=100"
|
||||||
|
|
||||||
|
def unix(date_str):
    """Convert a 'YYYY-MM-DDTHH:MM:SS.ffffffZ' timestamp to Unix seconds.

    The trailing 'Z' marks the value as UTC, so the parsed datetime is
    tagged as UTC before conversion. Without that, `timestamp()` treats a
    naive datetime as local time and the result shifts with the host's
    timezone.

    Raises ValueError (from strptime) when `date_str` has another format.
    """
    # Local import: this module only imports `datetime` from the datetime package.
    from datetime import timezone
    parsed = datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%fZ')
    return int(parsed.replace(tzinfo=timezone.utc).timestamp())
|
||||||
|
|
||||||
|
def api(route, ref=None, referer=None):
    """GET `route(ref)` and return the decoded JSON body, or False on failure.

    The request is tried twice: first with a 10 second timeout, then with
    20 seconds. Any non-200 response counts as a failure. Each failure is
    logged; KeyboardInterrupt is always re-raised.
    """
    headers = {'Referer': referer} if referer else None
    attempts = (
        (10, 'Problem hitting Substack API: {}, trying again'),
        (20, 'Problem hitting Substack API: {}'),
    )
    for timeout, failure_message in attempts:
        try:
            r = requests.get(route(ref), headers=headers, timeout=timeout)
            if r.status_code != 200:
                raise Exception('Bad response code ' + str(r.status_code))
            return r.json()
        except KeyboardInterrupt:
            raise
        except BaseException as e:
            logging.error(failure_message.format(str(e)))
    return False
|
||||||
|
|
||||||
|
def comment(i):
    """Convert one Substack comment item into the normalized comment dict.

    Returns False when the item has no 'body' key so callers can filter it
    out. Replies under 'children' are converted recursively and falsy
    results are dropped.
    """
    if 'body' not in i:
        return False

    c = {}
    c['date'] = unix(i.get('date'))
    c['author'] = i.get('name', '')
    # 'reactions' may be missing or null, and the heart count may be absent;
    # fall back to a score of 0 instead of raising or storing None.
    c['score'] = (i.get('reactions') or {}).get('❤') or 0
    c['text'] = clean(i.get('body', '') or '')
    # A missing or null 'children' entry means "no replies", not an error.
    replies = (comment(j) for j in i.get('children') or [])
    c['comments'] = [reply for reply in replies if reply]

    return c
|
||||||
|
|
||||||
|
class Publication:
    """Feed for a single Substack publication, addressed by its base URL."""

    def __init__(self, domain):
        self.BASE_DOMAIN = domain

    def _public_stories(self):
        """Fetch the archive and keep only posts whose audience is 'everyone'."""
        stories = api(lambda x: api_stories(x, self.BASE_DOMAIN), referer=self.BASE_DOMAIN)
        if not stories:
            return []
        return [i for i in stories if i and i.get("audience") == "everyone"]

    def feed(self):
        """Return the ids (as strings) of the publication's public posts."""
        return [str(i.get("id")) for i in self._public_stories()]

    def story(self, ref):
        """Return the story dict for post id `ref`, or False if it is not found.

        The post is looked up in the publication archive. Comments are
        fetched separately, and a failed comments request yields an empty
        comment list instead of an exception.
        """
        matches = [i for i in self._public_stories() if str(i.get('id')) == ref]
        if not matches:
            return False
        r = matches[0]

        s = {}
        s['author'] = ''
        s['author_link'] = ''

        s['date'] = unix(r.get('post_date'))
        # 'reactions' can be missing or null; default the score to 0.
        s['score'] = (r.get('reactions') or {}).get('❤') or 0
        s['title'] = r.get('title', '')
        s['link'] = r.get('canonical_url', '')
        s['url'] = r.get('canonical_url', '')

        comments = api(lambda x: api_comments(x, self.BASE_DOMAIN), r.get('id'), referer=self.BASE_DOMAIN)
        # api() returns False on failure; only read a real response.
        raw_comments = (comments.get('comments') or []) if isinstance(comments, dict) else []
        s['comments'] = [c for c in map(comment, raw_comments) if c]
        s['num_comments'] = r.get('comment_count', 0)

        authors = [a for a in map(self._bylines, r.get('publishedBylines') or []) if a]
        if authors:
            s['author'] = authors[0].get('name')
            s['author_link'] = authors[0].get('link')

        return s

    def _bylines(self, b):
        """Return {'name', 'link'} for byline `b`, or None when it has no 'id'."""
        if 'id' not in b:
            return None
        a = {}
        a['name'] = b.get('name')
        a['link'] = author_link(b.get('id'), self.BASE_DOMAIN)
        return a
|
||||||
|
|
||||||
|
|
||||||
|
class Top:
    """Feed for Substack's global top-posts list."""

    def _public_stories(self):
        """Fetch the top posts and keep only those whose audience is 'everyone'."""
        stories = api(SUBSTACK_API_TOP_POSTS, referer=SUBSTACK_REFERER)
        if not stories:
            return []
        return [i for i in stories if i and i.get("audience") == "everyone"]

    def feed(self):
        """Return the ids (as strings) of the public top posts."""
        return [str(i.get("id")) for i in self._public_stories()]

    def story(self, ref):
        """Return the story dict for post id `ref`, or False if it is not listed.

        Author details and the comments endpoint come from the post's 'pub'
        entry. When that entry or its base URL is missing, or the comments
        request fails, the story is returned with no comments and no
        author link rather than raising.
        """
        matches = [i for i in self._public_stories() if str(i.get('id')) == ref]
        if not matches:
            return False
        r = matches[0]

        s = {}
        pub = r.get('pub') or {}
        base_url = pub.get('base_url')
        s['author'] = pub.get('author_name')
        s['author_link'] = author_link(pub.get('author_id'), base_url) if base_url else ''

        s['date'] = unix(r.get('post_date'))
        s['score'] = r.get('score')
        s['title'] = r.get('title', '')
        s['link'] = r.get('canonical_url', '')
        s['url'] = r.get('canonical_url', '')

        comments = False
        if base_url:
            comments = api(lambda x: api_comments(x, base_url), r.get('id'), referer=SUBSTACK_REFERER)
        # api() returns False on failure; only read a real response.
        raw_comments = (comments.get('comments') or []) if isinstance(comments, dict) else []
        s['comments'] = [c for c in map(comment, raw_comments) if c]
        s['num_comments'] = r.get('comment_count', 0)

        return s


# Module-level instance used as the "top posts" feed.
top = Top()
|
||||||
|
|
||||||
|
# scratchpad so I can quickly develop the parser
|
||||||
|
if __name__ == '__main__':
    # Dump the first story of the global top list ...
    top_posts = top.feed()
    first_top_ref = top_posts[0]
    print(top.story(first_top_ref))

    # ... and the first story of a single publication.
    webworm = Publication("https://www.webworm.co/")
    posts = webworm.feed()
    first_post_ref = posts[0]
    print(webworm.story(first_post_ref))
|
@@ -34,7 +34,7 @@ def api(route):
|
|||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
raise
|
raise
|
||||||
except BaseException as e:
|
except BaseException as e:
|
||||||
logging.critical('Problem hitting tildes website: {}'.format(str(e)))
|
logging.error('Problem hitting tildes website: {}'.format(str(e)))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def feed():
|
def feed():
|
||||||
@@ -71,15 +71,11 @@ def story(ref):
|
|||||||
html = api(SITE_LINK(group_lookup[ref], ref))
|
html = api(SITE_LINK(group_lookup[ref], ref))
|
||||||
else:
|
else:
|
||||||
html = api(API_ITEM(ref))
|
html = api(API_ITEM(ref))
|
||||||
if not html:
|
if not html: return False
|
||||||
logging.info('Bad Tildes API response.')
|
|
||||||
return False
|
|
||||||
|
|
||||||
soup = BeautifulSoup(html, features='html.parser')
|
soup = BeautifulSoup(html, features='html.parser')
|
||||||
a = soup.find('article', class_='topic-full')
|
a = soup.find('article', class_='topic-full')
|
||||||
if a is None:
|
if a is None: return False
|
||||||
logging.info('Tildes <article> element not found.')
|
|
||||||
return False
|
|
||||||
|
|
||||||
h = a.find('header')
|
h = a.find('header')
|
||||||
lu = h.find('a', class_='link-user')
|
lu = h.find('a', class_='link-user')
|
||||||
@@ -87,7 +83,6 @@ def story(ref):
|
|||||||
error = a.find('div', class_='text-error')
|
error = a.find('div', class_='text-error')
|
||||||
if error:
|
if error:
|
||||||
if 'deleted' in error.string or 'removed' in error.string:
|
if 'deleted' in error.string or 'removed' in error.string:
|
||||||
logging.info('Article was deleted or removed.')
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
s = {}
|
s = {}
|
||||||
@@ -107,21 +102,7 @@ def story(ref):
|
|||||||
ch = a.find('header', class_='topic-comments-header')
|
ch = a.find('header', class_='topic-comments-header')
|
||||||
s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
|
s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
|
||||||
|
|
||||||
if s['group'].split('.')[0] not in [
|
if s['score'] < 8 and s['num_comments'] < 6:
|
||||||
'~arts',
|
|
||||||
'~comp',
|
|
||||||
'~creative',
|
|
||||||
'~design',
|
|
||||||
'~engineering',
|
|
||||||
'~finance',
|
|
||||||
'~science',
|
|
||||||
'~tech',
|
|
||||||
]:
|
|
||||||
logging.info('Group ({}) not in whitelist.'.format(s['group']))
|
|
||||||
return False
|
|
||||||
|
|
||||||
if s['score'] < 15 and s['num_comments'] < 10:
|
|
||||||
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
td = a.find('div', class_='topic-full-text')
|
td = a.find('div', class_='topic-full-text')
|
||||||
@@ -132,7 +113,7 @@ def story(ref):
|
|||||||
|
|
||||||
# scratchpad so I can quickly develop the parser
|
# scratchpad so I can quickly develop the parser
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(feed())
|
#print(feed())
|
||||||
#normal = story('gxt')
|
#normal = story('gxt')
|
||||||
#print(normal)
|
#print(normal)
|
||||||
#no_comments = story('gxr')
|
#no_comments = story('gxr')
|
||||||
@@ -141,8 +122,8 @@ if __name__ == '__main__':
|
|||||||
#print(self_post)
|
#print(self_post)
|
||||||
#li_comment = story('gqx')
|
#li_comment = story('gqx')
|
||||||
#print(li_comment)
|
#print(li_comment)
|
||||||
#broken = story('q4y')
|
broken = story('q4y')
|
||||||
#print(broken)
|
print(broken)
|
||||||
|
|
||||||
# make sure there's no self-reference
|
# make sure there's no self-reference
|
||||||
#import copy
|
#import copy
|
||||||
|
@@ -4,6 +4,7 @@ certifi==2020.6.20
|
|||||||
chardet==3.0.4
|
chardet==3.0.4
|
||||||
click==7.1.2
|
click==7.1.2
|
||||||
commonmark==0.9.1
|
commonmark==0.9.1
|
||||||
|
extruct==0.10.0
|
||||||
Flask==1.1.2
|
Flask==1.1.2
|
||||||
Flask-Cors==3.0.8
|
Flask-Cors==3.0.8
|
||||||
gevent==20.6.2
|
gevent==20.6.2
|
||||||
@@ -11,11 +12,13 @@ greenlet==0.4.16
|
|||||||
idna==2.10
|
idna==2.10
|
||||||
itsdangerous==1.1.0
|
itsdangerous==1.1.0
|
||||||
Jinja2==2.11.2
|
Jinja2==2.11.2
|
||||||
|
lxml==4.6.1
|
||||||
MarkupSafe==1.1.1
|
MarkupSafe==1.1.1
|
||||||
packaging==20.4
|
packaging==20.4
|
||||||
praw==6.4.0
|
praw==6.4.0
|
||||||
prawcore==1.4.0
|
prawcore==1.4.0
|
||||||
pyparsing==2.4.7
|
pyparsing==2.4.7
|
||||||
|
pytz==2020.4
|
||||||
requests==2.24.0
|
requests==2.24.0
|
||||||
six==1.15.0
|
six==1.15.0
|
||||||
soupsieve==2.0.1
|
soupsieve==2.0.1
|
||||||
@@ -27,3 +30,4 @@ websocket-client==0.57.0
|
|||||||
Werkzeug==1.0.1
|
Werkzeug==1.0.1
|
||||||
zope.event==4.4
|
zope.event==4.4
|
||||||
zope.interface==5.1.0
|
zope.interface==5.1.0
|
||||||
|
python-dateutil==2.8.1
|
||||||
|
41
apiserver/scrapers/declutter.py
Normal file
41
apiserver/scrapers/declutter.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
import logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
level=logging.DEBUG)
|
||||||
|
import requests
|
||||||
|
|
||||||
|
DECLUTTER_API = 'https://declutter.1j.nz/details'
|
||||||
|
DECLUTTER_COMMENT_API = 'https://declutter.1j.nz/comments'
|
||||||
|
TIMEOUT = 30
|
||||||
|
|
||||||
|
|
||||||
|
def get_html(url):
    """Return the decluttered article HTML for `url`, or '' when unavailable."""
    logging.info(f"Declutter Scraper: {url}")
    details = get_details(url)
    return details['content'] if details else ''
|
||||||
|
|
||||||
|
def get_details(url):
    """POST `url` to the declutter details API and return the decoded JSON.

    Returns None, after logging the problem, on any failure including a
    non-200 response. KeyboardInterrupt is re-raised.
    """
    try:
        response = requests.post(DECLUTTER_API, data={'url': url}, timeout=TIMEOUT)
        if response.status_code == 200:
            return response.json()
        raise Exception('Bad response code ' + str(response.status_code))
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem decluttering article: {}'.format(str(e)))
        return None
|
||||||
|
|
||||||
|
def get_comments(url):
    """POST `url` to the declutter comments API and return the decoded JSON.

    Returns None, after logging the problem, on any failure including a
    non-200 response. KeyboardInterrupt is re-raised.
    """
    try:
        response = requests.post(DECLUTTER_COMMENT_API, data={'url': url}, timeout=TIMEOUT)
        if response.status_code == 200:
            return response.json()
        raise Exception('Bad response code ' + str(response.status_code))
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem getting comments for article: {}'.format(str(e)))
        return None
|
27
apiserver/scrapers/local.py
Normal file
27
apiserver/scrapers/local.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
import logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
level=logging.DEBUG)
|
||||||
|
import requests
|
||||||
|
|
||||||
|
READ_API = 'http://127.0.0.1:33843/details'
|
||||||
|
TIMEOUT = 20
|
||||||
|
|
||||||
|
def get_html(url):
    """Return the article HTML from the local reader service, or '' when unavailable."""
    logging.info(f"Local Scraper: {url}")
    details = get_details(url)
    return details['content'] if details else ''
|
||||||
|
|
||||||
|
def get_details(url):
    """POST `url` to the local reader API and return the decoded JSON.

    Returns None, after logging the problem, on any failure including a
    non-200 response. KeyboardInterrupt is re-raised.
    """
    try:
        response = requests.post(READ_API, data={'url': url}, timeout=TIMEOUT)
        if response.status_code == 200:
            return response.json()
        raise Exception('Bad response code ' + str(response.status_code))
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem getting article: {}'.format(str(e)))
        return None
|
37
apiserver/scrapers/outline.py
Normal file
37
apiserver/scrapers/outline.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
import logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
level=logging.DEBUG)
|
||||||
|
import requests
|
||||||
|
|
||||||
|
OUTLINE_REFERER = 'https://outline.com/'
|
||||||
|
OUTLINE_API = 'https://api.outline.com/v3/parse_article'
|
||||||
|
TIMEOUT = 20
|
||||||
|
|
||||||
|
def get_html(url):
    """Return the article HTML produced by Outline for `url`, or '' when unavailable."""
    details = get_details(url)
    return details['html'] if details else ''
|
||||||
|
|
||||||
|
def get_details(url):
    """Ask the Outline API to parse `url` and return its 'data' payload.

    Returns None when the request fails, when Outline rate-limits the
    request (HTTP 429, after sleeping 30 seconds), or when Outline reports
    the URL as unsupported. KeyboardInterrupt is re-raised.
    """
    # `time` is needed for the rate-limit back-off but is not among this
    # module's imports. Without it the 429 branch raised a NameError that
    # the handler below swallowed, so the sleep never happened.
    import time

    try:
        logging.info(f"Outline Scraper: {url}")
        params = {'source_url': url}
        headers = {'Referer': OUTLINE_REFERER}
        r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=TIMEOUT)
        if r.status_code == 429:
            logging.info('Rate limited by outline, sleeping 30s and skipping...')
            time.sleep(30)
            return None
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        data = r.json()['data']
        if 'URL is not supported by Outline' in data['html']:
            raise Exception('URL not supported by Outline')
        return data
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem outlining article: {}'.format(str(e)))
        return None
|
@@ -1,58 +0,0 @@
|
|||||||
import time
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
|
|
||||||
import feed
|
|
||||||
import database
|
|
||||||
import search
|
|
||||||
|
|
||||||
database.init()
|
|
||||||
|
|
||||||
def fix_gzip_bug(story_list):
    """Re-fetch stored stories whose text contains many '<EFBFBD>' markers.

    `story_list` is iterated as rows whose first element is the story id.
    For each story the marker count per 1000 characters of text is
    computed. Stories at or above FIX_THRESHOLD are updated through
    `feed.update_story` and, when valid, written back to the database and
    the search index. Any error is logged and drops into the debugger.
    """
    FIX_THRESHOLD = 150

    for row in story_list:
        try:
            sid = row[0]
            story = database.get_story(sid)
            full_json = json.loads(story.full_json)
            meta_json = json.loads(story.meta_json)
            text = full_json.get('text', '')

            marker_count = text.count('<EFBFBD>')
            if marker_count == 0:
                continue

            ratio = marker_count / len(text) * 1000
            print('Bad story:', sid, 'Num ?:', marker_count, 'Ratio:', ratio)
            if ratio < FIX_THRESHOLD:
                continue

            print('Attempting to fix...')

            if feed.update_story(meta_json, is_manual=True):
                database.put_story(meta_json)
                search.put_story(meta_json)
                print('Success')
            else:
                print('Story was not valid')

            # Pause between re-fetches.
            time.sleep(3)

        except KeyboardInterrupt:
            raise
        except BaseException as e:
            logging.exception(e)
            breakpoint()
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Ask for confirmation before touching every stored story.
    num_stories = database.count_stories()
    prompt_lines = (
        'Fix {} stories?'.format(num_stories),
        'Press ENTER to continue, ctrl-c to cancel',
    )
    for prompt_line in prompt_lines:
        print(prompt_line)
    input()

    story_list = database.get_story_list()
    fix_gzip_bug(story_list)
|
|
||||||
|
|
@@ -1,62 +0,0 @@
|
|||||||
import logging
|
|
||||||
logging.basicConfig(
|
|
||||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
|
||||||
level=logging.INFO)
|
|
||||||
|
|
||||||
import database
|
|
||||||
from sqlalchemy import select
|
|
||||||
import search
|
|
||||||
import sys
|
|
||||||
|
|
||||||
import time
|
|
||||||
import json
|
|
||||||
import requests
|
|
||||||
|
|
||||||
database.init()
|
|
||||||
search.init()
|
|
||||||
|
|
||||||
BATCH_SIZE = 5000
|
|
||||||
|
|
||||||
def put_stories(stories):
    """POST a batch of story documents to the 'qotnews' index and return the API result."""
    route = 'indexes/qotnews/documents'
    return search.meili_api(requests.post, route, stories)
|
|
||||||
|
|
||||||
def get_update(update_id):
    """Fetch the task record for `update_id` from the search API."""
    route = 'tasks/{}'.format(update_id)
    return search.meili_api(requests.get, route)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    num_stories = database.count_stories()

    print('Reindex {} stories?'.format(num_stories))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()

    story_list = database.get_story_list()

    count = 1
    while len(story_list):
        stories = []

        # Collect up to BATCH_SIZE stories, taking ids from the end of the list.
        for _ in range(BATCH_SIZE):
            try:
                sid = story_list.pop()
            except IndexError:
                break

            # NOTE(review): `sid` looks like a row (sid[0] is printed below)
            # yet the whole row is passed to get_story - confirm that is intended.
            story = database.get_story(sid)
            print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
            story_obj = json.loads(story.meta_json)
            stories.append(story_obj)
            count += 1

        res = put_stories(stories)
        if not res:
            # meili_api() returns False on failure; stop instead of crashing on res['uid'].
            raise SystemExit('Failed to submit batch to MeiliSearch, aborting.')
        update_id = res['uid']

        print('Waiting for processing', end='')
        while True:
            update = get_update(update_id)
            # A failed status request (False) is treated as "not done yet" and retried.
            status = update['status'] if update else None
            if status == 'succeeded':
                break
            if status == 'failed':
                # The previous loop waited forever on a failed task.
                print()
                raise SystemExit('MeiliSearch task {} failed, aborting.'.format(update_id))
            time.sleep(0.5)
            print('.', end='', flush=True)

        print()

    print('Done.')
|
|
||||||
|
|
@@ -1,23 +0,0 @@
|
|||||||
import time
|
|
||||||
import requests
|
|
||||||
|
|
||||||
def test_search_api():
    """Time 100 sequential search requests against the local API and print the mean."""
    num_tests = 100
    total_time = 0

    for _ in range(num_tests):
        started_at = time.time()

        response = requests.get('http://127.0.0.1:33842/api/search?q=iphone')
        response.raise_for_status()

        total_time += time.time() - started_at

    avg_time = total_time / num_tests

    print('Average search time:', avg_time)


if __name__ == '__main__':
    test_search_api()
|
|
@@ -4,62 +4,83 @@ logging.basicConfig(
|
|||||||
level=logging.DEBUG)
|
level=logging.DEBUG)
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
import settings
|
|
||||||
|
|
||||||
SEARCH_ENABLED = bool(settings.MEILI_URL)
|
MEILI_URL = 'http://127.0.0.1:7700/'
|
||||||
|
|
||||||
def meili_api(method, route, json=None, params=None, parse_json=True):
|
def create_index():
|
||||||
try:
|
try:
|
||||||
r = method(settings.MEILI_URL + route, json=json, params=params, timeout=4)
|
json = dict(name='qotnews', uid='qotnews')
|
||||||
if r.status_code > 299:
|
r = requests.post(MEILI_URL + 'indexes', json=json, timeout=2)
|
||||||
|
if r.status_code != 201:
|
||||||
raise Exception('Bad response code ' + str(r.status_code))
|
raise Exception('Bad response code ' + str(r.status_code))
|
||||||
if parse_json:
|
return r.json()
|
||||||
return r.json()
|
|
||||||
else:
|
|
||||||
r.encoding = 'utf-8'
|
|
||||||
return r.text
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
raise
|
raise
|
||||||
except BaseException as e:
|
except BaseException as e:
|
||||||
logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e))
|
logging.error('Problem creating MeiliSearch index: {}'.format(str(e)))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def create_index():
|
|
||||||
json = dict(uid='qotnews', primaryKey='id')
|
|
||||||
return meili_api(requests.post, 'indexes', json=json)
|
|
||||||
|
|
||||||
def update_rankings():
|
def update_rankings():
|
||||||
json = ['typo', 'words', 'proximity', 'date:desc', 'exactness']
|
try:
|
||||||
return meili_api(requests.post, 'indexes/qotnews/settings/ranking-rules', json=json)
|
json = ['typo', 'words', 'proximity', 'attribute', 'desc(date)', 'wordsPosition', 'exactness']
|
||||||
|
r = requests.post(MEILI_URL + 'indexes/qotnews/settings/ranking-rules', json=json, timeout=2)
|
||||||
|
if r.status_code != 202:
|
||||||
|
raise Exception('Bad response code ' + str(r.status_code))
|
||||||
|
return r.json()
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
raise
|
||||||
|
except BaseException as e:
|
||||||
|
logging.error('Problem setting MeiliSearch ranking rules: {}'.format(str(e)))
|
||||||
|
return False
|
||||||
|
|
||||||
def update_attributes():
|
def update_attributes():
|
||||||
json = ['title', 'url', 'author']
|
try:
|
||||||
r = meili_api(requests.post, 'indexes/qotnews/settings/searchable-attributes', json=json)
|
json = ['title', 'url', 'author', 'link', 'id']
|
||||||
json = ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments']
|
r = requests.post(MEILI_URL + 'indexes/qotnews/settings/searchable-attributes', json=json, timeout=2)
|
||||||
r = meili_api(requests.post, 'indexes/qotnews/settings/displayed-attributes', json=json)
|
if r.status_code != 202:
|
||||||
return r
|
raise Exception('Bad response code ' + str(r.status_code))
|
||||||
|
requests.delete(MEILI_URL + 'indexes/qotnews/settings/displayed-attributes', timeout=2)
|
||||||
|
return r.json()
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
raise
|
||||||
|
except BaseException as e:
|
||||||
|
logging.error('Problem setting MeiliSearch searchable attributes: {}'.format(str(e)))
|
||||||
|
return False
|
||||||
|
|
||||||
def init():
|
def init():
|
||||||
if not SEARCH_ENABLED:
|
create_index()
|
||||||
logging.info('Search is not enabled, skipping init.')
|
|
||||||
return
|
|
||||||
print(create_index())
|
|
||||||
update_rankings()
|
update_rankings()
|
||||||
update_attributes()
|
update_attributes()
|
||||||
|
|
||||||
def put_story(story):
|
def put_story(story):
|
||||||
if not SEARCH_ENABLED: return
|
story = story.copy()
|
||||||
return meili_api(requests.post, 'indexes/qotnews/documents', [story])
|
story.pop('text', None)
|
||||||
|
story.pop('comments', None)
|
||||||
|
try:
|
||||||
|
r = requests.post(MEILI_URL + 'indexes/qotnews/documents', json=[story], timeout=2)
|
||||||
|
if r.status_code != 202:
|
||||||
|
raise Exception('Bad response code ' + str(r.status_code))
|
||||||
|
return r.json()
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
raise
|
||||||
|
except BaseException as e:
|
||||||
|
logging.error('Problem putting MeiliSearch story: {}'.format(str(e)))
|
||||||
|
return False
|
||||||
|
|
||||||
def search(q):
|
def search(q):
|
||||||
if not SEARCH_ENABLED: return []
|
try:
|
||||||
params = dict(q=q, limit=settings.FEED_LENGTH)
|
params = dict(q=q, limit=250)
|
||||||
r = meili_api(requests.get, 'indexes/qotnews/search', params=params, parse_json=False)
|
r = requests.get(MEILI_URL + 'indexes/qotnews/search', params=params, timeout=2)
|
||||||
return r
|
if r.status_code != 200:
|
||||||
|
raise Exception('Bad response code ' + str(r.status_code))
|
||||||
|
return r.json()['hits']
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
raise
|
||||||
|
except BaseException as e:
|
||||||
|
logging.error('Problem searching MeiliSearch: {}'.format(str(e)))
|
||||||
|
return False
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
init()
|
create_index()
|
||||||
|
|
||||||
print(update_rankings())
|
print(search('the'))
|
||||||
|
|
||||||
print(search('facebook'))
|
|
||||||
|
@@ -28,8 +28,6 @@ from flask_cors import CORS
|
|||||||
database.init()
|
database.init()
|
||||||
search.init()
|
search.init()
|
||||||
|
|
||||||
news_index = 0
|
|
||||||
|
|
||||||
def new_id():
|
def new_id():
|
||||||
nid = gen_rand_id()
|
nid = gen_rand_id()
|
||||||
while database.get_story(nid):
|
while database.get_story(nid):
|
||||||
@@ -42,11 +40,8 @@ cors = CORS(flask_app)
|
|||||||
|
|
||||||
@flask_app.route('/api')
|
@flask_app.route('/api')
|
||||||
def api():
|
def api():
|
||||||
skip = request.args.get('skip', 0)
|
stories = database.get_stories(settings.MAX_STORY_AGE)
|
||||||
limit = request.args.get('limit', settings.FEED_LENGTH)
|
res = Response(json.dumps({"stories": stories}))
|
||||||
stories = database.get_stories(limit, skip)
|
|
||||||
# hacky nested json
|
|
||||||
res = Response('{"stories":[' + ','.join(stories) + ']}')
|
|
||||||
res.headers['content-type'] = 'application/json'
|
res.headers['content-type'] = 'application/json'
|
||||||
return res
|
return res
|
||||||
|
|
||||||
@@ -56,10 +51,8 @@ def apisearch():
|
|||||||
if len(q) >= 3:
|
if len(q) >= 3:
|
||||||
results = search.search(q)
|
results = search.search(q)
|
||||||
else:
|
else:
|
||||||
results = '[]'
|
results = []
|
||||||
res = Response(results)
|
return dict(results=results)
|
||||||
res.headers['content-type'] = 'application/json'
|
|
||||||
return res
|
|
||||||
|
|
||||||
@flask_app.route('/api/submit', methods=['POST'], strict_slashes=False)
|
@flask_app.route('/api/submit', methods=['POST'], strict_slashes=False)
|
||||||
def submit():
|
def submit():
|
||||||
@@ -67,8 +60,6 @@ def submit():
|
|||||||
url = request.form['url']
|
url = request.form['url']
|
||||||
nid = new_id()
|
nid = new_id()
|
||||||
|
|
||||||
logging.info('Manual submission: ' + url)
|
|
||||||
|
|
||||||
parse = urlparse(url)
|
parse = urlparse(url)
|
||||||
if 'news.ycombinator.com' in parse.hostname:
|
if 'news.ycombinator.com' in parse.hostname:
|
||||||
source = 'hackernews'
|
source = 'hackernews'
|
||||||
@@ -76,9 +67,6 @@ def submit():
|
|||||||
elif 'tildes.net' in parse.hostname and '~' in url:
|
elif 'tildes.net' in parse.hostname and '~' in url:
|
||||||
source = 'tildes'
|
source = 'tildes'
|
||||||
ref = parse.path.split('/')[2]
|
ref = parse.path.split('/')[2]
|
||||||
elif 'lobste.rs' in parse.hostname and '/s/' in url:
|
|
||||||
source = 'lobsters'
|
|
||||||
ref = parse.path.split('/')[2]
|
|
||||||
elif 'reddit.com' in parse.hostname and 'comments' in url:
|
elif 'reddit.com' in parse.hostname and 'comments' in url:
|
||||||
source = 'reddit'
|
source = 'reddit'
|
||||||
ref = parse.path.split('/')[4]
|
ref = parse.path.split('/')[4]
|
||||||
@@ -111,8 +99,9 @@ def submit():
|
|||||||
def story(sid):
|
def story(sid):
|
||||||
story = database.get_story(sid)
|
story = database.get_story(sid)
|
||||||
if story:
|
if story:
|
||||||
# hacky nested json
|
related = database.get_stories_by_url(story.meta['url'])
|
||||||
res = Response('{"story":' + story.full_json + '}')
|
related = [r.meta for r in related]
|
||||||
|
res = Response(json.dumps({"story": story.data, "related": related}))
|
||||||
res.headers['content-type'] = 'application/json'
|
res.headers['content-type'] = 'application/json'
|
||||||
return res
|
return res
|
||||||
else:
|
else:
|
||||||
@@ -122,11 +111,9 @@ def story(sid):
|
|||||||
@flask_app.route('/search')
|
@flask_app.route('/search')
|
||||||
def index():
|
def index():
|
||||||
return render_template('index.html',
|
return render_template('index.html',
|
||||||
title='QotNews',
|
title='Feed',
|
||||||
url='news.t0.vc',
|
url='news.t0.vc',
|
||||||
description='Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode',
|
description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')
|
||||||
robots='index',
|
|
||||||
)
|
|
||||||
|
|
||||||
@flask_app.route('/<sid>', strict_slashes=False)
|
@flask_app.route('/<sid>', strict_slashes=False)
|
||||||
@flask_app.route('/<sid>/c', strict_slashes=False)
|
@flask_app.route('/<sid>/c', strict_slashes=False)
|
||||||
@@ -138,7 +125,7 @@ def static_story(sid):
|
|||||||
|
|
||||||
story = database.get_story(sid)
|
story = database.get_story(sid)
|
||||||
if not story: return abort(404)
|
if not story: return abort(404)
|
||||||
story = json.loads(story.full_json)
|
story = story.data
|
||||||
|
|
||||||
score = story['score']
|
score = story['score']
|
||||||
num_comments = story['num_comments']
|
num_comments = story['num_comments']
|
||||||
@@ -151,71 +138,65 @@ def static_story(sid):
|
|||||||
url = url.replace('www.', '')
|
url = url.replace('www.', '')
|
||||||
|
|
||||||
return render_template('index.html',
|
return render_template('index.html',
|
||||||
title=story['title'] + ' | QotNews',
|
title=story['title'],
|
||||||
url=url,
|
url=url,
|
||||||
description=description,
|
description=description)
|
||||||
robots='noindex',
|
|
||||||
)
|
|
||||||
|
|
||||||
http_server = WSGIServer(('', 33842), flask_app)
|
http_server = WSGIServer(('', 33842), flask_app)
|
||||||
|
|
||||||
def feed_thread():
|
def _add_new_refs():
|
||||||
global news_index
|
for ref, source in feed.get_list():
|
||||||
|
if database.get_story_by_ref(ref):
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
nid = new_id()
|
||||||
|
database.put_ref(ref, nid, source)
|
||||||
|
logging.info('Added ref ' + ref)
|
||||||
|
except database.IntegrityError:
|
||||||
|
continue
|
||||||
|
|
||||||
|
def _update_current_story(item):
|
||||||
|
try:
|
||||||
|
story = database.get_story(item['sid']).data
|
||||||
|
except AttributeError:
|
||||||
|
story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
|
||||||
|
|
||||||
|
logging.info('Updating story: {}'.format(str(story['ref'])))
|
||||||
|
|
||||||
|
valid = feed.update_story(story)
|
||||||
|
if valid:
|
||||||
|
database.put_story(story)
|
||||||
|
search.put_story(story)
|
||||||
|
else:
|
||||||
|
database.del_ref(item['ref'])
|
||||||
|
logging.info('Removed ref {}'.format(item['ref']))
|
||||||
|
|
||||||
|
def feed_thread():
|
||||||
|
ref_list = []
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
# onboard new stories
|
# onboard new stories
|
||||||
if news_index == 0:
|
if not len(ref_list):
|
||||||
for ref, source in feed.list():
|
_add_new_refs()
|
||||||
if database.get_story_by_ref(ref):
|
ref_list = database.get_reflist()
|
||||||
continue
|
|
||||||
try:
|
|
||||||
nid = new_id()
|
|
||||||
logging.info('Adding ref: {}, id: {}, source: {}'.format(ref, nid, source))
|
|
||||||
database.put_ref(ref, nid, source)
|
|
||||||
except database.IntegrityError:
|
|
||||||
logging.info('Already have ID / ref, skipping.')
|
|
||||||
continue
|
|
||||||
|
|
||||||
ref_list = database.get_reflist(settings.FEED_LENGTH)
|
|
||||||
|
|
||||||
# update current stories
|
# update current stories
|
||||||
if news_index < len(ref_list):
|
if len(ref_list):
|
||||||
item = ref_list[news_index]
|
item = ref_list.pop(0)
|
||||||
|
_update_current_story(item)
|
||||||
try:
|
|
||||||
story_json = database.get_story(item['sid']).full_json
|
|
||||||
story = json.loads(story_json)
|
|
||||||
except AttributeError:
|
|
||||||
story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
|
|
||||||
|
|
||||||
logging.info('Updating {} story: {}, index: {}'.format(story['source'], story['ref'], news_index))
|
|
||||||
|
|
||||||
valid = feed.update_story(story)
|
|
||||||
if valid:
|
|
||||||
database.put_story(story)
|
|
||||||
search.put_story(story)
|
|
||||||
else:
|
|
||||||
database.del_ref(item['ref'])
|
|
||||||
logging.info('Removed ref {}'.format(item['ref']))
|
|
||||||
else:
|
|
||||||
logging.info('Skipping index: ' + str(news_index))
|
|
||||||
|
|
||||||
gevent.sleep(6)
|
gevent.sleep(6)
|
||||||
|
|
||||||
news_index += 1
|
|
||||||
if news_index == settings.FEED_LENGTH: news_index = 0
|
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
logging.info('Ending feed thread...')
|
logging.info('Ending feed thread...')
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
logging.critical('feed_thread error: {} {}'.format(e.__class__.__name__, e))
|
logging.error('feed_thread error: {} {}'.format(e.__class__.__name__, e))
|
||||||
http_server.stop()
|
http_server.stop()
|
||||||
|
|
||||||
logging.info('Starting Feed thread...')
|
print('Starting Feed thread...')
|
||||||
gevent.spawn(feed_thread)
|
gevent.spawn(feed_thread)
|
||||||
|
|
||||||
logging.info('Starting HTTP thread...')
|
print('Starting HTTP thread...')
|
||||||
try:
|
try:
|
||||||
http_server.serve_forever()
|
http_server.serve_forever()
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
|
@@ -1,23 +1,28 @@
|
|||||||
# QotNews settings
|
# QotNews settings
|
||||||
# edit this file and save it as settings.py
|
# edit this file and save it as settings.py
|
||||||
|
|
||||||
|
MAX_STORY_AGE = 3*24*60*60
|
||||||
|
|
||||||
# Feed Lengths
|
# Feed Lengths
|
||||||
# Number of top items from each site to pull
|
# Number of top items from each site to pull
|
||||||
# set to 0 to disable that site
|
# set to 0 to disable that site
|
||||||
FEED_LENGTH = 75
|
|
||||||
NUM_HACKERNEWS = 15
|
NUM_HACKERNEWS = 15
|
||||||
NUM_LOBSTERS = 10
|
NUM_REDDIT = 10
|
||||||
NUM_REDDIT = 15
|
|
||||||
NUM_TILDES = 5
|
NUM_TILDES = 5
|
||||||
|
NUM_SUBSTACK = 10
|
||||||
|
|
||||||
# Meilisearch server URL
|
SITEMAP = {}
|
||||||
# Leave blank if not using search
|
# SITEMAP['nzherald'] = { 'url': "https://www.nzherald.co.nz/arcio/news-sitemap/", 'count': 10},
|
||||||
#MEILI_URL = 'http://127.0.0.1:7700/'
|
# SITEMAP['stuff'] = { 'url': "https://www.stuff.co.nz/sitemap.xml", 'count': 10},
|
||||||
MEILI_URL = ''
|
|
||||||
|
|
||||||
# Readerserver URL
|
SUBSTACK = {}
|
||||||
# Leave blank if not using, but that defeats the whole point
|
# SUBSTACK['webworm'] = { 'url': "https://www.webworm.co", 'count': 10},
|
||||||
READER_URL = 'http://127.0.0.1:33843/'
|
# SUBSTACK['the bulletin'] = { 'url': "https://thespinoff.substack.com", 'count': 10},
|
||||||
|
|
||||||
|
CATEGORY = {}
|
||||||
|
# CATEGORY['rnz national'] = { 'url': "https://www.rnz.co.nz/news/national", 'count': 10},
|
||||||
|
|
||||||
|
SCRAPERS = ['declutter', 'outline', 'local']
|
||||||
|
|
||||||
# Reddit account info
|
# Reddit account info
|
||||||
# leave blank if not using Reddit
|
# leave blank if not using Reddit
|
||||||
@@ -33,9 +38,13 @@ SUBREDDITS = [
|
|||||||
'HistoryofIdeas',
|
'HistoryofIdeas',
|
||||||
'LaymanJournals',
|
'LaymanJournals',
|
||||||
'PhilosophyofScience',
|
'PhilosophyofScience',
|
||||||
|
'PoliticsPDFs',
|
||||||
|
'Scholar',
|
||||||
'StateOfTheUnion',
|
'StateOfTheUnion',
|
||||||
'TheAgora',
|
'TheAgora',
|
||||||
|
'TrueFilm',
|
||||||
'TrueReddit',
|
'TrueReddit',
|
||||||
|
'UniversityofReddit',
|
||||||
'culturalstudies',
|
'culturalstudies',
|
||||||
'hardscience',
|
'hardscience',
|
||||||
'indepthsports',
|
'indepthsports',
|
||||||
@@ -44,7 +53,4 @@ SUBREDDITS = [
|
|||||||
'neurophilosophy',
|
'neurophilosophy',
|
||||||
'resilientcommunities',
|
'resilientcommunities',
|
||||||
'worldevents',
|
'worldevents',
|
||||||
'StallmanWasRight',
|
|
||||||
'EverythingScience',
|
|
||||||
'longevity',
|
|
||||||
]
|
]
|
||||||
|
@@ -8,14 +8,6 @@ import string
|
|||||||
|
|
||||||
from bleach.sanitizer import Cleaner
|
from bleach.sanitizer import Cleaner
|
||||||
|
|
||||||
def alert_tanner(message):
|
|
||||||
try:
|
|
||||||
logging.info('Alerting Tanner: ' + message)
|
|
||||||
params = dict(qotnews=message)
|
|
||||||
requests.get('https://tbot.tannercollin.com/message', params=params, timeout=4)
|
|
||||||
except BaseException as e:
|
|
||||||
logging.error('Problem alerting Tanner: ' + str(e))
|
|
||||||
|
|
||||||
def gen_rand_id():
|
def gen_rand_id():
|
||||||
return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))
|
return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))
|
||||||
|
|
||||||
|
@@ -1,53 +1,14 @@
|
|||||||
|
const port = 33843;
|
||||||
const express = require('express');
|
const express = require('express');
|
||||||
const app = express();
|
const app = express();
|
||||||
const port = 33843;
|
const simple = require('./simple');
|
||||||
|
|
||||||
const request = require('request');
|
|
||||||
const JSDOM = require('jsdom').JSDOM;
|
|
||||||
const { Readability } = require('readability');
|
|
||||||
|
|
||||||
app.use(express.urlencoded({ extended: true }));
|
app.use(express.urlencoded({ extended: true }));
|
||||||
|
app.get('/', (req, res) => res.send(simple.FORM));
|
||||||
app.get('/', (req, res) => {
|
app.post('/', (req, res) => simple.scrape(req, res));
|
||||||
res.send('<form method="POST" accept-charset="UTF-8"><input name="url"><button type="submit">SUBMIT</button></form>');
|
app.post('/details', (req, res) => simple.details(req, res));
|
||||||
});
|
// app.post('/browser', (req, res) => browser.scrape(req, res));
|
||||||
|
// app.post('/browser/details', (req, res) => browser.details(req, res));
|
||||||
const requestCallback = (url, res) => (error, response, body) => {
|
|
||||||
if (!error && response.statusCode == 200) {
|
|
||||||
console.log('Response OK.');
|
|
||||||
|
|
||||||
const doc = new JSDOM(body, {url: url});
|
|
||||||
const reader = new Readability(doc.window.document);
|
|
||||||
const article = reader.parse();
|
|
||||||
|
|
||||||
if (article && article.content) {
|
|
||||||
res.send(article.content);
|
|
||||||
} else {
|
|
||||||
res.sendStatus(404);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
console.log('Response error:', error ? error.toString() : response.statusCode);
|
|
||||||
res.sendStatus(response ? response.statusCode : 404);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
app.post('/', (req, res) => {
|
|
||||||
const url = req.body.url;
|
|
||||||
const requestOptions = {
|
|
||||||
url: url,
|
|
||||||
gzip: true,
|
|
||||||
//headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'},
|
|
||||||
//headers: {'User-Agent': 'Twitterbot/1.0'},
|
|
||||||
headers: {
|
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0',
|
|
||||||
'X-Forwarded-For': '66.249.66.1',
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
console.log('Parse request for:', url);
|
|
||||||
|
|
||||||
request(requestOptions, requestCallback(url, res));
|
|
||||||
});
|
|
||||||
|
|
||||||
app.listen(port, () => {
|
app.listen(port, () => {
|
||||||
console.log(`Example app listening on port ${port}!`);
|
console.log(`Example app listening on port ${port}!`);
|
||||||
|
43
readerserver/simple.js
Normal file
43
readerserver/simple.js
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
const request = require('request');
|
||||||
|
const JSDOM = require('jsdom').JSDOM;
|
||||||
|
const { Readability } = require('readability');
|
||||||
|
|
||||||
|
const options = url => ({
|
||||||
|
url: url,
|
||||||
|
headers: {
|
||||||
|
'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)',
|
||||||
|
'X-Forwarded-For': '66.249.66.1',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const extract = (url, body) => {
|
||||||
|
const doc = new JSDOM(body, { url: url });
|
||||||
|
const reader = new Readability(doc.window.document);
|
||||||
|
return reader.parse();
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
module.exports.FORM = '<form method="POST" action="/" accept-charset="UTF-8"><input name="url"><button type="submit">SUBMIT</button></form>';
|
||||||
|
module.exports.scrape = (req, res) => request(options(req.body.url), (error, response, body) => {
|
||||||
|
if (error || response.statusCode != 200) {
|
||||||
|
console.log('Response error:', error ? error.toString() : response.statusCode);
|
||||||
|
return res.sendStatus(response ? response.statusCode : 404);
|
||||||
|
}
|
||||||
|
const article = extract(url, body);
|
||||||
|
if (article && article.content) {
|
||||||
|
return res.send(article.content);
|
||||||
|
}
|
||||||
|
return res.sendStatus(404);
|
||||||
|
});
|
||||||
|
|
||||||
|
module.exports.details = (req, res) => request(options(req.body.url), (error, response, body) => {
|
||||||
|
if (error || response.statusCode != 200) {
|
||||||
|
console.log('Response error:', error ? error.toString() : response.statusCode);
|
||||||
|
return res.sendStatus(response ? response.statusCode : 404);
|
||||||
|
}
|
||||||
|
const article = extract(url, body);
|
||||||
|
if (article) {
|
||||||
|
return res.send(article);
|
||||||
|
}
|
||||||
|
return res.sendStatus(404);
|
||||||
|
});
|
File diff suppressed because it is too large
Load Diff
@@ -3,7 +3,7 @@
|
|||||||
Download MeiliSearch with:
|
Download MeiliSearch with:
|
||||||
|
|
||||||
```
|
```
|
||||||
wget https://github.com/meilisearch/meilisearch/releases/download/v0.27.0/meilisearch-linux-amd64
|
wget https://github.com/meilisearch/MeiliSearch/releases/download/v0.11.1/meilisearch-linux-amd64
|
||||||
chmod +x meilisearch-linux-amd64
|
chmod +x meilisearch-linux-amd64
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@@ -8,8 +8,6 @@
|
|||||||
content="{{ description }}"
|
content="{{ description }}"
|
||||||
/>
|
/>
|
||||||
<meta content="{{ url }}" name="og:site_name">
|
<meta content="{{ url }}" name="og:site_name">
|
||||||
<meta name="robots" content="{{ robots }}">
|
|
||||||
|
|
||||||
|
|
||||||
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
|
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
|
||||||
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
|
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
|
||||||
@@ -28,7 +26,7 @@
|
|||||||
work correctly both with client-side routing and a non-root public URL.
|
work correctly both with client-side routing and a non-root public URL.
|
||||||
Learn how to configure a non-root public URL by running `npm run build`.
|
Learn how to configure a non-root public URL by running `npm run build`.
|
||||||
-->
|
-->
|
||||||
<title>{{ title }}</title>
|
<title>{{ title }} - QotNews</title>
|
||||||
|
|
||||||
<style>
|
<style>
|
||||||
html {
|
html {
|
||||||
@@ -39,23 +37,13 @@
|
|||||||
}
|
}
|
||||||
.nojs {
|
.nojs {
|
||||||
color: white;
|
color: white;
|
||||||
max-width: 32rem;
|
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
<body>
|
<body>
|
||||||
<div class="nojs">
|
<div class="nojs">
|
||||||
<noscript>
|
<noscript>You need to enable JavaScript to run this app.</noscript>
|
||||||
You need to enable JavaScript to run this app because it's written in React.
|
|
||||||
I was planning on writing a server-side version, but I've become distracted
|
|
||||||
by other projects -- sorry!
|
|
||||||
<br/>
|
|
||||||
I originally wrote this for myself, and of course I whitelist JavaScript on
|
|
||||||
all my own domains.
|
|
||||||
<br/><br/>
|
|
||||||
Alternatively, try activex.news.t0.vc for an ActiveX™ version.
|
|
||||||
</noscript>
|
|
||||||
</div>
|
</div>
|
||||||
<div id="root"></div>
|
<div id="root"></div>
|
||||||
<!--
|
<!--
|
||||||
|
@@ -3,10 +3,8 @@ import { BrowserRouter as Router, Route, Link, Switch } from 'react-router-dom';
|
|||||||
import localForage from 'localforage';
|
import localForage from 'localforage';
|
||||||
import './Style-light.css';
|
import './Style-light.css';
|
||||||
import './Style-dark.css';
|
import './Style-dark.css';
|
||||||
import './Style-black.css';
|
|
||||||
import './Style-red.css';
|
|
||||||
import './fonts/Fonts.css';
|
import './fonts/Fonts.css';
|
||||||
import { BackwardDot, ForwardDot } from './utils.js';
|
import { ForwardDot } from './utils.js';
|
||||||
import Feed from './Feed.js';
|
import Feed from './Feed.js';
|
||||||
import Article from './Article.js';
|
import Article from './Article.js';
|
||||||
import Comments from './Comments.js';
|
import Comments from './Comments.js';
|
||||||
@@ -40,16 +38,6 @@ class App extends React.Component {
|
|||||||
localStorage.setItem('theme', 'dark');
|
localStorage.setItem('theme', 'dark');
|
||||||
}
|
}
|
||||||
|
|
||||||
black() {
|
|
||||||
this.setState({ theme: 'black' });
|
|
||||||
localStorage.setItem('theme', 'black');
|
|
||||||
}
|
|
||||||
|
|
||||||
red() {
|
|
||||||
this.setState({ theme: 'red' });
|
|
||||||
localStorage.setItem('theme', 'red');
|
|
||||||
}
|
|
||||||
|
|
||||||
componentDidMount() {
|
componentDidMount() {
|
||||||
if (!this.cache.length) {
|
if (!this.cache.length) {
|
||||||
localForage.iterate((value, key) => {
|
localForage.iterate((value, key) => {
|
||||||
@@ -59,61 +47,22 @@ class App extends React.Component {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
goFullScreen() {
|
|
||||||
if ('wakeLock' in navigator) {
|
|
||||||
navigator.wakeLock.request('screen');
|
|
||||||
}
|
|
||||||
|
|
||||||
document.body.requestFullscreen({ navigationUI: 'hide' }).then(() => {
|
|
||||||
window.addEventListener('resize', () => this.forceUpdate());
|
|
||||||
this.forceUpdate();
|
|
||||||
});
|
|
||||||
};
|
|
||||||
|
|
||||||
exitFullScreen() {
|
|
||||||
document.exitFullscreen().then(() => {
|
|
||||||
this.forceUpdate();
|
|
||||||
});
|
|
||||||
};
|
|
||||||
|
|
||||||
render() {
|
render() {
|
||||||
const theme = this.state.theme;
|
const theme = this.state.theme;
|
||||||
|
document.body.style.backgroundColor = theme === 'dark' ? '#000' : '#eeeeee';
|
||||||
if (theme === 'dark') {
|
|
||||||
document.body.style.backgroundColor = '#1a1a1a';
|
|
||||||
} else if (theme === 'black') {
|
|
||||||
document.body.style.backgroundColor = '#000';
|
|
||||||
} else if (theme === 'red') {
|
|
||||||
document.body.style.backgroundColor = '#000';
|
|
||||||
} else {
|
|
||||||
document.body.style.backgroundColor = '#eeeeee';
|
|
||||||
}
|
|
||||||
|
|
||||||
const fullScreenAvailable = document.fullscreenEnabled ||
|
|
||||||
document.mozFullscreenEnabled ||
|
|
||||||
document.webkitFullscreenEnabled ||
|
|
||||||
document.msFullscreenEnabled;
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className={theme}>
|
<div className={theme}>
|
||||||
<Router>
|
<Router>
|
||||||
<div className='container menu'>
|
<div className='container menu'>
|
||||||
<p>
|
<p>
|
||||||
<Link to='/'>QotNews</Link>
|
<Link to='/'>QotNews - Feed</Link>
|
||||||
|
<span className='theme'>Theme: <a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a></span>
|
||||||
<span className='theme'><a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a> - <a href='#' onClick={() => this.black()}>Black</a> - <a href='#' onClick={() => this.red()}>Red</a></span>
|
|
||||||
<br />
|
<br />
|
||||||
<span className='slogan'>Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.</span>
|
<span className='slogan'>Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode.</span>
|
||||||
</p>
|
</p>
|
||||||
<Route path='/(|search)' component={Search} />
|
<Route path='/(|search)' component={Search} />
|
||||||
<Route path='/(|search)' component={Submit} />
|
<Route path='/(|search)' component={Submit} />
|
||||||
{fullScreenAvailable &&
|
|
||||||
<Route path='/(|search)' render={() => !document.fullscreenElement ?
|
|
||||||
<button className='fullscreen' onClick={() => this.goFullScreen()}>Enter Fullscreen</button>
|
|
||||||
:
|
|
||||||
<button className='fullscreen' onClick={() => this.exitFullScreen()}>Exit Fullscreen</button>
|
|
||||||
} />
|
|
||||||
}
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<Route path='/' exact render={(props) => <Feed {...props} updateCache={this.updateCache} />} />
|
<Route path='/' exact render={(props) => <Feed {...props} updateCache={this.updateCache} />} />
|
||||||
@@ -123,7 +72,6 @@ class App extends React.Component {
|
|||||||
</Switch>
|
</Switch>
|
||||||
<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} />} />
|
<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} />} />
|
||||||
|
|
||||||
<BackwardDot />
|
|
||||||
<ForwardDot />
|
<ForwardDot />
|
||||||
|
|
||||||
<ScrollToTop />
|
<ScrollToTop />
|
||||||
|
@@ -67,8 +67,7 @@ class Article extends React.Component {
|
|||||||
{story ?
|
{story ?
|
||||||
<div className='article'>
|
<div className='article'>
|
||||||
<Helmet>
|
<Helmet>
|
||||||
<title>{story.title} | QotNews</title>
|
<title>{story.title} - QotNews</title>
|
||||||
<meta name="robots" content="noindex" />
|
|
||||||
</Helmet>
|
</Helmet>
|
||||||
|
|
||||||
<h1>{story.title}</h1>
|
<h1>{story.title}</h1>
|
||||||
|
@@ -72,7 +72,7 @@ class Article extends React.Component {
|
|||||||
}
|
}
|
||||||
|
|
||||||
displayComment(story, c, level) {
|
displayComment(story, c, level) {
|
||||||
const cid = c.author+c.date;
|
const cid = c.author + c.date;
|
||||||
|
|
||||||
const collapsed = this.state.collapsed.includes(cid);
|
const collapsed = this.state.collapsed.includes(cid);
|
||||||
const expanded = this.state.expanded.includes(cid);
|
const expanded = this.state.expanded.includes(cid);
|
||||||
@@ -85,19 +85,22 @@ class Article extends React.Component {
|
|||||||
<div className='info'>
|
<div className='info'>
|
||||||
<p>
|
<p>
|
||||||
{c.author === story.author ? '[OP]' : ''} {c.author || '[Deleted]'}
|
{c.author === story.author ? '[OP]' : ''} {c.author || '[Deleted]'}
|
||||||
{' '} | <HashLink to={'#'+cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
|
{' '} | <HashLink to={'#' + cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
|
||||||
|
|
||||||
{hidden || hasChildren &&
|
{hasChildren && (
|
||||||
<span className='collapser pointer' onClick={() => this.collapseComment(cid)}>–</span>
|
hidden ?
|
||||||
}
|
<span className='collapser expander pointer' onClick={() => this.expandComment(cid)}>+</span>
|
||||||
|
:
|
||||||
|
<span className='collapser pointer' onClick={() => this.collapseComment(cid)}>–</span>
|
||||||
|
)}
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className={collapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text }} />
|
<div className={collapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text }} />
|
||||||
|
|
||||||
{hidden && hasChildren ?
|
{hidden && hasChildren ?
|
||||||
<div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c)-1} more]</div>
|
<div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c) - 1} more]</div>
|
||||||
:
|
:
|
||||||
c.comments.map(i => this.displayComment(story, i, level + 1))
|
c.comments.map(i => this.displayComment(story, i, level + 1))
|
||||||
}
|
}
|
||||||
</div>
|
</div>
|
||||||
@@ -115,8 +118,7 @@ class Article extends React.Component {
|
|||||||
{story ?
|
{story ?
|
||||||
<div className='article'>
|
<div className='article'>
|
||||||
<Helmet>
|
<Helmet>
|
||||||
<title>{story.title} | QotNews</title>
|
<title>{story.title} - QotNews Comments</title>
|
||||||
<meta name="robots" content="noindex" />
|
|
||||||
</Helmet>
|
</Helmet>
|
||||||
|
|
||||||
<h1>{story.title}</h1>
|
<h1>{story.title}</h1>
|
||||||
@@ -131,7 +133,7 @@ class Article extends React.Component {
|
|||||||
{story.comments.map(c => this.displayComment(story, c, 0))}
|
{story.comments.map(c => this.displayComment(story, c, 0))}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
:
|
:
|
||||||
<p>loading...</p>
|
<p>loading...</p>
|
||||||
}
|
}
|
||||||
<ToggleDot id={id} article={true} />
|
<ToggleDot id={id} article={true} />
|
||||||
|
@@ -22,20 +22,21 @@ class Feed extends React.Component {
|
|||||||
const updated = !this.state.stories || this.state.stories[0].id !== result.stories[0].id;
|
const updated = !this.state.stories || this.state.stories[0].id !== result.stories[0].id;
|
||||||
console.log('updated:', updated);
|
console.log('updated:', updated);
|
||||||
|
|
||||||
this.setState({ stories: result.stories });
|
const { stories } = result;
|
||||||
localStorage.setItem('stories', JSON.stringify(result.stories));
|
this.setState({ stories });
|
||||||
|
localStorage.setItem('stories', JSON.stringify(stories));
|
||||||
|
|
||||||
if (updated) {
|
if (updated) {
|
||||||
localForage.clear();
|
localForage.clear();
|
||||||
result.stories.forEach((x, i) => {
|
stories.forEach((x, i) => {
|
||||||
fetch('/api/' + x.id)
|
fetch('/api/' + x.id)
|
||||||
.then(res => res.json())
|
.then(res => res.json())
|
||||||
.then(result => {
|
.then(({ story }) => {
|
||||||
localForage.setItem(x.id, result.story)
|
localForage.setItem(x.id, story)
|
||||||
.then(console.log('preloaded', x.id, x.title));
|
.then(console.log('preloaded', x.id, x.title));
|
||||||
this.props.updateCache(x.id, result.story);
|
this.props.updateCache(x.id, story);
|
||||||
}, error => {}
|
}, error => { }
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -52,8 +53,7 @@ class Feed extends React.Component {
|
|||||||
return (
|
return (
|
||||||
<div className='container'>
|
<div className='container'>
|
||||||
<Helmet>
|
<Helmet>
|
||||||
<title>QotNews</title>
|
<title>Feed - QotNews</title>
|
||||||
<meta name="robots" content="index" />
|
|
||||||
</Helmet>
|
</Helmet>
|
||||||
{error && <p>Connection error?</p>}
|
{error && <p>Connection error?</p>}
|
||||||
{stories ?
|
{stories ?
|
||||||
@@ -62,7 +62,7 @@ class Feed extends React.Component {
|
|||||||
<div className='item' key={x.id}>
|
<div className='item' key={x.id}>
|
||||||
<div className='title'>
|
<div className='title'>
|
||||||
<Link className='link' to={'/' + x.id}>
|
<Link className='link' to={'/' + x.id}>
|
||||||
<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
|
<img className='source-logo' src={logos[x.source] || logos[x.source.split(' ')[0]]} alt='source logo' /> {x.title}
|
||||||
</Link>
|
</Link>
|
||||||
|
|
||||||
<span className='source'>
|
<span className='source'>
|
||||||
@@ -74,7 +74,7 @@ class Feed extends React.Component {
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
:
|
:
|
||||||
<p>loading...</p>
|
<p>loading...</p>
|
||||||
}
|
}
|
||||||
</div>
|
</div>
|
||||||
|
@@ -29,7 +29,7 @@ class Results extends React.Component {
|
|||||||
.then(res => res.json())
|
.then(res => res.json())
|
||||||
.then(
|
.then(
|
||||||
(result) => {
|
(result) => {
|
||||||
this.setState({ stories: result.hits });
|
this.setState({ stories: result.results });
|
||||||
},
|
},
|
||||||
(error) => {
|
(error) => {
|
||||||
if (error.message !== 'The operation was aborted. ') {
|
if (error.message !== 'The operation was aborted. ') {
|
||||||
@@ -56,7 +56,7 @@ class Results extends React.Component {
|
|||||||
return (
|
return (
|
||||||
<div className='container'>
|
<div className='container'>
|
||||||
<Helmet>
|
<Helmet>
|
||||||
<title>Search Results | QotNews</title>
|
<title>Feed - QotNews</title>
|
||||||
</Helmet>
|
</Helmet>
|
||||||
{error && <p>Connection error?</p>}
|
{error && <p>Connection error?</p>}
|
||||||
{stories ?
|
{stories ?
|
||||||
|
@@ -15,7 +15,6 @@ class ScrollToTop extends React.Component {
|
|||||||
}
|
}
|
||||||
|
|
||||||
window.scrollTo(0, 0);
|
window.scrollTo(0, 0);
|
||||||
document.body.scrollTop = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
render() {
|
render() {
|
||||||
|
@@ -37,7 +37,7 @@ class Search extends Component {
|
|||||||
<span className='search'>
|
<span className='search'>
|
||||||
<form onSubmit={this.searchAgain}>
|
<form onSubmit={this.searchAgain}>
|
||||||
<input
|
<input
|
||||||
placeholder='Search...'
|
placeholder='Search... (fixed)'
|
||||||
value={search}
|
value={search}
|
||||||
onChange={this.searchArticles}
|
onChange={this.searchArticles}
|
||||||
ref={this.inputRef}
|
ref={this.inputRef}
|
||||||
|
@@ -1,68 +0,0 @@
|
|||||||
.black {
|
|
||||||
color: #ddd;
|
|
||||||
}
|
|
||||||
|
|
||||||
.black a {
|
|
||||||
color: #ddd;
|
|
||||||
}
|
|
||||||
|
|
||||||
.black input {
|
|
||||||
color: #ddd;
|
|
||||||
border: 1px solid #828282;
|
|
||||||
}
|
|
||||||
|
|
||||||
.black button {
|
|
||||||
background-color: #444444;
|
|
||||||
border-color: #bbb;
|
|
||||||
color: #ddd;
|
|
||||||
}
|
|
||||||
|
|
||||||
.black .item {
|
|
||||||
color: #828282;
|
|
||||||
}
|
|
||||||
|
|
||||||
.black .item .source-logo {
|
|
||||||
filter: grayscale(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
.black .item a {
|
|
||||||
color: #828282;
|
|
||||||
}
|
|
||||||
|
|
||||||
.black .item a.link {
|
|
||||||
color: #ddd;
|
|
||||||
}
|
|
||||||
.black .item a.link:visited {
|
|
||||||
color: #828282;
|
|
||||||
}
|
|
||||||
|
|
||||||
.black .item .info a.hot {
|
|
||||||
color: #cccccc;
|
|
||||||
}
|
|
||||||
|
|
||||||
.black .article a {
|
|
||||||
border-bottom: 1px solid #aaaaaa;
|
|
||||||
}
|
|
||||||
|
|
||||||
.black .article u {
|
|
||||||
border-bottom: 1px solid #aaaaaa;
|
|
||||||
text-decoration: none;
|
|
||||||
}
|
|
||||||
|
|
||||||
.black .story-text video,
|
|
||||||
.black .story-text img {
|
|
||||||
filter: brightness(50%);
|
|
||||||
}
|
|
||||||
|
|
||||||
.black .article .info {
|
|
||||||
color: #828282;
|
|
||||||
}
|
|
||||||
|
|
||||||
.black .article .info a {
|
|
||||||
border-bottom: none;
|
|
||||||
color: #828282;
|
|
||||||
}
|
|
||||||
|
|
||||||
.black .comment.lined {
|
|
||||||
border-left: 1px solid #444444;
|
|
||||||
}
|
|
@@ -11,16 +11,14 @@
|
|||||||
border: 1px solid #828282;
|
border: 1px solid #828282;
|
||||||
}
|
}
|
||||||
|
|
||||||
.dark button {
|
|
||||||
background-color: #444444;
|
|
||||||
border-color: #bbb;
|
|
||||||
color: #ddd;
|
|
||||||
}
|
|
||||||
|
|
||||||
.dark .item {
|
.dark .item {
|
||||||
color: #828282;
|
color: #828282;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.dark .item .source-logo {
|
||||||
|
filter: grayscale(1);
|
||||||
|
}
|
||||||
|
|
||||||
.dark .item a {
|
.dark .item a {
|
||||||
color: #828282;
|
color: #828282;
|
||||||
}
|
}
|
||||||
@@ -45,7 +43,6 @@
|
|||||||
text-decoration: none;
|
text-decoration: none;
|
||||||
}
|
}
|
||||||
|
|
||||||
.dark .story-text video,
|
|
||||||
.dark .story-text img {
|
.dark .story-text img {
|
||||||
filter: brightness(50%);
|
filter: brightness(50%);
|
||||||
}
|
}
|
||||||
|
@@ -2,30 +2,9 @@ body {
|
|||||||
text-rendering: optimizeLegibility;
|
text-rendering: optimizeLegibility;
|
||||||
font: 1rem/1.3 sans-serif;
|
font: 1rem/1.3 sans-serif;
|
||||||
color: #000000;
|
color: #000000;
|
||||||
|
margin-bottom: 100vh;
|
||||||
word-break: break-word;
|
word-break: break-word;
|
||||||
font-kerning: normal;
|
font-kerning: normal;
|
||||||
margin: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
::backdrop {
|
|
||||||
background-color: rgba(0,0,0,0);
|
|
||||||
}
|
|
||||||
|
|
||||||
body:fullscreen {
|
|
||||||
overflow-y: scroll !important;
|
|
||||||
}
|
|
||||||
body:-ms-fullscreen {
|
|
||||||
overflow-y: scroll !important;
|
|
||||||
}
|
|
||||||
body:-webkit-full-screen {
|
|
||||||
overflow-y: scroll !important;
|
|
||||||
}
|
|
||||||
body:-moz-full-screen {
|
|
||||||
overflow-y: scroll !important;
|
|
||||||
}
|
|
||||||
|
|
||||||
#root {
|
|
||||||
margin: 8px 8px 100vh 8px !important;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
a {
|
a {
|
||||||
@@ -43,12 +22,6 @@ input {
|
|||||||
border-radius: 4px;
|
border-radius: 4px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.fullscreen {
|
|
||||||
margin: 0.25rem;
|
|
||||||
padding: 0.25rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
pre {
|
pre {
|
||||||
overflow: auto;
|
overflow: auto;
|
||||||
}
|
}
|
||||||
@@ -212,20 +185,16 @@ span.source {
|
|||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
}
|
}
|
||||||
|
|
||||||
.dot {
|
.toggleDot {
|
||||||
cursor: pointer;
|
|
||||||
position: fixed;
|
position: fixed;
|
||||||
|
bottom: 1rem;
|
||||||
|
left: 1rem;
|
||||||
height: 3rem;
|
height: 3rem;
|
||||||
width: 3rem;
|
width: 3rem;
|
||||||
background-color: #828282;
|
background-color: #828282;
|
||||||
border-radius: 50%;
|
border-radius: 50%;
|
||||||
}
|
}
|
||||||
|
|
||||||
.toggleDot {
|
|
||||||
bottom: 1rem;
|
|
||||||
left: 1rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
.toggleDot .button {
|
.toggleDot .button {
|
||||||
font: 2rem/1 'icomoon';
|
font: 2rem/1 'icomoon';
|
||||||
position: relative;
|
position: relative;
|
||||||
@@ -234,27 +203,21 @@ span.source {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.forwardDot {
|
.forwardDot {
|
||||||
|
cursor: pointer;
|
||||||
|
position: fixed;
|
||||||
bottom: 1rem;
|
bottom: 1rem;
|
||||||
right: 1rem;
|
right: 1rem;
|
||||||
|
height: 3rem;
|
||||||
|
width: 3rem;
|
||||||
|
background-color: #828282;
|
||||||
|
border-radius: 50%;
|
||||||
}
|
}
|
||||||
|
|
||||||
.forwardDot .button {
|
.forwardDot .button {
|
||||||
font: 2rem/1 'icomoon';
|
font: 2.5rem/1 'icomoon';
|
||||||
position: relative;
|
position: relative;
|
||||||
top: 0.5rem;
|
top: 0.25rem;
|
||||||
left: 0.5rem;
|
left: 0.3rem;
|
||||||
}
|
|
||||||
|
|
||||||
.backwardDot {
|
|
||||||
bottom: 1rem;
|
|
||||||
right: 5rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
.backwardDot .button {
|
|
||||||
font: 2rem/1 'icomoon';
|
|
||||||
position: relative;
|
|
||||||
top: 0.5rem;
|
|
||||||
left: 0.5rem;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.search form {
|
.search form {
|
||||||
|
@@ -1,82 +0,0 @@
|
|||||||
.red {
|
|
||||||
color: #b00;
|
|
||||||
scrollbar-color: #b00 #440000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.red a {
|
|
||||||
color: #b00;
|
|
||||||
}
|
|
||||||
|
|
||||||
.red input {
|
|
||||||
color: #b00;
|
|
||||||
border: 1px solid #690000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.red input::placeholder {
|
|
||||||
color: #690000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.red hr {
|
|
||||||
background-color: #690000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.red button {
|
|
||||||
background-color: #440000;
|
|
||||||
border-color: #b00;
|
|
||||||
color: #b00;
|
|
||||||
}
|
|
||||||
|
|
||||||
.red .item,
|
|
||||||
.red .slogan {
|
|
||||||
color: #690000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.red .item .source-logo {
|
|
||||||
display: none;
|
|
||||||
}
|
|
||||||
|
|
||||||
.red .item a {
|
|
||||||
color: #690000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.red .item a.link {
|
|
||||||
color: #b00;
|
|
||||||
}
|
|
||||||
.red .item a.link:visited {
|
|
||||||
color: #690000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.red .item .info a.hot {
|
|
||||||
color: #cc0000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.red .article a {
|
|
||||||
border-bottom: 1px solid #aa0000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.red .article u {
|
|
||||||
border-bottom: 1px solid #aa0000;
|
|
||||||
text-decoration: none;
|
|
||||||
}
|
|
||||||
|
|
||||||
.red .story-text video,
|
|
||||||
.red .story-text img {
|
|
||||||
filter: grayscale(100%) brightness(20%) sepia(100%) hue-rotate(-50deg) saturate(600%) contrast(0.8);
|
|
||||||
}
|
|
||||||
|
|
||||||
.red .article .info {
|
|
||||||
color: #690000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.red .article .info a {
|
|
||||||
border-bottom: none;
|
|
||||||
color: #690000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.red .comment.lined {
|
|
||||||
border-left: 1px solid #440000;
|
|
||||||
}
|
|
||||||
|
|
||||||
.red .dot {
|
|
||||||
background-color: #440000;
|
|
||||||
}
|
|
@@ -41,7 +41,7 @@ class Submit extends Component {
|
|||||||
<span className='search'>
|
<span className='search'>
|
||||||
<form onSubmit={this.submitArticle}>
|
<form onSubmit={this.submitArticle}>
|
||||||
<input
|
<input
|
||||||
placeholder='Submit URL'
|
placeholder='Submit Article'
|
||||||
ref={this.inputRef}
|
ref={this.inputRef}
|
||||||
/>
|
/>
|
||||||
</form>
|
</form>
|
||||||
|
Binary file not shown.
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user