Compare commits


54 Commits

Author | SHA1 | Message | Date
Jason Schwarzenberger | 9cee370a25 | tvnz icon | 2020-11-10 14:10:02 +13:00
Jason Schwarzenberger | 5efc6ef2d3 | add related stories (in api only) | 2020-11-10 14:09:56 +13:00
Jason Schwarzenberger | 4ec50e20cb | feed thread loop. | 2020-11-10 10:10:38 +13:00
Jason Schwarzenberger | c1b7877f4b | remove limit. | 2020-11-09 17:54:50 +13:00
Jason Schwarzenberger | 7b8cbfc9b9 | try to make feed only determined by the max age. | 2020-11-09 17:50:58 +13:00
Jason Schwarzenberger | bfa4108a8e | Merge remote-tracking branch 'tanner/master' | 2020-11-09 16:08:28 +13:00
Jason Schwarzenberger | 0bd0d40a31 | use json type in sqlite. | 2020-11-09 15:45:10 +13:00
Jason Schwarzenberger | 4e04595415 | fix search. | 2020-11-09 15:44:44 +13:00
Jason | 006db2960c | change to 3 days | 2020-11-09 01:36:51 +00:00
Jason Schwarzenberger | 1f063f0dac | undo log level change | 2020-11-06 11:20:34 +13:00
Jason Schwarzenberger | 1658346aa9 | fix news.py feed. | 2020-11-06 10:37:43 +13:00
Jason Schwarzenberger | 2dbc702b40 | switch to python-dateutil for parser, reverse sort xml feeds. | 2020-11-06 10:02:39 +13:00
Jason Schwarzenberger | 1c4764e67d | sort sitemap feed by lastmod time. | 2020-11-06 09:30:15 +13:00
Jason | ee49d2021e | newsroom | 2020-11-05 20:28:55 +00:00
Jason | c391c50ab1 | use localize | 2020-11-05 04:15:31 +00:00
Jason Schwarzenberger | 095f0d549a | use replace. | 2020-11-05 16:57:08 +13:00
Jason Schwarzenberger | c21c71667e | fix date issue. | 2020-11-05 16:41:15 +13:00
Jason Schwarzenberger | c3a2c91a11 | update requirements.txt | 2020-11-05 16:33:50 +13:00
Jason Schwarzenberger | 0f39446a61 | tz aware for use in settings. | 2020-11-05 16:30:55 +13:00
Jason Schwarzenberger | 351059aab1 | fix excludes. | 2020-11-05 15:59:13 +13:00
Jason Schwarzenberger | 4488e2c292 | add an excludes list of substrings for urls in the settings for sitemap/category. | 2020-11-05 15:51:59 +13:00
Jason Schwarzenberger | afda5b635c | disqus test. | 2020-11-05 14:23:51 +13:00
Jason Schwarzenberger | 0fc1a44d2b | fix issue in substack. | 2020-11-04 17:40:29 +13:00
Jason Schwarzenberger | 9fff1b9e46 | avoid duplicate articles listed on the category page | 2020-11-04 17:14:42 +13:00
Jason Schwarzenberger | 16b59f6c67 | try stop bad pages. | 2020-11-04 16:34:31 +13:00
Jason Schwarzenberger | 939f4775a7 | better settings example. | 2020-11-04 15:52:34 +13:00
Jason Schwarzenberger | 9bfc6fc6fa | scraper settings, ordering and loop. | 2020-11-04 15:47:12 +13:00
Jason Schwarzenberger | 6ea9844d00 | remove useless try blocks. | 2020-11-04 15:37:19 +13:00
Jason Schwarzenberger | 1318259d3d | imply referrer is substack. | 2020-11-04 15:21:07 +13:00
Jason Schwarzenberger | 98a0c2257c | increase declutter timeout. | 2020-11-04 15:15:00 +13:00
Jason Schwarzenberger | e6976db25d | fix tabs | 2020-11-04 15:04:20 +13:00
Jason Schwarzenberger | 9edc8b7cca | move scraping for article content to files. | 2020-11-04 15:00:58 +13:00
Jason Schwarzenberger | 33e21e7f30 | fix mistake. | 2020-11-04 12:45:01 +13:00
Jason Schwarzenberger | 892a99eca6 | add + expander in place of collapser. | 2020-11-04 12:43:15 +13:00
Jason Schwarzenberger | d718d05a04 | fix dates for newsroom. | 2020-11-04 11:53:16 +13:00
Jason Schwarzenberger | d1795eb1b8 | add radionz and newsroom logos. | 2020-11-04 11:30:56 +13:00
Jason Schwarzenberger | 9f4ff4acf0 | remove unnecessary sitemap.xml request. | 2020-11-04 11:22:15 +13:00
Jason Schwarzenberger | db6aad84ec | fix mistake. | 2020-11-04 11:12:01 +13:00
Jason Schwarzenberger | 29f8a8b8cc | add news site categories feed. | 2020-11-04 11:08:50 +13:00
Jason | abf8589e02 | fix sitemap | 2020-11-03 10:53:40 +00:00
Jason | b759f46582 | use extruct for opengraph/json-ld/microdata of articles | 2020-11-03 10:31:36 +00:00
Jason Schwarzenberger | 736cdc8576 | fix mistake. | 2020-11-03 17:04:46 +13:00
Jason Schwarzenberger | 244d416f6e | settings config of sitemap/substack publications. | 2020-11-03 17:01:29 +13:00
Jason Schwarzenberger | 5f98a2e76a | Merge remote-tracking branch 'tanner/master' into master (And adding relevant setings.py.example/etc.) | 2020-11-03 16:44:02 +13:00
Jason Schwarzenberger | 0567cdfd9b | move sort to render. | 2020-11-03 16:30:22 +13:00
Jason Schwarzenberger | 4f90671cec | order feed by reverse chronological | 2020-11-03 16:21:23 +13:00
Jason Schwarzenberger | e63a1456a5 | add logos. | 2020-11-03 16:07:07 +13:00
Jason Schwarzenberger | 76f1d57702 | sitemap based feed. | 2020-11-03 16:00:03 +13:00
Jason Schwarzenberger | de80389ed0 | add logos. | 2020-11-03 12:48:19 +13:00
Jason Schwarzenberger | 4e64cf682a | add the bulletin. | 2020-11-03 12:41:16 +13:00
Jason Schwarzenberger | c5fe5d25a0 | add substack.py top sites, replacing webworm.py | 2020-11-03 12:28:39 +13:00
Jason | 283a2b1545 | fix webworm comments | 2020-11-02 22:06:43 +00:00
Jason Schwarzenberger | 0d6a86ace2 | fix webworm dates. | 2020-11-03 10:31:14 +13:00
Jason Schwarzenberger | f23bf628e0 | add webworm/substack as a feed. | 2020-11-02 17:09:59 +13:00
41 changed files with 1229 additions and 1305 deletions

View File

@@ -109,5 +109,4 @@ settings.py
data.db
data.db.bak
data/archive/*
data/backup/*
qotnews.sqlite

View File

@@ -1,11 +1,11 @@
import json
from datetime import datetime, timedelta
from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import IntegrityError
from sqlalchemy.types import JSON
engine = create_engine('sqlite:///data/qotnews.sqlite', connect_args={'timeout': 360})
engine = create_engine('sqlite:///data/qotnews.sqlite')
Session = sessionmaker(bind=engine)
Base = declarative_base()
@@ -15,8 +15,8 @@ class Story(Base):
sid = Column(String(16), primary_key=True)
ref = Column(String(16), unique=True)
meta_json = Column(String)
full_json = Column(String)
meta = Column(JSON)
data = Column(JSON)
title = Column(String)
class Reflist(Base):
@@ -36,19 +36,21 @@ def get_story(sid):
def put_story(story):
story = story.copy()
full_json = json.dumps(story)
data = {}
data.update(story)
story.pop('text', None)
story.pop('comments', None)
meta_json = json.dumps(story)
meta = {}
meta.update(story)
meta.pop('text', None)
meta.pop('comments', None)
try:
session = Session()
s = Story(
sid=story['id'],
ref=story['ref'],
full_json=full_json,
meta_json=meta_json,
data=data,
meta=meta,
title=story.get('title', None),
)
session.merge(s)
@@ -63,19 +65,26 @@ def get_story_by_ref(ref):
session = Session()
return session.query(Story).filter(Story.ref==ref).first()
def get_reflist(amount):
def get_stories_by_url(url):
session = Session()
q = session.query(Reflist).order_by(Reflist.rid.desc()).limit(amount)
return session.query(Story).\
filter(Story.title != None).\
filter(Story.meta['url'].as_string() == url).\
order_by(Story.meta['date'].desc())
def get_reflist():
session = Session()
q = session.query(Reflist).order_by(Reflist.rid.desc())
return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]
def get_stories(amount, skip=0):
def get_stories(maxage=60*60*24*2):
time = datetime.now().timestamp() - maxage
session = Session()
q = session.query(Reflist, Story.meta_json).\
order_by(Reflist.rid.desc()).\
q = session.query(Reflist, Story.meta).\
join(Story).\
filter(Story.title != None).\
offset(skip).\
limit(amount)
filter(Story.meta['date'] > time).\
order_by(Story.meta['date'].desc())
return [x[1] for x in q]
def put_ref(ref, sid, source):
@@ -101,22 +110,7 @@ def del_ref(ref):
finally:
session.close()
def count_stories():
try:
session = Session()
return session.query(Story).count()
finally:
session.close()
def get_story_list():
try:
session = Session()
return session.query(Story.sid).all()
finally:
session.close()
if __name__ == '__main__':
init()
#print(get_story_by_ref('hgi3sy'))
print(len(get_reflist(99999)))
print(get_story_by_ref('hgi3sy'))
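The database.py changes above replace the serialized meta_json/full_json strings with native JSON columns, which is what lets get_stories() and get_stories_by_url() filter and sort on keys inside the stored dict. A minimal, self-contained sketch of that pattern, assuming the SQLAlchemy 1.3-era API used in the diff; the table and sample rows below are invented for illustration:

```python
from datetime import datetime, timedelta

from sqlalchemy import Column, String, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.types import JSON

Base = declarative_base()

class Story(Base):
    __tablename__ = 'stories'
    sid = Column(String(16), primary_key=True)
    meta = Column(JSON)  # whole metadata dict, queryable by key

engine = create_engine('sqlite://')  # in-memory SQLite for the demo
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

now = datetime.now().timestamp()
session.add(Story(sid='aaaa', meta={'url': 'https://example.com/a', 'date': now}))
session.add(Story(sid='bbbb', meta={'url': 'https://example.com/b',
                                    'date': now - timedelta(days=5).total_seconds()}))
session.commit()

# Equivalent of get_stories(maxage): only stories newer than the cutoff,
# newest first, comparing the JSON 'date' key numerically.
cutoff = now - 60*60*24*2
fresh = session.query(Story.meta).\
    filter(Story.meta['date'].as_float() > cutoff).\
    order_by(Story.meta['date'].desc()).all()
print([m['url'] for (m,) in fresh])
```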

View File

@@ -1,8 +1,6 @@
import database
import search
import sys
import settings
import logging
import json
import requests
@@ -23,7 +21,7 @@ def database_del_story(sid):
def search_del_story(sid):
try:
r = requests.delete(settings.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
r = requests.delete(search.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()

View File

@@ -6,84 +6,117 @@ logging.basicConfig(
import requests
import time
from bs4 import BeautifulSoup
import itertools
import settings
from feeds import hackernews, reddit, tildes, manual, lobsters
import utils
from feeds import hackernews, reddit, tildes, substack, manual, news
from scrapers import outline, declutter, local
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com', 'sec.gov']
TWO_DAYS = 60*60*24*2
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
def list():
feed = []
substacks = {}
for key, value in settings.SUBSTACK.items():
substacks[key] = substack.Publication(value['url'])
categories = {}
for key, value in settings.CATEGORY.items():
categories[key] = news.Category(value['url'], value.get('tz'))
sitemaps = {}
for key, value in settings.SITEMAP.items():
sitemaps[key] = news.Sitemap(value['url'], value.get('tz'))
def get_list():
feeds = {}
if settings.NUM_HACKERNEWS:
feed += [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
if settings.NUM_LOBSTERS:
feed += [(x, 'lobsters') for x in lobsters.feed()[:settings.NUM_LOBSTERS]]
feeds['hackernews'] = [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
if settings.NUM_REDDIT:
feed += [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
feeds['reddit'] = [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
if settings.NUM_TILDES:
feed += [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
feeds['tildes'] = [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
if settings.NUM_SUBSTACK:
feeds['substack'] = [(x, 'substack') for x in substack.top.feed()[:settings.NUM_SUBSTACK]]
for key, publication in substacks.items():
count = settings.SUBSTACK[key]['count']
feeds[key] = [(x, key) for x in publication.feed()[:count]]
for key, sites in categories.items():
count = settings.CATEGORY[key].get('count') or 0
excludes = settings.CATEGORY[key].get('excludes')
tz = settings.CATEGORY[key].get('tz')
feeds[key] = [(x, key) for x in sites.feed(excludes)[:count]]
for key, sites in sitemaps.items():
count = settings.SITEMAP[key].get('count') or 0
excludes = settings.SITEMAP[key].get('excludes')
feeds[key] = [(x, key) for x in sites.feed(excludes)[:count]]
values = feeds.values()
feed = itertools.chain.from_iterable(itertools.zip_longest(*values, fillvalue=None))
feed = list(filter(None, feed))
return feed
def get_article(url):
if not settings.READER_URL:
logging.info('Readerserver not configured, aborting.')
return ''
scrapers = {
'declutter': declutter,
'outline': outline,
'local': local,
}
available = settings.SCRAPERS or ['local']
if 'local' not in available:
available += ['local']
if url.startswith('https://twitter.com'):
logging.info('Replacing twitter.com url with nitter.net')
url = url.replace('twitter.com', 'nitter.net')
try:
r = requests.post(settings.READER_URL, data=dict(url=url), timeout=20)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.text
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem getting article: {}'.format(str(e)))
return ''
for scraper in available:
if scraper not in scrapers.keys():
continue
try:
html = scrapers[scraper].get_html(url)
if html:
return html
except KeyboardInterrupt:
raise
except:
pass
return ''
def get_content_type(url):
try:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
return requests.get(url, headers=headers, timeout=5).headers['content-type']
except:
return ''
try:
headers = {
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
'X-Forwarded-For': '66.249.66.1',
}
return requests.get(url, headers=headers, timeout=10).headers['content-type']
return requests.get(url, headers=headers, timeout=5).headers['content-type']
except:
pass
try:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
return requests.get(url, headers=headers, timeout=10).headers['content-type']
except:
return ''
def update_story(story, is_manual=False):
res = {}
try:
if story['source'] == 'hackernews':
res = hackernews.story(story['ref'])
elif story['source'] == 'lobsters':
res = lobsters.story(story['ref'])
elif story['source'] == 'reddit':
res = reddit.story(story['ref'])
elif story['source'] == 'tildes':
res = tildes.story(story['ref'])
elif story['source'] == 'manual':
res = manual.story(story['ref'])
except BaseException as e:
utils.alert_tanner('Problem updating {} story, ref {}: {}'.format(story['source'], story['ref'], str(e)))
logging.exception(e)
return False
if story['source'] == 'hackernews':
res = hackernews.story(story['ref'])
elif story['source'] == 'reddit':
res = reddit.story(story['ref'])
elif story['source'] == 'tildes':
res = tildes.story(story['ref'])
elif story['source'] == 'substack':
res = substack.top.story(story['ref'])
elif story['source'] in categories.keys():
res = categories[story['source']].story(story['ref'])
elif story['source'] in sitemaps.keys():
res = sitemaps[story['source']].story(story['ref'])
elif story['source'] in substacks.keys():
res = substacks[story['source']].story(story['ref'])
elif story['source'] == 'manual':
res = manual.story(story['ref'])
if res:
story.update(res) # join dicts
@@ -91,8 +124,8 @@ def update_story(story, is_manual=False):
logging.info('Story not ready yet')
return False
if story['date'] and not is_manual and story['date'] + TWO_DAYS < time.time():
logging.info('Story too old, removing. Date: {}'.format(story['date']))
if story['date'] and not is_manual and story['date'] + settings.MAX_STORY_AGE < time.time():
logging.info('Story too old, removing')
return False
if story.get('url', '') and not story.get('text', ''):
@@ -106,12 +139,6 @@ def update_story(story, is_manual=False):
logging.info(story['url'])
return False
if 'trump' in story['title'].lower() or 'musk' in story['title'].lower():
logging.info('Trump / Musk story, skipping')
logging.info(story['url'])
return False
logging.info('Getting article ' + story['url'])
story['text'] = get_article(story['url'])
if not story['text']: return False
@@ -129,7 +156,7 @@ if __name__ == '__main__':
#print(get_article('https://www.bloomberg.com/news/articles/2019-09-23/xi-s-communists-under-pressure-as-high-prices-hit-china-workers'))
a = get_content_type('https://tefkos.comminfo.rutgers.edu/Courses/e530/Readings/Beal%202008%20full%20text%20searching.pdf')
a = get_article('https://blog.joinmastodon.org/2019/10/mastodon-3.0/')
print(a)
print('done')
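The new get_list() above collects each source's refs into a dict and then flattens them round-robin, so no single site dominates the top of the feed. A small sketch of that interleaving trick with made-up refs:

```python
import itertools

# One (ref, source) list per site, lengths deliberately uneven.
feeds = {
    'hackernews': [('hn1', 'hackernews'), ('hn2', 'hackernews'), ('hn3', 'hackernews')],
    'reddit':     [('rd1', 'reddit')],
    'tildes':     [('td1', 'tildes'), ('td2', 'tildes')],
}

# zip_longest takes one item from every source per round; filtering out the
# None fill values drops the gaps left by the shorter feeds.
interleaved = itertools.chain.from_iterable(
    itertools.zip_longest(*feeds.values(), fillvalue=None))
feed = list(filter(None, interleaved))
print(feed)
# [('hn1', 'hackernews'), ('rd1', 'reddit'), ('td1', 'tildes'),
#  ('hn2', 'hackernews'), ('td2', 'tildes'), ('hn3', 'hackernews')]
```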

View File

@@ -12,8 +12,7 @@ import requests
from utils import clean
API_TOPSTORIES = lambda x: 'https://hacker-news.firebaseio.com/v0/topstories.json'
ALG_API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
BHN_API_ITEM = lambda x : 'https://api.hnpwa.com/v0/item/{}.json'.format(x)
API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
SITE_LINK = lambda x : 'https://news.ycombinator.com/item?id={}'.format(x)
SITE_AUTHOR_LINK = lambda x : 'https://news.ycombinator.com/user?id={}'.format(x)
@@ -43,7 +42,7 @@ def api(route, ref=None):
def feed():
return [str(x) for x in api(API_TOPSTORIES) or []]
def alg_comment(i):
def comment(i):
if 'author' not in i:
return False
@@ -52,25 +51,21 @@ def alg_comment(i):
c['score'] = i.get('points', 0)
c['date'] = i.get('created_at_i', 0)
c['text'] = clean(i.get('text', '') or '')
c['comments'] = [alg_comment(j) for j in i['children']]
c['comments'] = [comment(j) for j in i['children']]
c['comments'] = list(filter(bool, c['comments']))
return c
def alg_comment_count(i):
def comment_count(i):
alive = 1 if i['author'] else 0
return sum([alg_comment_count(c) for c in i['comments']]) + alive
return sum([comment_count(c) for c in i['comments']]) + alive
def alg_story(ref):
r = api(ALG_API_ITEM, ref)
if not r:
logging.info('Bad Algolia Hackernews API response.')
return None
def story(ref):
r = api(API_ITEM, ref)
if not r: return False
if 'deleted' in r:
logging.info('Story was deleted.')
return False
elif r.get('type', '') != 'story':
logging.info('Type "{}" is not "story".'.format(r.get('type', '')))
return False
s = {}
@@ -81,85 +76,17 @@ def alg_story(ref):
s['title'] = r.get('title', '')
s['link'] = SITE_LINK(ref)
s['url'] = r.get('url', '')
s['comments'] = [alg_comment(i) for i in r['children']]
s['comments'] = [comment(i) for i in r['children']]
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = alg_comment_count(s) - 1
s['num_comments'] = comment_count(s) - 1
if 'text' in r and r['text']:
s['text'] = clean(r['text'] or '')
return s
def bhn_comment(i):
if 'user' not in i:
return False
c = {}
c['author'] = i.get('user', '')
c['score'] = 0 # Not present?
c['date'] = i.get('time', 0)
c['text'] = clean(i.get('content', '') or '')
c['comments'] = [bhn_comment(j) for j in i['comments']]
c['comments'] = list(filter(bool, c['comments']))
return c
def bhn_story(ref):
r = api(BHN_API_ITEM, ref)
if not r:
logging.info('Bad BetterHN Hackernews API response.')
return None
if 'deleted' in r: # TODO: verify
logging.info('Story was deleted.')
return False
elif r.get('dead', False):
logging.info('Story was deleted.')
return False
elif r.get('type', '') != 'link':
logging.info('Type "{}" is not "link".'.format(r.get('type', '')))
return False
s = {}
s['author'] = r.get('user', '')
s['author_link'] = SITE_AUTHOR_LINK(r.get('user', ''))
s['score'] = r.get('points', 0)
s['date'] = r.get('time', 0)
s['title'] = r.get('title', '')
s['link'] = SITE_LINK(ref)
s['url'] = r.get('url', '')
if s['url'].startswith('item'):
s['url'] = SITE_LINK(ref)
s['comments'] = [bhn_comment(i) for i in r['comments']]
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = r.get('comments_count', 0)
if 'content' in r and r['content']:
s['text'] = clean(r['content'] or '')
return s
def story(ref):
s = alg_story(ref)
if s is None:
s = bhn_story(ref)
if not s:
return False
if s['score'] < 25 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False
return s
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
print(feed())
#print(story(20763961))
#print(story(20802050))
#print(story(42899834)) # type "job"
#print(story(42900076)) # Ask HN
#print(story(42898201)) # Show HN
#print(story(42899703)) # normal
print(story(42902678)) # bad title?

View File

@@ -1,120 +0,0 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
if __name__ == '__main__':
import sys
sys.path.insert(0,'.')
import requests
from datetime import datetime
from utils import clean
API_HOTTEST = lambda x: 'https://lobste.rs/hottest.json'
API_ITEM = lambda x : 'https://lobste.rs/s/{}.json'.format(x)
SITE_LINK = lambda x : 'https://lobste.rs/s/{}'.format(x)
SITE_AUTHOR_LINK = lambda x : 'https://lobste.rs/u/{}'.format(x)
def api(route, ref=None):
try:
r = requests.get(route(ref), timeout=5)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting lobsters API: {}, trying again'.format(str(e)))
try:
r = requests.get(route(ref), timeout=15)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting lobsters API: {}'.format(str(e)))
return False
def feed():
return [x['short_id'] for x in api(API_HOTTEST) or []]
def unix(date_str):
date_str = date_str.replace(':', '')
return int(datetime.strptime(date_str, '%Y-%m-%dT%H%M%S.%f%z').timestamp())
def make_comment(i):
c = {}
try:
c['author'] = i['commenting_user']
except KeyError:
c['author'] = ''
c['score'] = i.get('score', 0)
try:
c['date'] = unix(i['created_at'])
except KeyError:
c['date'] = 0
c['text'] = clean(i.get('comment', '') or '')
c['comments'] = []
return c
def iter_comments(flat_comments):
nested_comments = []
parent_stack = []
for comment in flat_comments:
c = make_comment(comment)
indent = comment['depth']
if indent == 0:
nested_comments.append(c)
parent_stack = [c]
else:
parent_stack = parent_stack[:indent]
p = parent_stack[-1]
p['comments'].append(c)
parent_stack.append(c)
return nested_comments
def story(ref):
r = api(API_ITEM, ref)
if not r:
logging.info('Bad Lobsters API response.')
return False
s = {}
try:
s['author'] = r['submitter_user']
s['author_link'] = SITE_AUTHOR_LINK(s['author'])
except KeyError:
s['author'] = ''
s['author_link'] = ''
s['score'] = r.get('score', 0)
try:
s['date'] = unix(r['created_at'])
except KeyError:
s['date'] = 0
s['title'] = r.get('title', '')
s['link'] = SITE_LINK(ref)
s['url'] = r.get('url', '')
s['comments'] = iter_comments(r['comments'])
s['num_comments'] = r['comment_count']
if s['score'] < 15 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False
if 'description' in r and r['description']:
s['text'] = clean(r['description'] or '')
return s
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
#print(feed())
import json
print(json.dumps(story('fzvd1v'), indent=4))
#print(json.dumps(story('ixyv5u'), indent=4))

View File

@@ -27,9 +27,7 @@ def api(route):
def story(ref):
html = api(ref)
if not html:
logging.info('Bad http GET response.')
return False
if not html: return False
soup = BeautifulSoup(html, features='html.parser')

apiserver/feeds/news.py (new file, 231 lines)
View File

@@ -0,0 +1,231 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
if __name__ == '__main__':
import sys
sys.path.insert(0,'.')
import requests
from datetime import datetime
from bs4 import BeautifulSoup
from scrapers import declutter
import dateutil.parser
import extruct
import pytz
from utils import clean
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'
#USER_AGENT = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
def unix(date_str, tz=None):
try:
dt = dateutil.parser.parse(date_str)
if tz:
dt = pytz.timezone(tz).localize(dt)
return int(dt.timestamp())
except:
pass
return 0
def xml(route, ref=None):
try:
headers = {'User-Agent': USER_AGENT, 'X-Forwarded-For': '66.249.66.1'}
r = requests.get(route(ref), headers=headers, timeout=5)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.text
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting URL: {}'.format(str(e)))
return False
def parse_extruct(s, data):
for rdfa in data['rdfa']:
for key, props in rdfa.items():
if 'http://ogp.me/ns#title' in props:
for values in props['http://ogp.me/ns#title']:
s['title'] = values['@value']
if 'http://ogp.me/ns/article#modified_time' in props:
for values in props['http://ogp.me/ns/article#modified_time']:
s['date'] = values['@value']
if 'http://ogp.me/ns/article#published_time' in props:
for values in props['http://ogp.me/ns/article#published_time']:
s['date'] = values['@value']
for og in data['opengraph']:
titles = list(filter(None, [value if 'og:title' in key else None for key, value in og['properties']]))
modified = list(filter(None, [value if 'article:modified_time' in key else None for key, value in og['properties']]))
published = list(filter(None, [value if 'article:published_time' in key else None for key, value in og['properties']]))
if len(modified):
s['date'] = modified[0]
if len(published):
s['date'] = published[0]
if len(titles):
s['title'] = titles[0]
for md in data['microdata']:
if md['type'] == 'https://schema.org/NewsArticle':
props = md['properties']
s['title'] = props['headline']
if props['dateModified']:
s['date'] = props['dateModified']
if props['datePublished']:
s['date'] = props['datePublished']
if 'author' in props and props['author']:
s['author'] = props['author']['properties']['name']
for ld in data['json-ld']:
if '@type' in ld and ld['@type'] in ['Article', 'NewsArticle']:
s['title'] = ld['headline']
if ld['dateModified']:
s['date'] = ld['dateModified']
if ld['datePublished']:
s['date'] = ld['datePublished']
if 'author' in ld and ld['author']:
s['author'] = ld['author']['name']
if '@graph' in ld:
for gld in ld['@graph']:
if '@type' in gld and gld['@type'] in ['Article', 'NewsArticle']:
s['title'] = gld['headline']
if gld['dateModified']:
s['date'] = gld['dateModified']
if gld['datePublished']:
s['date'] = gld['datePublished']
return s
def comment(i):
if 'author' not in i:
return False
c = {}
c['author'] = i.get('author', '')
c['score'] = i.get('points', 0)
c['date'] = unix(i.get('date', 0))
c['text'] = clean(i.get('text', '') or '')
c['comments'] = [comment(j) for j in i['children']]
c['comments'] = list(filter(bool, c['comments']))
return c
def comment_count(i):
alive = 1 if i['author'] else 0
return sum([comment_count(c) for c in i['comments']]) + alive
class _Base:
def __init__(self, url, tz=None):
self.url = url
self.tz = tz
def feed(self, excludes=None):
return []
def story(self, ref):
markup = xml(lambda x: ref)
if not markup:
return False
s = {}
s['author_link'] = ''
s['score'] = 0
s['comments'] = []
s['num_comments'] = 0
s['link'] = ref
s['url'] = ref
s['date'] = 0
data = extruct.extract(markup)
s = parse_extruct(s, data)
if s['date']:
s['date'] = unix(s['date'], tz=self.tz)
if 'disqus' in markup:
try:
s['comments'] = declutter.get_comments(ref)
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = comment_count(s['comments'])
except KeyboardInterrupt:
raise
except:
pass
if not s['date']:
return False
return s
def get_sitemap_date(a):
if a.find('lastmod'):
return a.find('lastmod').text
if a.find('news:publication_date'):
return a.find('news:publication_date').text
if a.find('ns2:publication_date'):
return a.find('ns2:publication_date').text
return ''
class Sitemap(_Base):
def __init__(self, url, tz=None):
self.tz = tz
self.sitemap_url = url
def feed(self, excludes=None):
markup = xml(lambda x: self.sitemap_url)
if not markup: return []
soup = BeautifulSoup(markup, features='lxml')
sitemap = soup.find('urlset').findAll('url')
links = list(filter(None, [a if a.find('loc') else None for a in sitemap]))
links = list(filter(None, [a if get_sitemap_date(a) else None for a in links]))
links.sort(key=lambda a: unix(get_sitemap_date(a)), reverse=True)
links = [x.find('loc').text for x in links] or []
links = list(set(links))
if excludes:
links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
return links
class Category(_Base):
def __init__(self, url, tz=None):
self.tz = tz
self.category_url = url
self.base_url = '/'.join(url.split('/')[:3])
def feed(self, excludes=None):
markup = xml(lambda x: self.category_url)
if not markup: return []
soup = BeautifulSoup(markup, features='html.parser')
links = soup.find_all('a', href=True)
links = [link.get('href') for link in links]
links = [f"{self.base_url}{link}" if link.startswith('/') else link for link in links]
links = list(filter(None, [link if link.startswith(self.category_url) else None for link in links]))
links = list(filter(None, [link if link != self.category_url else None for link in links]))
links = list(set(links))
if excludes:
links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
return links
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
print("Sitemap: Stuff")
site = Sitemap("https://www.stuff.co.nz/sitemap/news/sitemap.xml")
posts = site.feed()
print(posts[:5])
print(site.story(posts[0]))
print("Category: RadioNZ Te Ao Māori")
site = Category("https://www.rnz.co.nz/news/te-manu-korihi/")
posts = site.feed()
print(posts[:5])
print(site.story(posts[0]))
print("Sitemap: Newsroom")
site = Sitemap("https://www.newsroom.co.nz/sitemap.xml")
posts = site.feed()
print(posts[:5])
print(site.story(posts[0]))
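news.py leans on python-dateutil and pytz for the "switch to python-dateutil", "tz aware" and "use localize" commits above: dateutil parses whatever timestamp format a sitemap or article page emits, and localize() pins naive values to the site's configured zone. A hedged sketch of that handling; the sample strings are invented and the tz name only mirrors the settings 'tz' option:

```python
import dateutil.parser
import pytz

def to_unix(date_str, tz='Pacific/Auckland'):
    dt = dateutil.parser.parse(date_str)
    if dt.tzinfo is None and tz:
        # naive timestamp from a sitemap: pin it to the site's local zone
        dt = pytz.timezone(tz).localize(dt)
    return int(dt.timestamp())

print(to_unix('2020-11-05T16:57:08+13:00'))  # offset present, used as-is
print(to_unix('2020-11-05 16:57:08'))        # naive, localized to NZ time first
```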

View File

@@ -32,8 +32,11 @@ def feed():
return [x.id for x in reddit.subreddit(subs).hot()]
except KeyboardInterrupt:
raise
except BaseException as e:
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
except PRAWException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
return []
except PrawcoreException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
return []
def comment(i):
@@ -56,9 +59,7 @@ def comment(i):
def story(ref):
try:
r = reddit.submission(ref)
if not r:
logging.info('Bad Reddit API response.')
return False
if not r: return False
s = {}
s['author'] = r.author.name if r.author else '[Deleted]'
@@ -73,7 +74,6 @@ def story(ref):
s['num_comments'] = r.num_comments
if s['score'] < 25 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False
if r.selftext:
@@ -84,10 +84,10 @@ def story(ref):
except KeyboardInterrupt:
raise
except PRAWException as e:
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
logging.error('Problem hitting reddit API: {}'.format(str(e)))
return False
except PrawcoreException as e:
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
logging.error('Problem hitting reddit API: {}'.format(str(e)))
return False
# scratchpad so I can quickly develop the parser

apiserver/feeds/substack.py (new file, 165 lines)
View File

@@ -0,0 +1,165 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
if __name__ == '__main__':
import sys
sys.path.insert(0,'.')
import requests
from datetime import datetime
from utils import clean
SUBSTACK_REFERER = 'https://substack.com'
SUBSTACK_API_TOP_POSTS = lambda x: "https://substack.com/api/v1/reader/top-posts"
def author_link(author_id, base_url):
return f"{base_url}/people/{author_id}"
def api_comments(post_id, base_url):
return f"{base_url}/api/v1/post/{post_id}/comments?all_comments=true&sort=best_first"
def api_stories(x, base_url):
return f"{base_url}/api/v1/archive?sort=new&search=&offset=0&limit=100"
def unix(date_str):
return int(datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%fZ').timestamp())
def api(route, ref=None, referer=None):
headers = {'Referer': referer} if referer else None
try:
r = requests.get(route(ref), headers=headers, timeout=10)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting Substack API: {}, trying again'.format(str(e)))
try:
r = requests.get(route(ref), headers=headers, timeout=20)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting Substack API: {}'.format(str(e)))
return False
def comment(i):
if 'body' not in i:
return False
c = {}
c['date'] = unix(i.get('date'))
c['author'] = i.get('name', '')
c['score'] = i.get('reactions').get('')
c['text'] = clean(i.get('body', '') or '')
c['comments'] = [comment(j) for j in i['children']]
c['comments'] = list(filter(bool, c['comments']))
return c
class Publication:
def __init__(self, domain):
self.BASE_DOMAIN = domain
def feed(self):
stories = api(lambda x: api_stories(x, self.BASE_DOMAIN), referer=self.BASE_DOMAIN)
if not stories: return []
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
return [str(i.get("id")) for i in stories or []]
def story(self, ref):
stories = api(lambda x: api_stories(x, self.BASE_DOMAIN), referer=self.BASE_DOMAIN)
if not stories: return False
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
stories = list(filter(None, [i if str(i.get('id')) == ref else None for i in stories]))
if len(stories) == 0:
return False
r = stories[0]
if not r:
return False
s = {}
s['author'] = ''
s['author_link'] = ''
s['date'] = unix(r.get('post_date'))
s['score'] = r.get('reactions').get('')
s['title'] = r.get('title', '')
s['link'] = r.get('canonical_url', '')
s['url'] = r.get('canonical_url', '')
comments = api(lambda x: api_comments(x, self.BASE_DOMAIN), r.get('id'), referer=self.BASE_DOMAIN)
s['comments'] = [comment(i) for i in comments.get('comments')]
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = r.get('comment_count', 0)
authors = list(filter(None, [self._bylines(byline) for byline in r.get('publishedBylines')]))
if len(authors):
s['author'] = authors[0].get('name')
s['author_link'] = authors[0].get('link')
return s
def _bylines(self, b):
if 'id' not in b:
return None
a = {}
a['name'] = b.get('name')
a['link'] = author_link(b.get('id'), self.BASE_DOMAIN)
return a
class Top:
def feed(self):
stories = api(SUBSTACK_API_TOP_POSTS, referer=SUBSTACK_REFERER)
if not stories: return []
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
return [str(i.get("id")) for i in stories or []]
def story(self, ref):
stories = api(SUBSTACK_API_TOP_POSTS, referer=SUBSTACK_REFERER)
if not stories: return False
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
stories = list(filter(None, [i if str(i.get('id')) == ref else None for i in stories]))
if len(stories) == 0:
return False
r = stories[0]
if not r:
return False
s = {}
pub = r.get('pub')
base_url = pub.get('base_url')
s['author'] = pub.get('author_name')
s['author_link'] = author_link(pub.get('author_id'), base_url)
s['date'] = unix(r.get('post_date'))
s['score'] = r.get('score')
s['title'] = r.get('title', '')
s['link'] = r.get('canonical_url', '')
s['url'] = r.get('canonical_url', '')
comments = api(lambda x: api_comments(x, base_url), r.get('id'), referer=SUBSTACK_REFERER)
s['comments'] = [comment(i) for i in comments.get('comments')]
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = r.get('comment_count', 0)
return s
top = Top()
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
top_posts = top.feed()
print(top.story(top_posts[0]))
webworm = Publication("https://www.webworm.co/")
posts = webworm.feed()
print(webworm.story(posts[0]))
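For reference, a rough usage sketch of the archive endpoint substack.py builds its publication feed from, using only the URL shape shown above; whether the endpoint still responds this way is an assumption, so treat it as illustrative:

```python
import requests

def publication_feed(base_url):
    # Same endpoint shape as api_stories() above; keep public posts only.
    url = f"{base_url}/api/v1/archive?sort=new&search=&offset=0&limit=100"
    r = requests.get(url, headers={'Referer': base_url}, timeout=10)
    r.raise_for_status()
    posts = r.json()
    return [str(p.get('id')) for p in posts if p.get('audience') == 'everyone']

if __name__ == '__main__':
    print(publication_feed('https://www.webworm.co')[:5])
```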

View File

@@ -34,7 +34,7 @@ def api(route):
except KeyboardInterrupt:
raise
except BaseException as e:
logging.critical('Problem hitting tildes website: {}'.format(str(e)))
logging.error('Problem hitting tildes website: {}'.format(str(e)))
return False
def feed():
@@ -71,15 +71,11 @@ def story(ref):
html = api(SITE_LINK(group_lookup[ref], ref))
else:
html = api(API_ITEM(ref))
if not html:
logging.info('Bad Tildes API response.')
return False
if not html: return False
soup = BeautifulSoup(html, features='html.parser')
a = soup.find('article', class_='topic-full')
if a is None:
logging.info('Tildes <article> element not found.')
return False
if a is None: return False
h = a.find('header')
lu = h.find('a', class_='link-user')
@@ -87,7 +83,6 @@ def story(ref):
error = a.find('div', class_='text-error')
if error:
if 'deleted' in error.string or 'removed' in error.string:
logging.info('Article was deleted or removed.')
return False
s = {}
@@ -107,21 +102,7 @@ def story(ref):
ch = a.find('header', class_='topic-comments-header')
s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
if s['group'].split('.')[0] not in [
'~arts',
'~comp',
'~creative',
'~design',
'~engineering',
'~finance',
'~science',
'~tech',
]:
logging.info('Group ({}) not in whitelist.'.format(s['group']))
return False
if s['score'] < 15 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
if s['score'] < 8 and s['num_comments'] < 6:
return False
td = a.find('div', class_='topic-full-text')
@@ -132,7 +113,7 @@ def story(ref):
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
print(feed())
#print(feed())
#normal = story('gxt')
#print(normal)
#no_comments = story('gxr')
@@ -141,8 +122,8 @@ if __name__ == '__main__':
#print(self_post)
#li_comment = story('gqx')
#print(li_comment)
#broken = story('q4y')
#print(broken)
broken = story('q4y')
print(broken)
# make sure there's no self-reference
#import copy

View File

@@ -4,6 +4,7 @@ certifi==2020.6.20
chardet==3.0.4
click==7.1.2
commonmark==0.9.1
extruct==0.10.0
Flask==1.1.2
Flask-Cors==3.0.8
gevent==20.6.2
@@ -11,11 +12,13 @@ greenlet==0.4.16
idna==2.10
itsdangerous==1.1.0
Jinja2==2.11.2
lxml==4.6.1
MarkupSafe==1.1.1
packaging==20.4
praw==6.4.0
prawcore==1.4.0
pyparsing==2.4.7
pytz==2020.4
requests==2.24.0
six==1.15.0
soupsieve==2.0.1
@@ -27,3 +30,4 @@ websocket-client==0.57.0
Werkzeug==1.0.1
zope.event==4.4
zope.interface==5.1.0
python-dateutil==2.8.1

View File

@@ -0,0 +1,41 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
import requests
DECLUTTER_API = 'https://declutter.1j.nz/details'
DECLUTTER_COMMENT_API = 'https://declutter.1j.nz/comments'
TIMEOUT = 30
def get_html(url):
logging.info(f"Declutter Scraper: {url}")
details = get_details(url)
if not details:
return ''
return details['content']
def get_details(url):
try:
r = requests.post(DECLUTTER_API, data=dict(url=url), timeout=TIMEOUT)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem decluttering article: {}'.format(str(e)))
return None
def get_comments(url):
try:
r = requests.post(DECLUTTER_COMMENT_API, data=dict(url=url), timeout=TIMEOUT)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem getting comments for article: {}'.format(str(e)))
return None

View File

@@ -0,0 +1,27 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
import requests
READ_API = 'http://127.0.0.1:33843/details'
TIMEOUT = 20
def get_html(url):
logging.info(f"Local Scraper: {url}")
details = get_details(url)
if not details:
return ''
return details['content']
def get_details(url):
try:
r = requests.post(READ_API, data=dict(url=url), timeout=TIMEOUT)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem getting article: {}'.format(str(e)))
return None

View File

@@ -0,0 +1,37 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
import requests
import time
OUTLINE_REFERER = 'https://outline.com/'
OUTLINE_API = 'https://api.outline.com/v3/parse_article'
TIMEOUT = 20
def get_html(url):
details = get_details(url)
if not details:
return ''
return details['html']
def get_details(url):
try:
logging.info(f"Outline Scraper: {url}")
params = {'source_url': url}
headers = {'Referer': OUTLINE_REFERER}
r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=TIMEOUT)
if r.status_code == 429:
logging.info('Rate limited by outline, sleeping 30s and skipping...')
time.sleep(30)
return None
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
data = r.json()['data']
if 'URL is not supported by Outline' in data['html']:
raise Exception('URL not supported by Outline')
return data
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem outlining article: {}'.format(str(e)))
return None
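The three scraper modules above share one contract: get_html(url) returns article HTML or an empty result. feed.get_article() walks them in the order given by settings.SCRAPERS and takes the first hit. A condensed sketch of that dispatch, assuming it runs inside the apiserver package so settings and the scrapers package import as shown:

```python
import settings
from scrapers import declutter, outline, local

SCRAPER_MODULES = {'declutter': declutter, 'outline': outline, 'local': local}

def get_article_html(url):
    order = list(settings.SCRAPERS or ['local'])
    if 'local' not in order:
        order.append('local')  # keep the local readerserver as a last resort
    for name in order:
        module = SCRAPER_MODULES.get(name)
        if not module:
            continue  # unknown name in settings, skip it
        try:
            html = module.get_html(url)
            if html:
                return html
        except KeyboardInterrupt:
            raise
        except Exception:
            continue  # fall through to the next scraper
    return ''
```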

View File

@@ -1,58 +0,0 @@
import time
import json
import logging
import feed
import database
import search
database.init()
def fix_gzip_bug(story_list):
FIX_THRESHOLD = 150
count = 1
for sid in story_list:
try:
sid = sid[0]
story = database.get_story(sid)
full_json = json.loads(story.full_json)
meta_json = json.loads(story.meta_json)
text = full_json.get('text', '')
count = text.count('�')
if not count: continue
ratio = count / len(text) * 1000
print('Bad story:', sid, 'Num ?:', count, 'Ratio:', ratio)
if ratio < FIX_THRESHOLD: continue
print('Attempting to fix...')
valid = feed.update_story(meta_json, is_manual=True)
if valid:
database.put_story(meta_json)
search.put_story(meta_json)
print('Success')
else:
print('Story was not valid')
time.sleep(3)
except KeyboardInterrupt:
raise
except BaseException as e:
logging.exception(e)
breakpoint()
if __name__ == '__main__':
num_stories = database.count_stories()
print('Fix {} stories?'.format(num_stories))
print('Press ENTER to continue, ctrl-c to cancel')
input()
story_list = database.get_story_list()
fix_gzip_bug(story_list)

View File

@@ -1,62 +0,0 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.INFO)
import database
from sqlalchemy import select
import search
import sys
import time
import json
import requests
database.init()
search.init()
BATCH_SIZE = 5000
def put_stories(stories):
return search.meili_api(requests.post, 'indexes/qotnews/documents', stories)
def get_update(update_id):
return search.meili_api(requests.get, 'tasks/{}'.format(update_id))
if __name__ == '__main__':
num_stories = database.count_stories()
print('Reindex {} stories?'.format(num_stories))
print('Press ENTER to continue, ctrl-c to cancel')
input()
story_list = database.get_story_list()
count = 1
while len(story_list):
stories = []
for _ in range(BATCH_SIZE):
try:
sid = story_list.pop()
except IndexError:
break
story = database.get_story(sid)
print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
story_obj = json.loads(story.meta_json)
stories.append(story_obj)
count += 1
res = put_stories(stories)
update_id = res['uid']
print('Waiting for processing', end='')
while get_update(update_id)['status'] != 'succeeded':
time.sleep(0.5)
print('.', end='', flush=True)
print()
print('Done.')

View File

@@ -1,23 +0,0 @@
import time
import requests
def test_search_api():
num_tests = 100
total_time = 0
for i in range(num_tests):
start = time.time()
res = requests.get('http://127.0.0.1:33842/api/search?q=iphone')
res.raise_for_status()
duration = time.time() - start
total_time += duration
avg_time = total_time / num_tests
print('Average search time:', avg_time)
if __name__ == '__main__':
test_search_api()

View File

@@ -4,62 +4,83 @@ logging.basicConfig(
level=logging.DEBUG)
import requests
import settings
SEARCH_ENABLED = bool(settings.MEILI_URL)
MEILI_URL = 'http://127.0.0.1:7700/'
def meili_api(method, route, json=None, params=None, parse_json=True):
def create_index():
try:
r = method(settings.MEILI_URL + route, json=json, params=params, timeout=4)
if r.status_code > 299:
json = dict(name='qotnews', uid='qotnews')
r = requests.post(MEILI_URL + 'indexes', json=json, timeout=2)
if r.status_code != 201:
raise Exception('Bad response code ' + str(r.status_code))
if parse_json:
return r.json()
else:
r.encoding = 'utf-8'
return r.text
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e))
logging.error('Problem creating MeiliSearch index: {}'.format(str(e)))
return False
def create_index():
json = dict(uid='qotnews', primaryKey='id')
return meili_api(requests.post, 'indexes', json=json)
def update_rankings():
json = ['typo', 'words', 'proximity', 'date:desc', 'exactness']
return meili_api(requests.post, 'indexes/qotnews/settings/ranking-rules', json=json)
try:
json = ['typo', 'words', 'proximity', 'attribute', 'desc(date)', 'wordsPosition', 'exactness']
r = requests.post(MEILI_URL + 'indexes/qotnews/settings/ranking-rules', json=json, timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem setting MeiliSearch ranking rules: {}'.format(str(e)))
return False
def update_attributes():
json = ['title', 'url', 'author']
r = meili_api(requests.post, 'indexes/qotnews/settings/searchable-attributes', json=json)
json = ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments']
r = meili_api(requests.post, 'indexes/qotnews/settings/displayed-attributes', json=json)
return r
try:
json = ['title', 'url', 'author', 'link', 'id']
r = requests.post(MEILI_URL + 'indexes/qotnews/settings/searchable-attributes', json=json, timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
requests.delete(MEILI_URL + 'indexes/qotnews/settings/displayed-attributes', timeout=2)
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem setting MeiliSearch searchable attributes: {}'.format(str(e)))
return False
def init():
if not SEARCH_ENABLED:
logging.info('Search is not enabled, skipping init.')
return
print(create_index())
create_index()
update_rankings()
update_attributes()
def put_story(story):
if not SEARCH_ENABLED: return
return meili_api(requests.post, 'indexes/qotnews/documents', [story])
story = story.copy()
story.pop('text', None)
story.pop('comments', None)
try:
r = requests.post(MEILI_URL + 'indexes/qotnews/documents', json=[story], timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem putting MeiliSearch story: {}'.format(str(e)))
return False
def search(q):
if not SEARCH_ENABLED: return []
params = dict(q=q, limit=settings.FEED_LENGTH)
r = meili_api(requests.get, 'indexes/qotnews/search', params=params, parse_json=False)
return r
try:
params = dict(q=q, limit=250)
r = requests.get(MEILI_URL + 'indexes/qotnews/search', params=params, timeout=2)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()['hits']
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem searching MeiliSearch: {}'.format(str(e)))
return False
if __name__ == '__main__':
init()
create_index()
print(update_rankings())
print(search('facebook'))
print(search('the'))
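One side of the search.py diff routes every MeiliSearch call through a single meili_api() helper that owns the base URL, timeout and error handling. A condensed sketch of that wrapper, using only routes and payloads that appear in the diff; MEILI_URL is the local default and a running MeiliSearch instance is assumed:

```python
import logging
import requests

MEILI_URL = 'http://127.0.0.1:7700/'

def meili_api(method, route, json=None, params=None):
    # One place for the base URL, timeout and error handling of every call.
    try:
        r = method(MEILI_URL + route, json=json, params=params, timeout=4)
        if r.status_code > 299:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e))
        return False

# Usage mirroring the diff: create the index, then query it.
print(meili_api(requests.post, 'indexes', json=dict(uid='qotnews', primaryKey='id')))
print(meili_api(requests.get, 'indexes/qotnews/search', params=dict(q='facebook', limit=250)))
```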

View File

@@ -28,8 +28,6 @@ from flask_cors import CORS
database.init()
search.init()
news_index = 0
def new_id():
nid = gen_rand_id()
while database.get_story(nid):
@@ -42,11 +40,8 @@ cors = CORS(flask_app)
@flask_app.route('/api')
def api():
skip = request.args.get('skip', 0)
limit = request.args.get('limit', settings.FEED_LENGTH)
stories = database.get_stories(limit, skip)
# hacky nested json
res = Response('{"stories":[' + ','.join(stories) + ']}')
stories = database.get_stories(settings.MAX_STORY_AGE)
res = Response(json.dumps({"stories": stories}))
res.headers['content-type'] = 'application/json'
return res
@@ -56,10 +51,8 @@ def apisearch():
if len(q) >= 3:
results = search.search(q)
else:
results = '[]'
res = Response(results)
res.headers['content-type'] = 'application/json'
return res
results = []
return dict(results=results)
@flask_app.route('/api/submit', methods=['POST'], strict_slashes=False)
def submit():
@@ -67,8 +60,6 @@ def submit():
url = request.form['url']
nid = new_id()
logging.info('Manual submission: ' + url)
parse = urlparse(url)
if 'news.ycombinator.com' in parse.hostname:
source = 'hackernews'
@@ -76,9 +67,6 @@ def submit():
elif 'tildes.net' in parse.hostname and '~' in url:
source = 'tildes'
ref = parse.path.split('/')[2]
elif 'lobste.rs' in parse.hostname and '/s/' in url:
source = 'lobsters'
ref = parse.path.split('/')[2]
elif 'reddit.com' in parse.hostname and 'comments' in url:
source = 'reddit'
ref = parse.path.split('/')[4]
@@ -111,8 +99,9 @@ def submit():
def story(sid):
story = database.get_story(sid)
if story:
# hacky nested json
res = Response('{"story":' + story.full_json + '}')
related = database.get_stories_by_url(story.meta['url'])
related = [r.meta for r in related]
res = Response(json.dumps({"story": story.data, "related": related}))
res.headers['content-type'] = 'application/json'
return res
else:
@@ -122,11 +111,9 @@ def story(sid):
@flask_app.route('/search')
def index():
return render_template('index.html',
title='QotNews',
url='news.t0.vc',
description='Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode',
robots='index',
)
title='Feed',
url='news.t0.vc',
description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')
@flask_app.route('/<sid>', strict_slashes=False)
@flask_app.route('/<sid>/c', strict_slashes=False)
@@ -138,7 +125,7 @@ def static_story(sid):
story = database.get_story(sid)
if not story: return abort(404)
story = json.loads(story.full_json)
story = story.data
score = story['score']
num_comments = story['num_comments']
@@ -151,71 +138,65 @@ def static_story(sid):
url = url.replace('www.', '')
return render_template('index.html',
title=story['title'] + ' | QotNews',
url=url,
description=description,
robots='noindex',
)
title=story['title'],
url=url,
description=description)
http_server = WSGIServer(('', 33842), flask_app)
def feed_thread():
global news_index
def _add_new_refs():
for ref, source in feed.get_list():
if database.get_story_by_ref(ref):
continue
try:
nid = new_id()
database.put_ref(ref, nid, source)
logging.info('Added ref ' + ref)
except database.IntegrityError:
continue
def _update_current_story(item):
try:
story = database.get_story(item['sid']).data
except AttributeError:
story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
logging.info('Updating story: {}'.format(str(story['ref'])))
valid = feed.update_story(story)
if valid:
database.put_story(story)
search.put_story(story)
else:
database.del_ref(item['ref'])
logging.info('Removed ref {}'.format(item['ref']))
def feed_thread():
ref_list = []
try:
while True:
# onboard new stories
if news_index == 0:
for ref, source in feed.list():
if database.get_story_by_ref(ref):
continue
try:
nid = new_id()
logging.info('Adding ref: {}, id: {}, source: {}'.format(ref, nid, source))
database.put_ref(ref, nid, source)
except database.IntegrityError:
logging.info('Already have ID / ref, skipping.')
continue
ref_list = database.get_reflist(settings.FEED_LENGTH)
if not len(ref_list):
_add_new_refs()
ref_list = database.get_reflist()
# update current stories
if news_index < len(ref_list):
item = ref_list[news_index]
try:
story_json = database.get_story(item['sid']).full_json
story = json.loads(story_json)
except AttributeError:
story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
logging.info('Updating {} story: {}, index: {}'.format(story['source'], story['ref'], news_index))
valid = feed.update_story(story)
if valid:
database.put_story(story)
search.put_story(story)
else:
database.del_ref(item['ref'])
logging.info('Removed ref {}'.format(item['ref']))
else:
logging.info('Skipping index: ' + str(news_index))
if len(ref_list):
item = ref_list.pop(0)
_update_current_story(item)
gevent.sleep(6)
news_index += 1
if news_index == settings.FEED_LENGTH: news_index = 0
except KeyboardInterrupt:
logging.info('Ending feed thread...')
except ValueError as e:
logging.critical('feed_thread error: {} {}'.format(e.__class__.__name__, e))
logging.error('feed_thread error: {} {}'.format(e.__class__.__name__, e))
http_server.stop()
logging.info('Starting Feed thread...')
print('Starting Feed thread...')
gevent.spawn(feed_thread)
logging.info('Starting HTTP thread...')
print('Starting HTTP thread...')
try:
http_server.serve_forever()
except KeyboardInterrupt:

View File

@@ -1,23 +1,28 @@
# QotNews settings
# edit this file and save it as settings.py
MAX_STORY_AGE = 3*24*60*60
# Feed Lengths
# Number of top items from each site to pull
# set to 0 to disable that site
FEED_LENGTH = 75
NUM_HACKERNEWS = 15
NUM_LOBSTERS = 10
NUM_REDDIT = 15
NUM_REDDIT = 10
NUM_TILDES = 5
NUM_SUBSTACK = 10
# Meilisearch server URL
# Leave blank if not using search
#MEILI_URL = 'http://127.0.0.1:7700/'
MEILI_URL = ''
SITEMAP = {}
# SITEMAP['nzherald'] = { 'url': "https://www.nzherald.co.nz/arcio/news-sitemap/", 'count': 10},
# SITEMAP['stuff'] = { 'url': "https://www.stuff.co.nz/sitemap.xml", 'count': 10},
# Readerserver URL
# Leave blank if not using, but that defeats the whole point
READER_URL = 'http://127.0.0.1:33843/'
SUBSTACK = {}
# SUBSTACK['webworm'] = { 'url': "https://www.webworm.co", 'count': 10},
# SUBSTACK['the bulletin'] = { 'url': "https://thespinoff.substack.com", 'count': 10},
CATEGORY = {}
# CATEGORY['rnz national'] = { 'url': "https://www.rnz.co.nz/news/national", 'count': 10},
SCRAPERS = ['declutter', 'outline', 'local']
# Reddit account info
# leave blank if not using Reddit
@@ -33,9 +38,13 @@ SUBREDDITS = [
'HistoryofIdeas',
'LaymanJournals',
'PhilosophyofScience',
'PoliticsPDFs',
'Scholar',
'StateOfTheUnion',
'TheAgora',
'TrueFilm',
'TrueReddit',
'UniversityofReddit',
'culturalstudies',
'hardscience',
'indepthsports',
@@ -44,7 +53,4 @@ SUBREDDITS = [
'neurophilosophy',
'resilientcommunities',
'worldevents',
'StallmanWasRight',
'EverythingScience',
'longevity',
]
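A hedged example of filling in the new settings.py dictionaries above, based on the commented-out samples in settings.py.example; the 'tz' and 'excludes' values below are purely illustrative, not values from the repository:

```python
# settings.py sketch; adjust counts and URLs to taste.
MAX_STORY_AGE = 3*24*60*60

NUM_HACKERNEWS = 15
NUM_REDDIT = 10
NUM_TILDES = 5
NUM_SUBSTACK = 10

SITEMAP = {}
SITEMAP['stuff'] = {
    'url': "https://www.stuff.co.nz/sitemap.xml",
    'count': 10,
    'tz': 'Pacific/Auckland',              # hypothetical: pin naive lastmod times to NZ
    'excludes': ['/sport/', '/quizzes/'],  # hypothetical substrings to skip
}

SUBSTACK = {}
SUBSTACK['webworm'] = { 'url': "https://www.webworm.co", 'count': 10 }

CATEGORY = {}
CATEGORY['rnz national'] = { 'url': "https://www.rnz.co.nz/news/national", 'count': 10 }

SCRAPERS = ['declutter', 'outline', 'local']
```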

View File

@@ -8,14 +8,6 @@ import string
from bleach.sanitizer import Cleaner
def alert_tanner(message):
try:
logging.info('Alerting Tanner: ' + message)
params = dict(qotnews=message)
requests.get('https://tbot.tannercollin.com/message', params=params, timeout=4)
except BaseException as e:
logging.error('Problem alerting Tanner: ' + str(e))
def gen_rand_id():
return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))

View File

@@ -1,53 +1,14 @@
const port = 33843;
const express = require('express');
const app = express();
const port = 33843;
const request = require('request');
const JSDOM = require('jsdom').JSDOM;
const { Readability } = require('readability');
const simple = require('./simple');
app.use(express.urlencoded({ extended: true }));
app.get('/', (req, res) => {
res.send('<form method="POST" accept-charset="UTF-8"><input name="url"><button type="submit">SUBMIT</button></form>');
});
const requestCallback = (url, res) => (error, response, body) => {
if (!error && response.statusCode == 200) {
console.log('Response OK.');
const doc = new JSDOM(body, {url: url});
const reader = new Readability(doc.window.document);
const article = reader.parse();
if (article && article.content) {
res.send(article.content);
} else {
res.sendStatus(404);
}
} else {
console.log('Response error:', error ? error.toString() : response.statusCode);
res.sendStatus(response ? response.statusCode : 404);
}
};
app.post('/', (req, res) => {
const url = req.body.url;
const requestOptions = {
url: url,
gzip: true,
//headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'},
//headers: {'User-Agent': 'Twitterbot/1.0'},
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0',
'X-Forwarded-For': '66.249.66.1',
},
};
console.log('Parse request for:', url);
request(requestOptions, requestCallback(url, res));
});
app.get('/', (req, res) => res.send(simple.FORM));
app.post('/', (req, res) => simple.scrape(req, res));
app.post('/details', (req, res) => simple.details(req, res));
// app.post('/browser', (req, res) => browser.scrape(req, res));
// app.post('/browser/details', (req, res) => browser.details(req, res));
app.listen(port, () => {
console.log(`Example app listening on port ${port}!`);

readerserver/simple.js (new file, 43 lines)
View File

@@ -0,0 +1,43 @@
const request = require('request');
const JSDOM = require('jsdom').JSDOM;
const { Readability } = require('readability');
const options = url => ({
url: url,
headers: {
'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)',
'X-Forwarded-For': '66.249.66.1',
},
});
const extract = (url, body) => {
const doc = new JSDOM(body, { url: url });
const reader = new Readability(doc.window.document);
return reader.parse();
};
module.exports.FORM = '<form method="POST" action="/" accept-charset="UTF-8"><input name="url"><button type="submit">SUBMIT</button></form>';
module.exports.scrape = (req, res) => request(options(req.body.url), (error, response, body) => {
if (error || response.statusCode != 200) {
console.log('Response error:', error ? error.toString() : response.statusCode);
return res.sendStatus(response ? response.statusCode : 404);
}
const article = extract(req.body.url, body);
if (article && article.content) {
return res.send(article.content);
}
return res.sendStatus(404);
});
module.exports.details = (req, res) => request(options(req.body.url), (error, response, body) => {
if (error || response.statusCode != 200) {
console.log('Response error:', error ? error.toString() : response.statusCode);
return res.sendStatus(response ? response.statusCode : 404);
}
const article = extract(url, body);
if (article) {
return res.send(article);
}
return res.sendStatus(404);
});
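The refactored reader server keeps a small HTTP interface: POST a form-encoded `url` to `/` to receive the extracted article HTML, or to `/details` to receive the full parsed article object. As a rough sketch of a caller (assuming the port 33843 from main.js above; the function names and error handling here are illustrative, not part of this diff):

```
import requests

READ_API = 'http://127.0.0.1:33843'  # port defined in readerserver/main.js

def get_article_html(url, timeout=20):
    # POST the target URL as form data, matching <input name="url"> in FORM above
    r = requests.post(READ_API + '/', data=dict(url=url), timeout=timeout)
    r.raise_for_status()
    return r.text  # Readability's article.content as an HTML string

def get_article_details(url, timeout=20):
    # /details responds with the whole parsed article (title, byline, content, ...)
    r = requests.post(READ_API + '/details', data=dict(url=url), timeout=timeout)
    r.raise_for_status()
    return r.json()

if __name__ == '__main__':
    print(get_article_details('https://example.com/some-story')['title'])
```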

File diff suppressed because it is too large.

View File

@@ -3,7 +3,7 @@
Download MeiliSearch with:
```
wget https://github.com/meilisearch/meilisearch/releases/download/v0.27.0/meilisearch-linux-amd64
wget https://github.com/meilisearch/MeiliSearch/releases/download/v0.11.1/meilisearch-linux-amd64
chmod +x meilisearch-linux-amd64
```
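Once the binary is running, search is a plain HTTP call. A minimal sketch in Python (the index name `qotnews` and the default port 7700 are assumptions, not taken from this diff):

```
import requests

MEILI_URL = 'http://127.0.0.1:7700'  # MeiliSearch default port

def search(q):
    # GET /indexes/<uid>/search?q=... ; the index uid 'qotnews' is an assumption
    r = requests.get(MEILI_URL + '/indexes/qotnews/search', params=dict(q=q), timeout=4)
    r.raise_for_status()
    return r.json()['hits']
```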

View File

@@ -8,8 +8,6 @@
content="{{ description }}"
/>
<meta content="{{ url }}" name="og:site_name">
<meta name="robots" content="{{ robots }}">
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
@@ -28,7 +26,7 @@
work correctly both with client-side routing and a non-root public URL.
Learn how to configure a non-root public URL by running `npm run build`.
-->
<title>{{ title }}</title>
<title>{{ title }} - QotNews</title>
<style>
html {
@@ -39,23 +37,13 @@
}
.nojs {
color: white;
max-width: 32rem;
}
</style>
</head>
<body>
<div class="nojs">
<noscript>
You need to enable JavaScript to run this app because it's written in React.
I was planning on writing a server-side version, but I've become distracted
by other projects -- sorry!
<br/>
I originally wrote this for myself, and of course I whitelist JavaScript on
all my own domains.
<br/><br/>
Alternatively, try activex.news.t0.vc for an ActiveX™ version.
</noscript>
<noscript>You need to enable JavaScript to run this app.</noscript>
</div>
<div id="root"></div>
<!--

View File

@@ -3,10 +3,8 @@ import { BrowserRouter as Router, Route, Link, Switch } from 'react-router-dom';
import localForage from 'localforage';
import './Style-light.css';
import './Style-dark.css';
import './Style-black.css';
import './Style-red.css';
import './fonts/Fonts.css';
import { BackwardDot, ForwardDot } from './utils.js';
import { ForwardDot } from './utils.js';
import Feed from './Feed.js';
import Article from './Article.js';
import Comments from './Comments.js';
@@ -40,16 +38,6 @@ class App extends React.Component {
localStorage.setItem('theme', 'dark');
}
black() {
this.setState({ theme: 'black' });
localStorage.setItem('theme', 'black');
}
red() {
this.setState({ theme: 'red' });
localStorage.setItem('theme', 'red');
}
componentDidMount() {
if (!this.cache.length) {
localForage.iterate((value, key) => {
@@ -59,61 +47,22 @@ class App extends React.Component {
}
}
goFullScreen() {
if ('wakeLock' in navigator) {
navigator.wakeLock.request('screen');
}
document.body.requestFullscreen({ navigationUI: 'hide' }).then(() => {
window.addEventListener('resize', () => this.forceUpdate());
this.forceUpdate();
});
};
exitFullScreen() {
document.exitFullscreen().then(() => {
this.forceUpdate();
});
};
render() {
const theme = this.state.theme;
if (theme === 'dark') {
document.body.style.backgroundColor = '#1a1a1a';
} else if (theme === 'black') {
document.body.style.backgroundColor = '#000';
} else if (theme === 'red') {
document.body.style.backgroundColor = '#000';
} else {
document.body.style.backgroundColor = '#eeeeee';
}
const fullScreenAvailable = document.fullscreenEnabled ||
document.mozFullscreenEnabled ||
document.webkitFullscreenEnabled ||
document.msFullscreenEnabled;
document.body.style.backgroundColor = theme === 'dark' ? '#000' : '#eeeeee';
return (
<div className={theme}>
<Router>
<div className='container menu'>
<p>
<Link to='/'>QotNews</Link>
<span className='theme'><a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a> - <a href='#' onClick={() => this.black()}>Black</a> - <a href='#' onClick={() => this.red()}>Red</a></span>
<Link to='/'>QotNews - Feed</Link>
<span className='theme'>Theme: <a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a></span>
<br />
<span className='slogan'>Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.</span>
<span className='slogan'>Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode.</span>
</p>
<Route path='/(|search)' component={Search} />
<Route path='/(|search)' component={Submit} />
{fullScreenAvailable &&
<Route path='/(|search)' render={() => !document.fullscreenElement ?
<button className='fullscreen' onClick={() => this.goFullScreen()}>Enter Fullscreen</button>
:
<button className='fullscreen' onClick={() => this.exitFullScreen()}>Exit Fullscreen</button>
} />
}
</div>
<Route path='/' exact render={(props) => <Feed {...props} updateCache={this.updateCache} />} />
@@ -123,7 +72,6 @@ class App extends React.Component {
</Switch>
<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} />} />
<BackwardDot />
<ForwardDot />
<ScrollToTop />

View File

@@ -67,8 +67,7 @@ class Article extends React.Component {
{story ?
<div className='article'>
<Helmet>
<title>{story.title} | QotNews</title>
<meta name="robots" content="noindex" />
<title>{story.title} - QotNews</title>
</Helmet>
<h1>{story.title}</h1>

View File

@@ -72,7 +72,7 @@ class Article extends React.Component {
}
displayComment(story, c, level) {
const cid = c.author+c.date;
const cid = c.author + c.date;
const collapsed = this.state.collapsed.includes(cid);
const expanded = this.state.expanded.includes(cid);
@@ -85,19 +85,22 @@ class Article extends React.Component {
<div className='info'>
<p>
{c.author === story.author ? '[OP]' : ''} {c.author || '[Deleted]'}
{' '} | <HashLink to={'#'+cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
{' '} | <HashLink to={'#' + cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
{hidden || hasChildren &&
<span className='collapser pointer' onClick={() => this.collapseComment(cid)}></span>
}
{hasChildren && (
hidden ?
<span className='collapser expander pointer' onClick={() => this.expandComment(cid)}>+</span>
:
<span className='collapser pointer' onClick={() => this.collapseComment(cid)}></span>
)}
</p>
</div>
<div className={collapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text }} />
<div className={collapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text }} />
{hidden && hasChildren ?
<div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c)-1} more]</div>
:
<div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c) - 1} more]</div>
:
c.comments.map(i => this.displayComment(story, i, level + 1))
}
</div>
@@ -115,8 +118,7 @@ class Article extends React.Component {
{story ?
<div className='article'>
<Helmet>
<title>{story.title} | QotNews</title>
<meta name="robots" content="noindex" />
<title>{story.title} - QotNews Comments</title>
</Helmet>
<h1>{story.title}</h1>
@@ -131,7 +133,7 @@ class Article extends React.Component {
{story.comments.map(c => this.displayComment(story, c, 0))}
</div>
</div>
:
:
<p>loading...</p>
}
<ToggleDot id={id} article={true} />

View File

@@ -22,20 +22,21 @@ class Feed extends React.Component {
const updated = !this.state.stories || this.state.stories[0].id !== result.stories[0].id;
console.log('updated:', updated);
this.setState({ stories: result.stories });
localStorage.setItem('stories', JSON.stringify(result.stories));
const { stories } = result;
this.setState({ stories });
localStorage.setItem('stories', JSON.stringify(stories));
if (updated) {
localForage.clear();
result.stories.forEach((x, i) => {
stories.forEach((x, i) => {
fetch('/api/' + x.id)
.then(res => res.json())
.then(result => {
localForage.setItem(x.id, result.story)
.then(({ story }) => {
localForage.setItem(x.id, story)
.then(console.log('preloaded', x.id, x.title));
this.props.updateCache(x.id, result.story);
}, error => {}
);
this.props.updateCache(x.id, story);
}, error => { }
);
});
}
},
@@ -52,8 +53,7 @@ class Feed extends React.Component {
return (
<div className='container'>
<Helmet>
<title>QotNews</title>
<meta name="robots" content="index" />
<title>Feed - QotNews</title>
</Helmet>
{error && <p>Connection error?</p>}
{stories ?
@@ -62,7 +62,7 @@ class Feed extends React.Component {
<div className='item' key={x.id}>
<div className='title'>
<Link className='link' to={'/' + x.id}>
<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
<img className='source-logo' src={logos[x.source] || logos[x.source.split(' ')[0]]} alt='source logo' /> {x.title}
</Link>
<span className='source'>
@@ -74,7 +74,7 @@ class Feed extends React.Component {
</div>
)}
</div>
:
:
<p>loading...</p>
}
</div>

View File

@@ -29,7 +29,7 @@ class Results extends React.Component {
.then(res => res.json())
.then(
(result) => {
this.setState({ stories: result.hits });
this.setState({ stories: result.results });
},
(error) => {
if (error.message !== 'The operation was aborted. ') {
@@ -56,7 +56,7 @@ class Results extends React.Component {
return (
<div className='container'>
<Helmet>
<title>Search Results | QotNews</title>
<title>Feed - QotNews</title>
</Helmet>
{error && <p>Connection error?</p>}
{stories ?

View File

@@ -15,7 +15,6 @@ class ScrollToTop extends React.Component {
}
window.scrollTo(0, 0);
document.body.scrollTop = 0;
}
render() {

View File

@@ -37,7 +37,7 @@ class Search extends Component {
<span className='search'>
<form onSubmit={this.searchAgain}>
<input
placeholder='Search...'
placeholder='Search... (fixed)'
value={search}
onChange={this.searchArticles}
ref={this.inputRef}

View File

@@ -1,68 +0,0 @@
.black {
  color: #ddd;
}

.black a {
  color: #ddd;
}

.black input {
  color: #ddd;
  border: 1px solid #828282;
}

.black button {
  background-color: #444444;
  border-color: #bbb;
  color: #ddd;
}

.black .item {
  color: #828282;
}

.black .item .source-logo {
  filter: grayscale(1);
}

.black .item a {
  color: #828282;
}

.black .item a.link {
  color: #ddd;
}

.black .item a.link:visited {
  color: #828282;
}

.black .item .info a.hot {
  color: #cccccc;
}

.black .article a {
  border-bottom: 1px solid #aaaaaa;
}

.black .article u {
  border-bottom: 1px solid #aaaaaa;
  text-decoration: none;
}

.black .story-text video,
.black .story-text img {
  filter: brightness(50%);
}

.black .article .info {
  color: #828282;
}

.black .article .info a {
  border-bottom: none;
  color: #828282;
}

.black .comment.lined {
  border-left: 1px solid #444444;
}

View File

@@ -11,16 +11,14 @@
border: 1px solid #828282;
}
.dark button {
background-color: #444444;
border-color: #bbb;
color: #ddd;
}
.dark .item {
color: #828282;
}
.dark .item .source-logo {
filter: grayscale(1);
}
.dark .item a {
color: #828282;
}
@@ -45,7 +43,6 @@
text-decoration: none;
}
.dark .story-text video,
.dark .story-text img {
filter: brightness(50%);
}

View File

@@ -2,30 +2,9 @@ body {
text-rendering: optimizeLegibility;
font: 1rem/1.3 sans-serif;
color: #000000;
margin-bottom: 100vh;
word-break: break-word;
font-kerning: normal;
margin: 0;
}
::backdrop {
background-color: rgba(0,0,0,0);
}
body:fullscreen {
overflow-y: scroll !important;
}
body:-ms-fullscreen {
overflow-y: scroll !important;
}
body:-webkit-full-screen {
overflow-y: scroll !important;
}
body:-moz-full-screen {
overflow-y: scroll !important;
}
#root {
margin: 8px 8px 100vh 8px !important;
}
a {
@@ -43,12 +22,6 @@ input {
border-radius: 4px;
}
.fullscreen {
margin: 0.25rem;
padding: 0.25rem;
}
pre {
overflow: auto;
}
@@ -212,20 +185,16 @@ span.source {
cursor: pointer;
}
.dot {
cursor: pointer;
.toggleDot {
position: fixed;
bottom: 1rem;
left: 1rem;
height: 3rem;
width: 3rem;
background-color: #828282;
border-radius: 50%;
}
.toggleDot {
bottom: 1rem;
left: 1rem;
}
.toggleDot .button {
font: 2rem/1 'icomoon';
position: relative;
@@ -234,27 +203,21 @@ span.source {
}
.forwardDot {
cursor: pointer;
position: fixed;
bottom: 1rem;
right: 1rem;
height: 3rem;
width: 3rem;
background-color: #828282;
border-radius: 50%;
}
.forwardDot .button {
font: 2rem/1 'icomoon';
font: 2.5rem/1 'icomoon';
position: relative;
top: 0.5rem;
left: 0.5rem;
}
.backwardDot {
bottom: 1rem;
right: 5rem;
}
.backwardDot .button {
font: 2rem/1 'icomoon';
position: relative;
top: 0.5rem;
left: 0.5rem;
top: 0.25rem;
left: 0.3rem;
}
.search form {

View File

@@ -1,82 +0,0 @@
.red {
  color: #b00;
  scrollbar-color: #b00 #440000;
}

.red a {
  color: #b00;
}

.red input {
  color: #b00;
  border: 1px solid #690000;
}

.red input::placeholder {
  color: #690000;
}

.red hr {
  background-color: #690000;
}

.red button {
  background-color: #440000;
  border-color: #b00;
  color: #b00;
}

.red .item,
.red .slogan {
  color: #690000;
}

.red .item .source-logo {
  display: none;
}

.red .item a {
  color: #690000;
}

.red .item a.link {
  color: #b00;
}

.red .item a.link:visited {
  color: #690000;
}

.red .item .info a.hot {
  color: #cc0000;
}

.red .article a {
  border-bottom: 1px solid #aa0000;
}

.red .article u {
  border-bottom: 1px solid #aa0000;
  text-decoration: none;
}

.red .story-text video,
.red .story-text img {
  filter: grayscale(100%) brightness(20%) sepia(100%) hue-rotate(-50deg) saturate(600%) contrast(0.8);
}

.red .article .info {
  color: #690000;
}

.red .article .info a {
  border-bottom: none;
  color: #690000;
}

.red .comment.lined {
  border-left: 1px solid #440000;
}

.red .dot {
  background-color: #440000;
}

View File

@@ -41,7 +41,7 @@ class Submit extends Component {
<span className='search'>
<form onSubmit={this.submitArticle}>
<input
placeholder='Submit URL'
placeholder='Submit Article'
ref={this.inputRef}
/>
</form>

Binary file not shown.

File diff suppressed because one or more lines are too long