Compare commits

..

55 Commits

Author SHA1 Message Date
9ec61ea5bc Ignore dead and political stories 2025-05-27 18:47:17 +00:00
bdc7a6c10d Fix Better HN api content extraction 2025-02-01 22:39:13 +00:00
4858516b01 Add Better HN as an API backup 2025-02-01 21:42:06 +00:00
f10e6063fc Bug fixes 2025-02-01 20:31:35 +00:00
249a616531 Alert on story update error 2024-03-16 20:41:24 +00:00
ab92bd5441 Adjust score and comment thresholds 2024-03-08 03:08:18 +00:00
6b16a768a7 Fix deletion script 2024-03-08 03:08:03 +00:00
57de076fec Increase database timeout 2024-02-27 18:48:56 +00:00
074b898508 Fix lobsters comment parsing 2024-02-27 18:47:00 +00:00
f049d194ab Move scripts into own folder 2024-02-27 18:32:29 +00:00
c2b9a1cb7a Update readability 2024-02-27 18:32:19 +00:00
4435f49e17 Make "dark" theme grey, add "black" theme 2023-09-13 01:19:47 +00:00
494d89ac30 Disable lobsters 2023-09-13 01:02:15 +00:00
e79fca6ecc Replace "indent_level" with "depth" in lobsters API
See:
fe09e5aa31
2023-08-31 07:35:44 +00:00
c65fb69092 Handle Lobsters comment parsing TypeErrors
Too lazy to debug this:

2023-08-29 12:56:35,111 - root - INFO - Updating lobsters story: yktkwr, index: 55
Traceback (most recent call last):
  File "src/gevent/greenlet.py", line 854, in gevent._gevent_cgreenlet.Greenlet.run
  File "/home/tanner/qotnews/apiserver/server.py", line 194, in feed_thread
    valid = feed.update_story(story)
  File "/home/tanner/qotnews/apiserver/feed.py", line 74, in update_story
    res = lobsters.story(story['ref'])
  File "/home/tanner/qotnews/apiserver/feeds/lobsters.py", line 103, in story
    s['comments'] = iter_comments(r['comments'])
  File "/home/tanner/qotnews/apiserver/feeds/lobsters.py", line 76, in iter_comments
    parent_stack = parent_stack[:indent-1]
TypeError: unsupported operand type(s) for -: 'NoneType' and 'int'
2023-08-29T12:56:35Z <Greenlet at 0x7f92ad840ae0: feed_thread> failed with TypeError
2023-08-31 07:30:39 +00:00
632d028e4c Add Tildes group whitelist 2023-07-13 22:54:36 +00:00
ea8e9e5a23 Increase again 2023-06-13 17:11:50 +00:00
2838ea9b41 Increase Tildes story score requirement 2023-06-11 01:01:31 +00:00
f15d108971 Catch all possible Reddit API exceptions 2023-03-15 21:16:37 +00:00
f777348af8 Fix darkmode fullscreen button color 2022-08-11 19:36:36 +00:00
486404a413 Fix fix-stories bug 2022-08-10 04:06:39 +00:00
7c9c07a4cf Hide fullscreen button if it's not available 2022-08-10 04:05:25 +00:00
08d02f6013 Add fullscreen mode 2022-08-08 23:21:49 +00:00
1b54342702 Add red theme 2022-08-08 20:14:57 +00:00
9e9571a3c0 Write fixed stories to database 2022-07-05 00:57:56 +00:00
dc83a70887 Begin script to fix bad gzip text 2022-07-04 20:32:01 +00:00
2e2c9ae837 Move FEED_LENGTH to settings.py, use for search results 2022-07-04 19:08:24 +00:00
61021d8f91 Small UI changes 2022-07-04 19:08:24 +00:00
e65047fead Add accept gzip header to readability server 2022-07-04 19:07:31 +00:00
8e775c189f Add test file 2022-07-04 05:56:06 +00:00
3d9274309a Fix requests text encoding slowness 2022-07-04 05:55:52 +00:00
7bdbbf10b2 Return search results directly from the server 2022-07-04 04:33:01 +00:00
6aa0f78536 Remove Article / Comments, etc thing after name 2022-07-04 04:33:01 +00:00
bf3663bbec Remove hard-coded title 2022-06-30 00:12:22 +00:00
e6589dc61c Adjust title 2022-06-30 00:05:15 +00:00
307e8349f3 Change header based on page 2022-06-30 00:00:30 +00:00
04cd56daa8 Add index / noindex to client 2022-06-29 23:30:39 +00:00
c80769def6 Add noindex meta tag to stories 2022-06-29 23:20:53 +00:00
ebd1ad2140 Increase database timeout 2022-06-24 20:50:27 +00:00
2cc7dd0d6d Update software 2022-05-31 04:24:12 +00:00
6e7cb86d2e Explain no javascript 2022-05-31 04:23:52 +00:00
a25457254f Improve logging, sends tweets to nitter.net 2022-03-05 23:48:46 +00:00
a693ea5342 Remove outline API 2022-03-05 22:05:29 +00:00
7386e1d8b0 Include option to disable readerserver 2022-03-05 22:04:25 +00:00
f8e8597e3a Include option to disable search 2022-03-05 21:58:35 +00:00
55c282ee69 Fix search to work with low-RAM server 2022-03-05 21:33:07 +00:00
3f774a9e38 Improve logging 2021-09-06 00:21:05 +00:00
dcedd4caa1 Add script to reindex search, abstract search API 2021-09-06 00:20:21 +00:00
7a131ebd03 Change the order by which content-type is grabbed 2021-01-30 06:36:02 +00:00
6f64401785 Add optional skip and limit to API route 2021-01-18 03:59:33 +00:00
3ff917e806 Remove colons from date string so Python 3.5 can parse 2020-12-15 23:19:50 +00:00
c9fb9bd5df Add Lobsters to feed 2020-12-12 05:26:33 +00:00
fd9c9c888d Update gitignore 2020-12-11 23:49:45 +00:00
42dcf15374 Increase sqlite lock timeout 2020-11-19 21:38:18 +00:00
d8a0b77765 Blacklist sec.gov website 2020-11-19 21:37:59 +00:00
41 changed files with 1310 additions and 1234 deletions

View File

@@ -109,4 +109,5 @@ settings.py
data.db
data.db.bak
data/archive/*
data/backup/*
qotnews.sqlite

View File

@@ -1,11 +1,11 @@
from datetime import datetime, timedelta
import json
from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import IntegrityError
from sqlalchemy.types import JSON
engine = create_engine('sqlite:///data/qotnews.sqlite')
engine = create_engine('sqlite:///data/qotnews.sqlite', connect_args={'timeout': 360})
Session = sessionmaker(bind=engine)
Base = declarative_base()
@@ -15,8 +15,8 @@ class Story(Base):
sid = Column(String(16), primary_key=True)
ref = Column(String(16), unique=True)
meta = Column(JSON)
data = Column(JSON)
meta_json = Column(String)
full_json = Column(String)
title = Column(String)
class Reflist(Base):
@@ -36,21 +36,19 @@ def get_story(sid):
def put_story(story):
story = story.copy()
data = {}
data.update(story)
full_json = json.dumps(story)
meta = {}
meta.update(story)
meta.pop('text', None)
meta.pop('comments', None)
story.pop('text', None)
story.pop('comments', None)
meta_json = json.dumps(story)
try:
session = Session()
s = Story(
sid=story['id'],
ref=story['ref'],
data=data,
meta=meta,
full_json=full_json,
meta_json=meta_json,
title=story.get('title', None),
)
session.merge(s)
@@ -65,26 +63,19 @@ def get_story_by_ref(ref):
session = Session()
return session.query(Story).filter(Story.ref==ref).first()
def get_stories_by_url(url):
def get_reflist(amount):
session = Session()
return session.query(Story).\
filter(Story.title != None).\
filter(Story.meta['url'].as_string() == url).\
order_by(Story.meta['date'].desc())
def get_reflist():
session = Session()
q = session.query(Reflist).order_by(Reflist.rid.desc())
q = session.query(Reflist).order_by(Reflist.rid.desc()).limit(amount)
return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]
def get_stories(maxage=60*60*24*2):
time = datetime.now().timestamp() - maxage
def get_stories(amount, skip=0):
session = Session()
q = session.query(Reflist, Story.meta).\
q = session.query(Reflist, Story.meta_json).\
order_by(Reflist.rid.desc()).\
join(Story).\
filter(Story.title != None).\
filter(Story.meta['date'] > time).\
order_by(Story.meta['date'].desc())
offset(skip).\
limit(amount)
return [x[1] for x in q]
def put_ref(ref, sid, source):
@@ -110,7 +101,22 @@ def del_ref(ref):
finally:
session.close()
def count_stories():
try:
session = Session()
return session.query(Story).count()
finally:
session.close()
def get_story_list():
try:
session = Session()
return session.query(Story.sid).all()
finally:
session.close()
if __name__ == '__main__':
init()
print(get_story_by_ref('hgi3sy'))
#print(get_story_by_ref('hgi3sy'))
print(len(get_reflist(99999)))

View File

@@ -6,117 +6,84 @@ logging.basicConfig(
import requests
import time
from bs4 import BeautifulSoup
import itertools
import settings
from feeds import hackernews, reddit, tildes, substack, manual, news
from scrapers import outline, declutter, local
from feeds import hackernews, reddit, tildes, manual, lobsters
import utils
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com', 'sec.gov']
TWO_DAYS = 60*60*24*2
substacks = {}
for key, value in settings.SUBSTACK.items():
substacks[key] = substack.Publication(value['url'])
categories = {}
for key, value in settings.CATEGORY.items():
categories[key] = news.Category(value['url'], value.get('tz'))
sitemaps = {}
for key, value in settings.SITEMAP.items():
sitemaps[key] = news.Sitemap(value['url'], value.get('tz'))
def get_list():
feeds = {}
def list():
feed = []
if settings.NUM_HACKERNEWS:
feeds['hackernews'] = [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
feed += [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
if settings.NUM_LOBSTERS:
feed += [(x, 'lobsters') for x in lobsters.feed()[:settings.NUM_LOBSTERS]]
if settings.NUM_REDDIT:
feeds['reddit'] = [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
feed += [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
if settings.NUM_TILDES:
feeds['tildes'] = [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
feed += [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
if settings.NUM_SUBSTACK:
feeds['substack'] = [(x, 'substack') for x in substack.top.feed()[:settings.NUM_SUBSTACK]]
for key, publication in substacks.items():
count = settings.SUBSTACK[key]['count']
feeds[key] = [(x, key) for x in publication.feed()[:count]]
for key, sites in categories.items():
count = settings.CATEGORY[key].get('count') or 0
excludes = settings.CATEGORY[key].get('excludes')
tz = settings.CATEGORY[key].get('tz')
feeds[key] = [(x, key) for x in sites.feed(excludes)[:count]]
for key, sites in sitemaps.items():
count = settings.SITEMAP[key].get('count') or 0
excludes = settings.SITEMAP[key].get('excludes')
feeds[key] = [(x, key) for x in sites.feed(excludes)[:count]]
values = feeds.values()
feed = itertools.chain.from_iterable(itertools.zip_longest(*values, fillvalue=None))
feed = list(filter(None, feed))
return feed
def get_article(url):
scrapers = {
'declutter': declutter,
'outline': outline,
'local': local,
}
available = settings.SCRAPERS or ['local']
if 'local' not in available:
available += ['local']
if not settings.READER_URL:
logging.info('Readerserver not configured, aborting.')
return ''
for scraper in available:
if scraper not in scrapers.keys():
continue
try:
html = scrapers[scraper].get_html(url)
if html:
return html
except KeyboardInterrupt:
raise
except:
pass
return ''
if url.startswith('https://twitter.com'):
logging.info('Replacing twitter.com url with nitter.net')
url = url.replace('twitter.com', 'nitter.net')
try:
r = requests.post(settings.READER_URL, data=dict(url=url), timeout=20)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.text
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem getting article: {}'.format(str(e)))
return ''
def get_content_type(url):
try:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
return requests.get(url, headers=headers, timeout=5).headers['content-type']
except:
return ''
try:
headers = {
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
'X-Forwarded-For': '66.249.66.1',
}
return requests.get(url, headers=headers, timeout=5).headers['content-type']
except:
pass
try:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
return requests.get(url, headers=headers, timeout=10).headers['content-type']
except:
return ''
pass
def update_story(story, is_manual=False):
res = {}
if story['source'] == 'hackernews':
res = hackernews.story(story['ref'])
elif story['source'] == 'reddit':
res = reddit.story(story['ref'])
elif story['source'] == 'tildes':
res = tildes.story(story['ref'])
elif story['source'] == 'substack':
res = substack.top.story(story['ref'])
elif story['source'] in categories.keys():
res = categories[story['source']].story(story['ref'])
elif story['source'] in sitemaps.keys():
res = sitemaps[story['source']].story(story['ref'])
elif story['source'] in substacks.keys():
res = substacks[story['source']].story(story['ref'])
elif story['source'] == 'manual':
res = manual.story(story['ref'])
try:
if story['source'] == 'hackernews':
res = hackernews.story(story['ref'])
elif story['source'] == 'lobsters':
res = lobsters.story(story['ref'])
elif story['source'] == 'reddit':
res = reddit.story(story['ref'])
elif story['source'] == 'tildes':
res = tildes.story(story['ref'])
elif story['source'] == 'manual':
res = manual.story(story['ref'])
except BaseException as e:
utils.alert_tanner('Problem updating {} story, ref {}: {}'.format(story['source'], story['ref'], str(e)))
logging.exception(e)
return False
if res:
story.update(res) # join dicts
@@ -124,8 +91,8 @@ def update_story(story, is_manual=False):
logging.info('Story not ready yet')
return False
if story['date'] and not is_manual and story['date'] + settings.MAX_STORY_AGE < time.time():
logging.info('Story too old, removing')
if story['date'] and not is_manual and story['date'] + TWO_DAYS < time.time():
logging.info('Story too old, removing. Date: {}'.format(story['date']))
return False
if story.get('url', '') and not story.get('text', ''):
@@ -139,6 +106,12 @@ def update_story(story, is_manual=False):
logging.info(story['url'])
return False
if 'trump' in story['title'].lower() or 'musk' in story['title'].lower():
logging.info('Trump / Musk story, skipping')
logging.info(story['url'])
return False
logging.info('Getting article ' + story['url'])
story['text'] = get_article(story['url'])
if not story['text']: return False
@@ -156,7 +129,7 @@ if __name__ == '__main__':
#print(get_article('https://www.bloomberg.com/news/articles/2019-09-23/xi-s-communists-under-pressure-as-high-prices-hit-china-workers'))
a = get_article('https://blog.joinmastodon.org/2019/10/mastodon-3.0/')
a = get_content_type('https://tefkos.comminfo.rutgers.edu/Courses/e530/Readings/Beal%202008%20full%20text%20searching.pdf')
print(a)
print('done')

View File

@@ -12,7 +12,8 @@ import requests
from utils import clean
API_TOPSTORIES = lambda x: 'https://hacker-news.firebaseio.com/v0/topstories.json'
API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
ALG_API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
BHN_API_ITEM = lambda x : 'https://api.hnpwa.com/v0/item/{}.json'.format(x)
SITE_LINK = lambda x : 'https://news.ycombinator.com/item?id={}'.format(x)
SITE_AUTHOR_LINK = lambda x : 'https://news.ycombinator.com/user?id={}'.format(x)
@@ -42,7 +43,7 @@ def api(route, ref=None):
def feed():
return [str(x) for x in api(API_TOPSTORIES) or []]
def comment(i):
def alg_comment(i):
if 'author' not in i:
return False
@@ -51,21 +52,25 @@ def comment(i):
c['score'] = i.get('points', 0)
c['date'] = i.get('created_at_i', 0)
c['text'] = clean(i.get('text', '') or '')
c['comments'] = [comment(j) for j in i['children']]
c['comments'] = [alg_comment(j) for j in i['children']]
c['comments'] = list(filter(bool, c['comments']))
return c
def comment_count(i):
def alg_comment_count(i):
alive = 1 if i['author'] else 0
return sum([comment_count(c) for c in i['comments']]) + alive
return sum([alg_comment_count(c) for c in i['comments']]) + alive
def story(ref):
r = api(API_ITEM, ref)
if not r: return False
def alg_story(ref):
r = api(ALG_API_ITEM, ref)
if not r:
logging.info('Bad Algolia Hackernews API response.')
return None
if 'deleted' in r:
logging.info('Story was deleted.')
return False
elif r.get('type', '') != 'story':
logging.info('Type "{}" is not "story".'.format(r.get('type', '')))
return False
s = {}
@@ -76,17 +81,85 @@ def story(ref):
s['title'] = r.get('title', '')
s['link'] = SITE_LINK(ref)
s['url'] = r.get('url', '')
s['comments'] = [comment(i) for i in r['children']]
s['comments'] = [alg_comment(i) for i in r['children']]
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = comment_count(s) - 1
s['num_comments'] = alg_comment_count(s) - 1
if 'text' in r and r['text']:
s['text'] = clean(r['text'] or '')
return s
def bhn_comment(i):
if 'user' not in i:
return False
c = {}
c['author'] = i.get('user', '')
c['score'] = 0 # Not present?
c['date'] = i.get('time', 0)
c['text'] = clean(i.get('content', '') or '')
c['comments'] = [bhn_comment(j) for j in i['comments']]
c['comments'] = list(filter(bool, c['comments']))
return c
def bhn_story(ref):
r = api(BHN_API_ITEM, ref)
if not r:
logging.info('Bad BetterHN Hackernews API response.')
return None
if 'deleted' in r: # TODO: verify
logging.info('Story was deleted.')
return False
elif r.get('dead', False):
logging.info('Story was deleted.')
return False
elif r.get('type', '') != 'link':
logging.info('Type "{}" is not "link".'.format(r.get('type', '')))
return False
s = {}
s['author'] = r.get('user', '')
s['author_link'] = SITE_AUTHOR_LINK(r.get('user', ''))
s['score'] = r.get('points', 0)
s['date'] = r.get('time', 0)
s['title'] = r.get('title', '')
s['link'] = SITE_LINK(ref)
s['url'] = r.get('url', '')
if s['url'].startswith('item'):
s['url'] = SITE_LINK(ref)
s['comments'] = [bhn_comment(i) for i in r['comments']]
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = r.get('comments_count', 0)
if 'content' in r and r['content']:
s['text'] = clean(r['content'] or '')
return s
def story(ref):
s = alg_story(ref)
if s is None:
s = bhn_story(ref)
if not s:
return False
if s['score'] < 25 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False
return s
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
print(feed())
#print(story(20763961))
#print(story(20802050))
#print(story(42899834)) # type "job"
#print(story(42900076)) # Ask HN
#print(story(42898201)) # Show HN
#print(story(42899703)) # normal
print(story(42902678)) # bad title?

120
apiserver/feeds/lobsters.py Normal file
View File

@@ -0,0 +1,120 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
if __name__ == '__main__':
import sys
sys.path.insert(0,'.')
import requests
from datetime import datetime
from utils import clean
API_HOTTEST = lambda x: 'https://lobste.rs/hottest.json'
API_ITEM = lambda x : 'https://lobste.rs/s/{}.json'.format(x)
SITE_LINK = lambda x : 'https://lobste.rs/s/{}'.format(x)
SITE_AUTHOR_LINK = lambda x : 'https://lobste.rs/u/{}'.format(x)
def api(route, ref=None):
try:
r = requests.get(route(ref), timeout=5)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting lobsters API: {}, trying again'.format(str(e)))
try:
r = requests.get(route(ref), timeout=15)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting lobsters API: {}'.format(str(e)))
return False
def feed():
return [x['short_id'] for x in api(API_HOTTEST) or []]
def unix(date_str):
date_str = date_str.replace(':', '')
return int(datetime.strptime(date_str, '%Y-%m-%dT%H%M%S.%f%z').timestamp())
def make_comment(i):
c = {}
try:
c['author'] = i['commenting_user']
except KeyError:
c['author'] = ''
c['score'] = i.get('score', 0)
try:
c['date'] = unix(i['created_at'])
except KeyError:
c['date'] = 0
c['text'] = clean(i.get('comment', '') or '')
c['comments'] = []
return c
def iter_comments(flat_comments):
nested_comments = []
parent_stack = []
for comment in flat_comments:
c = make_comment(comment)
indent = comment['depth']
if indent == 0:
nested_comments.append(c)
parent_stack = [c]
else:
parent_stack = parent_stack[:indent]
p = parent_stack[-1]
p['comments'].append(c)
parent_stack.append(c)
return nested_comments
def story(ref):
r = api(API_ITEM, ref)
if not r:
logging.info('Bad Lobsters API response.')
return False
s = {}
try:
s['author'] = r['submitter_user']
s['author_link'] = SITE_AUTHOR_LINK(s['author'])
except KeyError:
s['author'] = ''
s['author_link'] = ''
s['score'] = r.get('score', 0)
try:
s['date'] = unix(r['created_at'])
except KeyError:
s['date'] = 0
s['title'] = r.get('title', '')
s['link'] = SITE_LINK(ref)
s['url'] = r.get('url', '')
s['comments'] = iter_comments(r['comments'])
s['num_comments'] = r['comment_count']
if s['score'] < 15 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False
if 'description' in r and r['description']:
s['text'] = clean(r['description'] or '')
return s
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
#print(feed())
import json
print(json.dumps(story('fzvd1v'), indent=4))
#print(json.dumps(story('ixyv5u'), indent=4))

View File

@@ -27,7 +27,9 @@ def api(route):
def story(ref):
html = api(ref)
if not html: return False
if not html:
logging.info('Bad http GET response.')
return False
soup = BeautifulSoup(html, features='html.parser')

View File

@@ -1,231 +0,0 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
if __name__ == '__main__':
import sys
sys.path.insert(0,'.')
import requests
from datetime import datetime
from bs4 import BeautifulSoup
from scrapers import declutter
import dateutil.parser
import extruct
import pytz
from utils import clean
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'
#USER_AGENT = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
def unix(date_str, tz=None):
try:
dt = dateutil.parser.parse(date_str)
if tz:
dt = pytz.timezone(tz).localize(dt)
return int(dt.timestamp())
except:
pass
return 0
def xml(route, ref=None):
try:
headers = {'User-Agent': USER_AGENT, 'X-Forwarded-For': '66.249.66.1'}
r = requests.get(route(ref), headers=headers, timeout=5)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.text
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting URL: {}'.format(str(e)))
return False
def parse_extruct(s, data):
for rdfa in data['rdfa']:
for key, props in rdfa.items():
if 'http://ogp.me/ns#title' in props:
for values in props['http://ogp.me/ns#title']:
s['title'] = values['@value']
if 'http://ogp.me/ns/article#modified_time' in props:
for values in props['http://ogp.me/ns/article#modified_time']:
s['date'] = values['@value']
if 'http://ogp.me/ns/article#published_time' in props:
for values in props['http://ogp.me/ns/article#published_time']:
s['date'] = values['@value']
for og in data['opengraph']:
titles = list(filter(None, [value if 'og:title' in key else None for key, value in og['properties']]))
modified = list(filter(None, [value if 'article:modified_time' in key else None for key, value in og['properties']]))
published = list(filter(None, [value if 'article:published_time' in key else None for key, value in og['properties']]))
if len(modified):
s['date'] = modified[0]
if len(published):
s['date'] = published[0]
if len(titles):
s['title'] = titles[0]
for md in data['microdata']:
if md['type'] == 'https://schema.org/NewsArticle':
props = md['properties']
s['title'] = props['headline']
if props['dateModified']:
s['date'] = props['dateModified']
if props['datePublished']:
s['date'] = props['datePublished']
if 'author' in props and props['author']:
s['author'] = props['author']['properties']['name']
for ld in data['json-ld']:
if '@type' in ld and ld['@type'] in ['Article', 'NewsArticle']:
s['title'] = ld['headline']
if ld['dateModified']:
s['date'] = ld['dateModified']
if ld['datePublished']:
s['date'] = ld['datePublished']
if 'author' in ld and ld['author']:
s['author'] = ld['author']['name']
if '@graph' in ld:
for gld in ld['@graph']:
if '@type' in gld and gld['@type'] in ['Article', 'NewsArticle']:
s['title'] = gld['headline']
if gld['dateModified']:
s['date'] = gld['dateModified']
if gld['datePublished']:
s['date'] = gld['datePublished']
return s
def comment(i):
if 'author' not in i:
return False
c = {}
c['author'] = i.get('author', '')
c['score'] = i.get('points', 0)
c['date'] = unix(i.get('date', 0))
c['text'] = clean(i.get('text', '') or '')
c['comments'] = [comment(j) for j in i['children']]
c['comments'] = list(filter(bool, c['comments']))
return c
def comment_count(i):
alive = 1 if i['author'] else 0
return sum([comment_count(c) for c in i['comments']]) + alive
class _Base:
def __init__(url, tz=None):
self.url = url
self.tz = tz
def feed(self, excludes=None):
return []
def story(self, ref):
markup = xml(lambda x: ref)
if not markup:
return False
s = {}
s['author_link'] = ''
s['score'] = 0
s['comments'] = []
s['num_comments'] = 0
s['link'] = ref
s['url'] = ref
s['date'] = 0
data = extruct.extract(markup)
s = parse_extruct(s, data)
if s['date']:
s['date'] = unix(s['date'], tz=self.tz)
if 'disqus' in markup:
try:
s['comments'] = declutter.get_comments(ref)
c['comments'] = list(filter(bool, c['comments']))
s['num_comments'] = comment_count(s['comments'])
except KeyboardInterrupt:
raise
except:
pass
if not s['date']:
return False
return s
def get_sitemap_date(a):
if a.find('lastmod'):
return a.find('lastmod').text
if a.find('news:publication_date'):
return a.find('news:publication_date').text
if a.find('ns2:publication_date'):
return a.find('ns2:publication_date').text
return ''
class Sitemap(_Base):
def __init__(self, url, tz=None):
self.tz = tz
self.sitemap_url = url
def feed(self, excludes=None):
markup = xml(lambda x: self.sitemap_url)
if not markup: return []
soup = BeautifulSoup(markup, features='lxml')
sitemap = soup.find('urlset').findAll('url')
links = list(filter(None, [a if a.find('loc') else None for a in sitemap]))
links = list(filter(None, [a if get_sitemap_date(a) else None for a in links]))
links.sort(key=lambda a: unix(get_sitemap_date(a)), reverse=True)
links = [x.find('loc').text for x in links] or []
links = list(set(links))
if excludes:
links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
return links
class Category(_Base):
def __init__(self, url, tz=None):
self.tz = tz
self.category_url = url
self.base_url = '/'.join(url.split('/')[:3])
def feed(self, excludes=None):
markup = xml(lambda x: self.category_url)
if not markup: return []
soup = BeautifulSoup(markup, features='html.parser')
links = soup.find_all('a', href=True)
links = [link.get('href') for link in links]
links = [f"{self.base_url}{link}" if link.startswith('/') else link for link in links]
links = list(filter(None, [link if link.startswith(self.category_url) else None for link in links]))
links = list(filter(None, [link if link != self.category_url else None for link in links]))
links = list(set(links))
if excludes:
links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
return links
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
print("Sitemap: Stuff")
site = Sitemap("https://www.stuff.co.nz/sitemap/news/sitemap.xml")
posts = site.feed()
print(posts[:5])
print(site.story(posts[0]))
print("Category: RadioNZ Te Ao Māori")
site = Category("https://www.rnz.co.nz/news/te-manu-korihi/")
posts = site.feed()
print(posts[:5])
print(site.story(posts[0]))
print("Sitemap: Newsroom")
site = Sitemap("https://www.newsroom.co.nz/sitemap.xml")
posts = site.feed()
print(posts[:5])
print(site.story(posts[0]))

View File

@@ -32,11 +32,8 @@ def feed():
return [x.id for x in reddit.subreddit(subs).hot()]
except KeyboardInterrupt:
raise
except PRAWException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
return []
except PrawcoreException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
except BaseException as e:
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
return []
def comment(i):
@@ -59,7 +56,9 @@ def comment(i):
def story(ref):
try:
r = reddit.submission(ref)
if not r: return False
if not r:
logging.info('Bad Reddit API response.')
return False
s = {}
s['author'] = r.author.name if r.author else '[Deleted]'
@@ -74,6 +73,7 @@ def story(ref):
s['num_comments'] = r.num_comments
if s['score'] < 25 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False
if r.selftext:
@@ -84,10 +84,10 @@ def story(ref):
except KeyboardInterrupt:
raise
except PRAWException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
return False
except PrawcoreException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
return False
# scratchpad so I can quickly develop the parser

View File

@@ -1,165 +0,0 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
if __name__ == '__main__':
import sys
sys.path.insert(0,'.')
import requests
from datetime import datetime
from utils import clean
SUBSTACK_REFERER = 'https://substack.com'
SUBSTACK_API_TOP_POSTS = lambda x: "https://substack.com/api/v1/reader/top-posts"
def author_link(author_id, base_url):
return f"{base_url}/people/{author_id}"
def api_comments(post_id, base_url):
return f"{base_url}/api/v1/post/{post_id}/comments?all_comments=true&sort=best_first"
def api_stories(x, base_url):
return f"{base_url}/api/v1/archive?sort=new&search=&offset=0&limit=100"
def unix(date_str):
return int(datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%fZ').timestamp())
def api(route, ref=None, referer=None):
headers = {'Referer': referer} if referer else None
try:
r = requests.get(route(ref), headers=headers, timeout=10)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting Substack API: {}, trying again'.format(str(e)))
try:
r = requests.get(route(ref), headers=headers, timeout=20)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting Substack API: {}'.format(str(e)))
return False
def comment(i):
if 'body' not in i:
return False
c = {}
c['date'] = unix(i.get('date'))
c['author'] = i.get('name', '')
c['score'] = i.get('reactions').get('')
c['text'] = clean(i.get('body', '') or '')
c['comments'] = [comment(j) for j in i['children']]
c['comments'] = list(filter(bool, c['comments']))
return c
class Publication:
def __init__(self, domain):
self.BASE_DOMAIN = domain
def feed(self):
stories = api(lambda x: api_stories(x, self.BASE_DOMAIN), referer=self.BASE_DOMAIN)
if not stories: return []
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
return [str(i.get("id")) for i in stories or []]
def story(self, ref):
stories = api(lambda x: api_stories(x, self.BASE_DOMAIN), referer=self.BASE_DOMAIN)
if not stories: return False
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
stories = list(filter(None, [i if str(i.get('id')) == ref else None for i in stories]))
if len(stories) == 0:
return False
r = stories[0]
if not r:
return False
s = {}
s['author'] = ''
s['author_link'] = ''
s['date'] = unix(r.get('post_date'))
s['score'] = r.get('reactions').get('')
s['title'] = r.get('title', '')
s['link'] = r.get('canonical_url', '')
s['url'] = r.get('canonical_url', '')
comments = api(lambda x: api_comments(x, self.BASE_DOMAIN), r.get('id'), referer=self.BASE_DOMAIN)
s['comments'] = [comment(i) for i in comments.get('comments')]
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = r.get('comment_count', 0)
authors = list(filter(None, [self._bylines(byline) for byline in r.get('publishedBylines')]))
if len(authors):
s['author'] = authors[0].get('name')
s['author_link'] = authors[0].get('link')
return s
def _bylines(self, b):
if 'id' not in b:
return None
a = {}
a['name'] = b.get('name')
a['link'] = author_link(b.get('id'), self.BASE_DOMAIN)
return a
class Top:
def feed(self):
stories = api(SUBSTACK_API_TOP_POSTS, referer=SUBSTACK_REFERER)
if not stories: return []
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
return [str(i.get("id")) for i in stories or []]
def story(self, ref):
stories = api(SUBSTACK_API_TOP_POSTS, referer=SUBSTACK_REFERER)
if not stories: return False
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
stories = list(filter(None, [i if str(i.get('id')) == ref else None for i in stories]))
if len(stories) == 0:
return False
r = stories[0]
if not r:
return False
s = {}
pub = r.get('pub')
base_url = pub.get('base_url')
s['author'] = pub.get('author_name')
s['author_link'] = author_link(pub.get('author_id'), base_url)
s['date'] = unix(r.get('post_date'))
s['score'] = r.get('score')
s['title'] = r.get('title', '')
s['link'] = r.get('canonical_url', '')
s['url'] = r.get('canonical_url', '')
comments = api(lambda x: api_comments(x, base_url), r.get('id'), referer=SUBSTACK_REFERER)
s['comments'] = [comment(i) for i in comments.get('comments')]
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = r.get('comment_count', 0)
return s
top = Top()
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
top_posts = top.feed()
print(top.story(top_posts[0]))
webworm = Publication("https://www.webworm.co/")
posts = webworm.feed()
print(webworm.story(posts[0]))

View File

@@ -34,7 +34,7 @@ def api(route):
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting tildes website: {}'.format(str(e)))
logging.critical('Problem hitting tildes website: {}'.format(str(e)))
return False
def feed():
@@ -71,11 +71,15 @@ def story(ref):
html = api(SITE_LINK(group_lookup[ref], ref))
else:
html = api(API_ITEM(ref))
if not html: return False
if not html:
logging.info('Bad Tildes API response.')
return False
soup = BeautifulSoup(html, features='html.parser')
a = soup.find('article', class_='topic-full')
if a is None: return False
if a is None:
logging.info('Tildes <article> element not found.')
return False
h = a.find('header')
lu = h.find('a', class_='link-user')
@@ -83,6 +87,7 @@ def story(ref):
error = a.find('div', class_='text-error')
if error:
if 'deleted' in error.string or 'removed' in error.string:
logging.info('Article was deleted or removed.')
return False
s = {}
@@ -102,7 +107,21 @@ def story(ref):
ch = a.find('header', class_='topic-comments-header')
s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
if s['score'] < 8 and s['num_comments'] < 6:
if s['group'].split('.')[0] not in [
'~arts',
'~comp',
'~creative',
'~design',
'~engineering',
'~finance',
'~science',
'~tech',
]:
logging.info('Group ({}) not in whitelist.'.format(s['group']))
return False
if s['score'] < 15 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False
td = a.find('div', class_='topic-full-text')
@@ -113,7 +132,7 @@ def story(ref):
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
#print(feed())
print(feed())
#normal = story('gxt')
#print(normal)
#no_comments = story('gxr')
@@ -122,8 +141,8 @@ if __name__ == '__main__':
#print(self_post)
#li_comment = story('gqx')
#print(li_comment)
broken = story('q4y')
print(broken)
#broken = story('q4y')
#print(broken)
# make sure there's no self-reference
#import copy

View File

@@ -4,7 +4,6 @@ certifi==2020.6.20
chardet==3.0.4
click==7.1.2
commonmark==0.9.1
extruct==0.10.0
Flask==1.1.2
Flask-Cors==3.0.8
gevent==20.6.2
@@ -12,13 +11,11 @@ greenlet==0.4.16
idna==2.10
itsdangerous==1.1.0
Jinja2==2.11.2
lxml==4.6.1
MarkupSafe==1.1.1
packaging==20.4
praw==6.4.0
prawcore==1.4.0
pyparsing==2.4.7
pytz==2020.4
requests==2.24.0
six==1.15.0
soupsieve==2.0.1
@@ -30,4 +27,3 @@ websocket-client==0.57.0
Werkzeug==1.0.1
zope.event==4.4
zope.interface==5.1.0
python-dateutil==2.8.1

View File

@@ -1,41 +0,0 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
import requests
DECLUTTER_API = 'https://declutter.1j.nz/details'
DECLUTTER_COMMENT_API = 'https://declutter.1j.nz/comments'
TIMEOUT = 30
def get_html(url):
logging.info(f"Declutter Scraper: {url}")
details = get_details(url)
if not details:
return ''
return details['content']
def get_details(url):
try:
r = requests.post(DECLUTTER_API, data=dict(url=url), timeout=TIMEOUT)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem decluttering article: {}'.format(str(e)))
return None
def get_comments(url):
try:
r = requests.post(DECLUTTER_COMMENT_API, data=dict(url=url), timeout=TIMEOUT)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem getting comments for article: {}'.format(str(e)))
return None

View File

@@ -1,27 +0,0 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
import requests
READ_API = 'http://127.0.0.1:33843/details'
TIMEOUT = 20
def get_html(url):
logging.info(f"Local Scraper: {url}")
details = get_details(url)
if not details:
return ''
return details['content']
def get_details(url):
try:
r = requests.post(READ_API, data=dict(url=url), timeout=TIMEOUT)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem getting article: {}'.format(str(e)))
return None

View File

@@ -1,37 +0,0 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
import requests
OUTLINE_REFERER = 'https://outline.com/'
OUTLINE_API = 'https://api.outline.com/v3/parse_article'
TIMEOUT = 20
def get_html(url):
details = get_details(url)
if not details:
return ''
return details['html']
def get_details(url):
try:
logging.info(f"Outline Scraper: {url}")
params = {'source_url': url}
headers = {'Referer': OUTLINE_REFERER}
r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=TIMEOUT)
if r.status_code == 429:
logging.info('Rate limited by outline, sleeping 30s and skipping...')
time.sleep(30)
return None
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
data = r.json()['data']
if 'URL is not supported by Outline' in data['html']:
raise Exception('URL not supported by Outline')
return data
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem outlining article: {}'.format(str(e)))
return None

View File

@@ -1,6 +1,8 @@
import database
import search
import sys
import settings
import logging
import json
import requests
@@ -21,7 +23,7 @@ def database_del_story(sid):
def search_del_story(sid):
try:
r = requests.delete(search.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
r = requests.delete(settings.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()

View File

@@ -0,0 +1,58 @@
import time
import json
import logging
import feed
import database
import search
database.init()
def fix_gzip_bug(story_list):
FIX_THRESHOLD = 150
count = 1
for sid in story_list:
try:
sid = sid[0]
story = database.get_story(sid)
full_json = json.loads(story.full_json)
meta_json = json.loads(story.meta_json)
text = full_json.get('text', '')
count = text.count('<EFBFBD>')
if not count: continue
ratio = count / len(text) * 1000
print('Bad story:', sid, 'Num ?:', count, 'Ratio:', ratio)
if ratio < FIX_THRESHOLD: continue
print('Attempting to fix...')
valid = feed.update_story(meta_json, is_manual=True)
if valid:
database.put_story(meta_json)
search.put_story(meta_json)
print('Success')
else:
print('Story was not valid')
time.sleep(3)
except KeyboardInterrupt:
raise
except BaseException as e:
logging.exception(e)
breakpoint()
if __name__ == '__main__':
num_stories = database.count_stories()
print('Fix {} stories?'.format(num_stories))
print('Press ENTER to continue, ctrl-c to cancel')
input()
story_list = database.get_story_list()
fix_gzip_bug(story_list)

View File

@@ -0,0 +1,62 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.INFO)
import database
from sqlalchemy import select
import search
import sys
import time
import json
import requests
database.init()
search.init()
BATCH_SIZE = 5000
def put_stories(stories):
return search.meili_api(requests.post, 'indexes/qotnews/documents', stories)
def get_update(update_id):
return search.meili_api(requests.get, 'tasks/{}'.format(update_id))
if __name__ == '__main__':
num_stories = database.count_stories()
print('Reindex {} stories?'.format(num_stories))
print('Press ENTER to continue, ctrl-c to cancel')
input()
story_list = database.get_story_list()
count = 1
while len(story_list):
stories = []
for _ in range(BATCH_SIZE):
try:
sid = story_list.pop()
except IndexError:
break
story = database.get_story(sid)
print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
story_obj = json.loads(story.meta_json)
stories.append(story_obj)
count += 1
res = put_stories(stories)
update_id = res['uid']
print('Waiting for processing', end='')
while get_update(update_id)['status'] != 'succeeded':
time.sleep(0.5)
print('.', end='', flush=True)
print()
print('Done.')

View File

@@ -0,0 +1,23 @@
import time
import requests
def test_search_api():
num_tests = 100
total_time = 0
for i in range(num_tests):
start = time.time()
res = requests.get('http://127.0.0.1:33842/api/search?q=iphone')
res.raise_for_status()
duration = time.time() - start
total_time += duration
avg_time = total_time / num_tests
print('Average search time:', avg_time)
if __name__ == '__main__':
test_search_api()

View File

@@ -4,83 +4,62 @@ logging.basicConfig(
level=logging.DEBUG)
import requests
import settings
MEILI_URL = 'http://127.0.0.1:7700/'
SEARCH_ENABLED = bool(settings.MEILI_URL)
def meili_api(method, route, json=None, params=None, parse_json=True):
try:
r = method(settings.MEILI_URL + route, json=json, params=params, timeout=4)
if r.status_code > 299:
raise Exception('Bad response code ' + str(r.status_code))
if parse_json:
return r.json()
else:
r.encoding = 'utf-8'
return r.text
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e))
return False
def create_index():
try:
json = dict(name='qotnews', uid='qotnews')
r = requests.post(MEILI_URL + 'indexes', json=json, timeout=2)
if r.status_code != 201:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem creating MeiliSearch index: {}'.format(str(e)))
return False
json = dict(uid='qotnews', primaryKey='id')
return meili_api(requests.post, 'indexes', json=json)
def update_rankings():
try:
json = ['typo', 'words', 'proximity', 'attribute', 'desc(date)', 'wordsPosition', 'exactness']
r = requests.post(MEILI_URL + 'indexes/qotnews/settings/ranking-rules', json=json, timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem setting MeiliSearch ranking rules: {}'.format(str(e)))
return False
json = ['typo', 'words', 'proximity', 'date:desc', 'exactness']
return meili_api(requests.post, 'indexes/qotnews/settings/ranking-rules', json=json)
def update_attributes():
try:
json = ['title', 'url', 'author', 'link', 'id']
r = requests.post(MEILI_URL + 'indexes/qotnews/settings/searchable-attributes', json=json, timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
requests.delete(MEILI_URL + 'indexes/qotnews/settings/displayed-attributes', timeout=2)
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem setting MeiliSearch searchable attributes: {}'.format(str(e)))
return False
json = ['title', 'url', 'author']
r = meili_api(requests.post, 'indexes/qotnews/settings/searchable-attributes', json=json)
json = ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments']
r = meili_api(requests.post, 'indexes/qotnews/settings/displayed-attributes', json=json)
return r
def init():
create_index()
if not SEARCH_ENABLED:
logging.info('Search is not enabled, skipping init.')
return
print(create_index())
update_rankings()
update_attributes()
def put_story(story):
story = story.copy()
story.pop('text', None)
story.pop('comments', None)
try:
r = requests.post(MEILI_URL + 'indexes/qotnews/documents', json=[story], timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem putting MeiliSearch story: {}'.format(str(e)))
return False
if not SEARCH_ENABLED: return
return meili_api(requests.post, 'indexes/qotnews/documents', [story])
def search(q):
try:
params = dict(q=q, limit=250)
r = requests.get(MEILI_URL + 'indexes/qotnews/search', params=params, timeout=2)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()['hits']
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem searching MeiliSearch: {}'.format(str(e)))
return False
if not SEARCH_ENABLED: return []
params = dict(q=q, limit=settings.FEED_LENGTH)
r = meili_api(requests.get, 'indexes/qotnews/search', params=params, parse_json=False)
return r
if __name__ == '__main__':
create_index()
init()
print(search('the'))
print(update_rankings())
print(search('facebook'))

View File

@@ -28,6 +28,8 @@ from flask_cors import CORS
database.init()
search.init()
news_index = 0
def new_id():
nid = gen_rand_id()
while database.get_story(nid):
@@ -40,8 +42,11 @@ cors = CORS(flask_app)
@flask_app.route('/api')
def api():
stories = database.get_stories(settings.MAX_STORY_AGE)
res = Response(json.dumps({"stories": stories}))
skip = request.args.get('skip', 0)
limit = request.args.get('limit', settings.FEED_LENGTH)
stories = database.get_stories(limit, skip)
# hacky nested json
res = Response('{"stories":[' + ','.join(stories) + ']}')
res.headers['content-type'] = 'application/json'
return res
@@ -51,8 +56,10 @@ def apisearch():
if len(q) >= 3:
results = search.search(q)
else:
results = []
return dict(results=results)
results = '[]'
res = Response(results)
res.headers['content-type'] = 'application/json'
return res
@flask_app.route('/api/submit', methods=['POST'], strict_slashes=False)
def submit():
@@ -60,6 +67,8 @@ def submit():
url = request.form['url']
nid = new_id()
logging.info('Manual submission: ' + url)
parse = urlparse(url)
if 'news.ycombinator.com' in parse.hostname:
source = 'hackernews'
@@ -67,6 +76,9 @@ def submit():
elif 'tildes.net' in parse.hostname and '~' in url:
source = 'tildes'
ref = parse.path.split('/')[2]
elif 'lobste.rs' in parse.hostname and '/s/' in url:
source = 'lobsters'
ref = parse.path.split('/')[2]
elif 'reddit.com' in parse.hostname and 'comments' in url:
source = 'reddit'
ref = parse.path.split('/')[4]
@@ -99,9 +111,8 @@ def submit():
def story(sid):
story = database.get_story(sid)
if story:
related = database.get_stories_by_url(story.meta['url'])
related = [r.meta for r in related]
res = Response(json.dumps({"story": story.data, "related": related}))
# hacky nested json
res = Response('{"story":' + story.full_json + '}')
res.headers['content-type'] = 'application/json'
return res
else:
@@ -111,9 +122,11 @@ def story(sid):
@flask_app.route('/search')
def index():
return render_template('index.html',
title='Feed',
url='news.t0.vc',
description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')
title='QotNews',
url='news.t0.vc',
description='Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode',
robots='index',
)
@flask_app.route('/<sid>', strict_slashes=False)
@flask_app.route('/<sid>/c', strict_slashes=False)
@@ -125,7 +138,7 @@ def static_story(sid):
story = database.get_story(sid)
if not story: return abort(404)
story = story.data
story = json.loads(story.full_json)
score = story['score']
num_comments = story['num_comments']
@@ -138,65 +151,71 @@ def static_story(sid):
url = url.replace('www.', '')
return render_template('index.html',
title=story['title'],
url=url,
description=description)
title=story['title'] + ' | QotNews',
url=url,
description=description,
robots='noindex',
)
http_server = WSGIServer(('', 33842), flask_app)
def _add_new_refs():
for ref, source in feed.get_list():
if database.get_story_by_ref(ref):
continue
try:
nid = new_id()
database.put_ref(ref, nid, source)
logging.info('Added ref ' + ref)
except database.IntegrityError:
continue
def _update_current_story(item):
try:
story = database.get_story(item['sid']).data
except AttributeError:
story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
logging.info('Updating story: {}'.format(str(story['ref'])))
valid = feed.update_story(story)
if valid:
database.put_story(story)
search.put_story(story)
else:
database.del_ref(item['ref'])
logging.info('Removed ref {}'.format(item['ref']))
def feed_thread():
ref_list = []
global news_index
try:
while True:
# onboard new stories
if not len(ref_list):
_add_new_refs()
ref_list = database.get_reflist()
if news_index == 0:
for ref, source in feed.list():
if database.get_story_by_ref(ref):
continue
try:
nid = new_id()
logging.info('Adding ref: {}, id: {}, source: {}'.format(ref, nid, source))
database.put_ref(ref, nid, source)
except database.IntegrityError:
logging.info('Already have ID / ref, skipping.')
continue
ref_list = database.get_reflist(settings.FEED_LENGTH)
# update current stories
if len(ref_list):
item = ref_list.pop(0)
_update_current_story(item)
if news_index < len(ref_list):
item = ref_list[news_index]
try:
story_json = database.get_story(item['sid']).full_json
story = json.loads(story_json)
except AttributeError:
story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
logging.info('Updating {} story: {}, index: {}'.format(story['source'], story['ref'], news_index))
valid = feed.update_story(story)
if valid:
database.put_story(story)
search.put_story(story)
else:
database.del_ref(item['ref'])
logging.info('Removed ref {}'.format(item['ref']))
else:
logging.info('Skipping index: ' + str(news_index))
gevent.sleep(6)
news_index += 1
if news_index == settings.FEED_LENGTH: news_index = 0
except KeyboardInterrupt:
logging.info('Ending feed thread...')
except ValueError as e:
logging.error('feed_thread error: {} {}'.format(e.__class__.__name__, e))
logging.critical('feed_thread error: {} {}'.format(e.__class__.__name__, e))
http_server.stop()
print('Starting Feed thread...')
logging.info('Starting Feed thread...')
gevent.spawn(feed_thread)
print('Starting HTTP thread...')
logging.info('Starting HTTP thread...')
try:
http_server.serve_forever()
except KeyboardInterrupt:

View File

@@ -1,28 +1,23 @@
# QotNews settings
# edit this file and save it as settings.py
MAX_STORY_AGE = 3*24*60*60
# Feed Lengths
# Number of top items from each site to pull
# set to 0 to disable that site
FEED_LENGTH = 75
NUM_HACKERNEWS = 15
NUM_REDDIT = 10
NUM_LOBSTERS = 10
NUM_REDDIT = 15
NUM_TILDES = 5
NUM_SUBSTACK = 10
SITEMAP = {}
# SITEMAP['nzherald'] = { 'url': "https://www.nzherald.co.nz/arcio/news-sitemap/", 'count': 10},
# SITEMAP['stuff'] = { 'url': "https://www.stuff.co.nz/sitemap.xml", 'count': 10},
# Meilisearch server URL
# Leave blank if not using search
#MEILI_URL = 'http://127.0.0.1:7700/'
MEILI_URL = ''
SUBSTACK = {}
# SUBSTACK['webworm'] = { 'url': "https://www.webworm.co", 'count': 10},
# SUBSTACK['the bulletin'] = { 'url': "https://thespinoff.substack.com", 'count': 10},
CATEGORY = {}
# CATEGORY['rnz national'] = { 'url': "https://www.rnz.co.nz/news/national", 'count': 10},
SCRAPERS = ['declutter', 'outline', 'local']
# Readerserver URL
# Leave blank if not using, but that defeats the whole point
READER_URL = 'http://127.0.0.1:33843/'
# Reddit account info
# leave blank if not using Reddit
@@ -38,13 +33,9 @@ SUBREDDITS = [
'HistoryofIdeas',
'LaymanJournals',
'PhilosophyofScience',
'PoliticsPDFs',
'Scholar',
'StateOfTheUnion',
'TheAgora',
'TrueFilm',
'TrueReddit',
'UniversityofReddit',
'culturalstudies',
'hardscience',
'indepthsports',
@@ -53,4 +44,7 @@ SUBREDDITS = [
'neurophilosophy',
'resilientcommunities',
'worldevents',
'StallmanWasRight',
'EverythingScience',
'longevity',
]

View File

@@ -8,6 +8,14 @@ import string
from bleach.sanitizer import Cleaner
def alert_tanner(message):
try:
logging.info('Alerting Tanner: ' + message)
params = dict(qotnews=message)
requests.get('https://tbot.tannercollin.com/message', params=params, timeout=4)
except BaseException as e:
logging.error('Problem alerting Tanner: ' + str(e))
def gen_rand_id():
return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))

View File

@@ -1,14 +1,53 @@
const port = 33843;
const express = require('express');
const app = express();
const simple = require('./simple');
const port = 33843;
const request = require('request');
const JSDOM = require('jsdom').JSDOM;
const { Readability } = require('readability');
app.use(express.urlencoded({ extended: true }));
app.get('/', (req, res) => res.send(simple.FORM));
app.post('/', (req, res) => simple.scrape(req, res));
app.post('/details', (req, res) => simple.details(req, res));
// app.post('/browser', (req, res) => browser.scrape(req, res));
// app.post('/browser/details', (req, res) => browser.details(req, res));
app.get('/', (req, res) => {
res.send('<form method="POST" accept-charset="UTF-8"><input name="url"><button type="submit">SUBMIT</button></form>');
});
const requestCallback = (url, res) => (error, response, body) => {
if (!error && response.statusCode == 200) {
console.log('Response OK.');
const doc = new JSDOM(body, {url: url});
const reader = new Readability(doc.window.document);
const article = reader.parse();
if (article && article.content) {
res.send(article.content);
} else {
res.sendStatus(404);
}
} else {
console.log('Response error:', error ? error.toString() : response.statusCode);
res.sendStatus(response ? response.statusCode : 404);
}
};
app.post('/', (req, res) => {
const url = req.body.url;
const requestOptions = {
url: url,
gzip: true,
//headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'},
//headers: {'User-Agent': 'Twitterbot/1.0'},
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0',
'X-Forwarded-For': '66.249.66.1',
},
};
console.log('Parse request for:', url);
request(requestOptions, requestCallback(url, res));
});
app.listen(port, () => {
console.log(`Example app listening on port ${port}!`);

View File

@@ -1,43 +0,0 @@
const request = require('request');
const JSDOM = require('jsdom').JSDOM;
const { Readability } = require('readability');
const options = url => ({
url: url,
headers: {
'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)',
'X-Forwarded-For': '66.249.66.1',
},
});
const extract = (url, body) => {
const doc = new JSDOM(body, { url: url });
const reader = new Readability(doc.window.document);
return reader.parse();
};
module.exports.FORM = '<form method="POST" action="/" accept-charset="UTF-8"><input name="url"><button type="submit">SUBMIT</button></form>';
module.exports.scrape = (req, res) => request(options(req.body.url), (error, response, body) => {
if (error || response.statusCode != 200) {
console.log('Response error:', error ? error.toString() : response.statusCode);
return res.sendStatus(response ? response.statusCode : 404);
}
const article = extract(url, body);
if (article && article.content) {
return res.send(article.content);
}
return res.sendStatus(404);
});
module.exports.details = (req, res) => request(options(req.body.url), (error, response, body) => {
if (error || response.statusCode != 200) {
console.log('Response error:', error ? error.toString() : response.statusCode);
return res.sendStatus(response ? response.statusCode : 404);
}
const article = extract(url, body);
if (article) {
return res.send(article);
}
return res.sendStatus(404);
});

File diff suppressed because it is too large Load Diff

View File

@@ -3,7 +3,7 @@
Download MeiliSearch with:
```
wget https://github.com/meilisearch/MeiliSearch/releases/download/v0.11.1/meilisearch-linux-amd64
wget https://github.com/meilisearch/meilisearch/releases/download/v0.27.0/meilisearch-linux-amd64
chmod +x meilisearch-linux-amd64
```

View File

@@ -8,6 +8,8 @@
content="{{ description }}"
/>
<meta content="{{ url }}" name="og:site_name">
<meta name="robots" content="{{ robots }}">
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
@@ -26,7 +28,7 @@
work correctly both with client-side routing and a non-root public URL.
Learn how to configure a non-root public URL by running `npm run build`.
-->
<title>{{ title }} - QotNews</title>
<title>{{ title }}</title>
<style>
html {
@@ -37,13 +39,23 @@
}
.nojs {
color: white;
max-width: 32rem;
}
</style>
</head>
<body>
<div class="nojs">
<noscript>You need to enable JavaScript to run this app.</noscript>
<noscript>
You need to enable JavaScript to run this app because it's written in React.
I was planning on writing a server-side version, but I've become distracted
by other projects -- sorry!
<br/>
I originally wrote this for myself, and of course I whitelist JavaScript on
all my own domains.
<br/><br/>
Alternatively, try activex.news.t0.vc for an ActiveX™ version.
</noscript>
</div>
<div id="root"></div>
<!--

View File

@@ -3,8 +3,10 @@ import { BrowserRouter as Router, Route, Link, Switch } from 'react-router-dom';
import localForage from 'localforage';
import './Style-light.css';
import './Style-dark.css';
import './Style-black.css';
import './Style-red.css';
import './fonts/Fonts.css';
import { ForwardDot } from './utils.js';
import { BackwardDot, ForwardDot } from './utils.js';
import Feed from './Feed.js';
import Article from './Article.js';
import Comments from './Comments.js';
@@ -38,6 +40,16 @@ class App extends React.Component {
localStorage.setItem('theme', 'dark');
}
black() {
this.setState({ theme: 'black' });
localStorage.setItem('theme', 'black');
}
red() {
this.setState({ theme: 'red' });
localStorage.setItem('theme', 'red');
}
componentDidMount() {
if (!this.cache.length) {
localForage.iterate((value, key) => {
@@ -47,22 +59,61 @@ class App extends React.Component {
}
}
goFullScreen() {
if ('wakeLock' in navigator) {
navigator.wakeLock.request('screen');
}
document.body.requestFullscreen({ navigationUI: 'hide' }).then(() => {
window.addEventListener('resize', () => this.forceUpdate());
this.forceUpdate();
});
};
exitFullScreen() {
document.exitFullscreen().then(() => {
this.forceUpdate();
});
};
render() {
const theme = this.state.theme;
document.body.style.backgroundColor = theme === 'dark' ? '#000' : '#eeeeee';
if (theme === 'dark') {
document.body.style.backgroundColor = '#1a1a1a';
} else if (theme === 'black') {
document.body.style.backgroundColor = '#000';
} else if (theme === 'red') {
document.body.style.backgroundColor = '#000';
} else {
document.body.style.backgroundColor = '#eeeeee';
}
const fullScreenAvailable = document.fullscreenEnabled ||
document.mozFullscreenEnabled ||
document.webkitFullscreenEnabled ||
document.msFullscreenEnabled;
return (
<div className={theme}>
<Router>
<div className='container menu'>
<p>
<Link to='/'>QotNews - Feed</Link>
<span className='theme'>Theme: <a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a></span>
<Link to='/'>QotNews</Link>
<span className='theme'><a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a> - <a href='#' onClick={() => this.black()}>Black</a> - <a href='#' onClick={() => this.red()}>Red</a></span>
<br />
<span className='slogan'>Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode.</span>
<span className='slogan'>Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.</span>
</p>
<Route path='/(|search)' component={Search} />
<Route path='/(|search)' component={Submit} />
{fullScreenAvailable &&
<Route path='/(|search)' render={() => !document.fullscreenElement ?
<button className='fullscreen' onClick={() => this.goFullScreen()}>Enter Fullscreen</button>
:
<button className='fullscreen' onClick={() => this.exitFullScreen()}>Exit Fullscreen</button>
} />
}
</div>
<Route path='/' exact render={(props) => <Feed {...props} updateCache={this.updateCache} />} />
@@ -72,6 +123,7 @@ class App extends React.Component {
</Switch>
<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} />} />
<BackwardDot />
<ForwardDot />
<ScrollToTop />

View File

@@ -67,7 +67,8 @@ class Article extends React.Component {
{story ?
<div className='article'>
<Helmet>
<title>{story.title} - QotNews</title>
<title>{story.title} | QotNews</title>
<meta name="robots" content="noindex" />
</Helmet>
<h1>{story.title}</h1>

View File

@@ -72,7 +72,7 @@ class Article extends React.Component {
}
displayComment(story, c, level) {
const cid = c.author + c.date;
const cid = c.author+c.date;
const collapsed = this.state.collapsed.includes(cid);
const expanded = this.state.expanded.includes(cid);
@@ -85,22 +85,19 @@ class Article extends React.Component {
<div className='info'>
<p>
{c.author === story.author ? '[OP]' : ''} {c.author || '[Deleted]'}
{' '} | <HashLink to={'#' + cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
{' '} | <HashLink to={'#'+cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
{hasChildren && (
hidden ?
<span className='collapser expander pointer' onClick={() => this.expandComment(cid)}>+</span>
:
<span className='collapser pointer' onClick={() => this.collapseComment(cid)}></span>
)}
{hidden || hasChildren &&
<span className='collapser pointer' onClick={() => this.collapseComment(cid)}></span>
}
</p>
</div>
<div className={collapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text }} />
<div className={collapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text }} />
{hidden && hasChildren ?
<div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c) - 1} more]</div>
:
<div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c)-1} more]</div>
:
c.comments.map(i => this.displayComment(story, i, level + 1))
}
</div>
@@ -118,7 +115,8 @@ class Article extends React.Component {
{story ?
<div className='article'>
<Helmet>
<title>{story.title} - QotNews Comments</title>
<title>{story.title} | QotNews</title>
<meta name="robots" content="noindex" />
</Helmet>
<h1>{story.title}</h1>
@@ -133,7 +131,7 @@ class Article extends React.Component {
{story.comments.map(c => this.displayComment(story, c, 0))}
</div>
</div>
:
:
<p>loading...</p>
}
<ToggleDot id={id} article={true} />

View File

@@ -22,21 +22,20 @@ class Feed extends React.Component {
const updated = !this.state.stories || this.state.stories[0].id !== result.stories[0].id;
console.log('updated:', updated);
const { stories } = result;
this.setState({ stories });
localStorage.setItem('stories', JSON.stringify(stories));
this.setState({ stories: result.stories });
localStorage.setItem('stories', JSON.stringify(result.stories));
if (updated) {
localForage.clear();
stories.forEach((x, i) => {
result.stories.forEach((x, i) => {
fetch('/api/' + x.id)
.then(res => res.json())
.then(({ story }) => {
localForage.setItem(x.id, story)
.then(result => {
localForage.setItem(x.id, result.story)
.then(console.log('preloaded', x.id, x.title));
this.props.updateCache(x.id, story);
}, error => { }
);
this.props.updateCache(x.id, result.story);
}, error => {}
);
});
}
},
@@ -53,7 +52,8 @@ class Feed extends React.Component {
return (
<div className='container'>
<Helmet>
<title>Feed - QotNews</title>
<title>QotNews</title>
<meta name="robots" content="index" />
</Helmet>
{error && <p>Connection error?</p>}
{stories ?
@@ -62,7 +62,7 @@ class Feed extends React.Component {
<div className='item' key={x.id}>
<div className='title'>
<Link className='link' to={'/' + x.id}>
<img className='source-logo' src={logos[x.source] || logos[x.source.split(' ')[0]]} alt='source logo' /> {x.title}
<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
</Link>
<span className='source'>
@@ -74,7 +74,7 @@ class Feed extends React.Component {
</div>
)}
</div>
:
:
<p>loading...</p>
}
</div>

View File

@@ -29,7 +29,7 @@ class Results extends React.Component {
.then(res => res.json())
.then(
(result) => {
this.setState({ stories: result.results });
this.setState({ stories: result.hits });
},
(error) => {
if (error.message !== 'The operation was aborted. ') {
@@ -56,7 +56,7 @@ class Results extends React.Component {
return (
<div className='container'>
<Helmet>
<title>Feed - QotNews</title>
<title>Search Results | QotNews</title>
</Helmet>
{error && <p>Connection error?</p>}
{stories ?

View File

@@ -15,6 +15,7 @@ class ScrollToTop extends React.Component {
}
window.scrollTo(0, 0);
document.body.scrollTop = 0;
}
render() {

View File

@@ -37,7 +37,7 @@ class Search extends Component {
<span className='search'>
<form onSubmit={this.searchAgain}>
<input
placeholder='Search... (fixed)'
placeholder='Search...'
value={search}
onChange={this.searchArticles}
ref={this.inputRef}

View File

@@ -0,0 +1,68 @@
.black {
color: #ddd;
}
.black a {
color: #ddd;
}
.black input {
color: #ddd;
border: 1px solid #828282;
}
.black button {
background-color: #444444;
border-color: #bbb;
color: #ddd;
}
.black .item {
color: #828282;
}
.black .item .source-logo {
filter: grayscale(1);
}
.black .item a {
color: #828282;
}
.black .item a.link {
color: #ddd;
}
.black .item a.link:visited {
color: #828282;
}
.black .item .info a.hot {
color: #cccccc;
}
.black .article a {
border-bottom: 1px solid #aaaaaa;
}
.black .article u {
border-bottom: 1px solid #aaaaaa;
text-decoration: none;
}
.black .story-text video,
.black .story-text img {
filter: brightness(50%);
}
.black .article .info {
color: #828282;
}
.black .article .info a {
border-bottom: none;
color: #828282;
}
.black .comment.lined {
border-left: 1px solid #444444;
}

View File

@@ -11,12 +11,14 @@
border: 1px solid #828282;
}
.dark .item {
color: #828282;
.dark button {
background-color: #444444;
border-color: #bbb;
color: #ddd;
}
.dark .item .source-logo {
filter: grayscale(1);
.dark .item {
color: #828282;
}
.dark .item a {
@@ -43,6 +45,7 @@
text-decoration: none;
}
.dark .story-text video,
.dark .story-text img {
filter: brightness(50%);
}

View File

@@ -2,9 +2,30 @@ body {
text-rendering: optimizeLegibility;
font: 1rem/1.3 sans-serif;
color: #000000;
margin-bottom: 100vh;
word-break: break-word;
font-kerning: normal;
margin: 0;
}
::backdrop {
background-color: rgba(0,0,0,0);
}
body:fullscreen {
overflow-y: scroll !important;
}
body:-ms-fullscreen {
overflow-y: scroll !important;
}
body:-webkit-full-screen {
overflow-y: scroll !important;
}
body:-moz-full-screen {
overflow-y: scroll !important;
}
#root {
margin: 8px 8px 100vh 8px !important;
}
a {
@@ -22,6 +43,12 @@ input {
border-radius: 4px;
}
.fullscreen {
margin: 0.25rem;
padding: 0.25rem;
}
pre {
overflow: auto;
}
@@ -185,16 +212,20 @@ span.source {
cursor: pointer;
}
.toggleDot {
.dot {
cursor: pointer;
position: fixed;
bottom: 1rem;
left: 1rem;
height: 3rem;
width: 3rem;
background-color: #828282;
border-radius: 50%;
}
.toggleDot {
bottom: 1rem;
left: 1rem;
}
.toggleDot .button {
font: 2rem/1 'icomoon';
position: relative;
@@ -203,21 +234,27 @@ span.source {
}
.forwardDot {
cursor: pointer;
position: fixed;
bottom: 1rem;
right: 1rem;
height: 3rem;
width: 3rem;
background-color: #828282;
border-radius: 50%;
}
.forwardDot .button {
font: 2.5rem/1 'icomoon';
font: 2rem/1 'icomoon';
position: relative;
top: 0.25rem;
left: 0.3rem;
top: 0.5rem;
left: 0.5rem;
}
.backwardDot {
bottom: 1rem;
right: 5rem;
}
.backwardDot .button {
font: 2rem/1 'icomoon';
position: relative;
top: 0.5rem;
left: 0.5rem;
}
.search form {

View File

@@ -0,0 +1,82 @@
.red {
color: #b00;
scrollbar-color: #b00 #440000;
}
.red a {
color: #b00;
}
.red input {
color: #b00;
border: 1px solid #690000;
}
.red input::placeholder {
color: #690000;
}
.red hr {
background-color: #690000;
}
.red button {
background-color: #440000;
border-color: #b00;
color: #b00;
}
.red .item,
.red .slogan {
color: #690000;
}
.red .item .source-logo {
display: none;
}
.red .item a {
color: #690000;
}
.red .item a.link {
color: #b00;
}
.red .item a.link:visited {
color: #690000;
}
.red .item .info a.hot {
color: #cc0000;
}
.red .article a {
border-bottom: 1px solid #aa0000;
}
.red .article u {
border-bottom: 1px solid #aa0000;
text-decoration: none;
}
.red .story-text video,
.red .story-text img {
filter: grayscale(100%) brightness(20%) sepia(100%) hue-rotate(-50deg) saturate(600%) contrast(0.8);
}
.red .article .info {
color: #690000;
}
.red .article .info a {
border-bottom: none;
color: #690000;
}
.red .comment.lined {
border-left: 1px solid #440000;
}
.red .dot {
background-color: #440000;
}

View File

@@ -41,7 +41,7 @@ class Submit extends Component {
<span className='search'>
<form onSubmit={this.submitArticle}>
<input
placeholder='Submit Article'
placeholder='Submit URL'
ref={this.inputRef}
/>
</form>

Binary file not shown.

File diff suppressed because one or more lines are too long