Compare commits

..

59 Commits

Author SHA1 Message Date
Jason Schwarzenberger
44b8b36547 add data cast in query. 2020-11-10 15:50:18 +13:00
Jason Schwarzenberger
4f49684194 remove logos from utils.js 2020-11-10 15:38:48 +13:00
Jason Schwarzenberger
1d78b1c592 fix favicon url. 2020-11-10 15:34:21 +13:00
Jason Schwarzenberger
0374794536 Sitemap and Category to get favicon into icon property of story. 2020-11-10 15:22:27 +13:00
Jason Schwarzenberger
943a1cfa4f reader server 2020-11-10 14:56:21 +13:00
Jason Schwarzenberger
9cee370a25 tvnz icon 2020-11-10 14:10:02 +13:00
Jason Schwarzenberger
5efc6ef2d3 add related stories (in api only) 2020-11-10 14:09:56 +13:00
Jason Schwarzenberger
4ec50e20cb feed thread loop. 2020-11-10 10:10:38 +13:00
Jason Schwarzenberger
c1b7877f4b remove limit. 2020-11-09 17:54:50 +13:00
Jason Schwarzenberger
7b8cbfc9b9 try to make feed only determined by the max age. 2020-11-09 17:50:58 +13:00
Jason Schwarzenberger
bfa4108a8e Merge remote-tracking branch 'tanner/master' 2020-11-09 16:08:28 +13:00
Jason Schwarzenberger
0bd0d40a31 use json type in sqlite. 2020-11-09 15:45:10 +13:00
Jason Schwarzenberger
4e04595415 fix search. 2020-11-09 15:44:44 +13:00
Jason
006db2960c change to 3 days 2020-11-09 01:36:51 +00:00
Jason Schwarzenberger
1f063f0dac undo log level change 2020-11-06 11:20:34 +13:00
Jason Schwarzenberger
1658346aa9 fix news.py feed. 2020-11-06 10:37:43 +13:00
Jason Schwarzenberger
2dbc702b40 switch to python-dateutil for parser, reverse sort xml feeds. 2020-11-06 10:02:39 +13:00
Jason Schwarzenberger
1c4764e67d sort sitemap feed by lastmod time. 2020-11-06 09:30:15 +13:00
Jason
ee49d2021e newsroom 2020-11-05 20:28:55 +00:00
Jason
c391c50ab1 use localize 2020-11-05 04:15:31 +00:00
Jason Schwarzenberger
095f0d549a use replace. 2020-11-05 16:57:08 +13:00
Jason Schwarzenberger
c21c71667e fix date issue. 2020-11-05 16:41:15 +13:00
Jason Schwarzenberger
c3a2c91a11 update requirements.txt 2020-11-05 16:33:50 +13:00
Jason Schwarzenberger
0f39446a61 tz aware for use in settings. 2020-11-05 16:30:55 +13:00
Jason Schwarzenberger
351059aab1 fix excludes. 2020-11-05 15:59:13 +13:00
Jason Schwarzenberger
4488e2c292 add an excludes list of substrings for urls in the settings for sitemap/category. 2020-11-05 15:51:59 +13:00
Jason Schwarzenberger
afda5b635c disqus test. 2020-11-05 14:23:51 +13:00
Jason Schwarzenberger
0fc1a44d2b fix issue in substack. 2020-11-04 17:40:29 +13:00
Jason Schwarzenberger
9fff1b9e46 avoid duplicate articles listed on the category page 2020-11-04 17:14:42 +13:00
Jason Schwarzenberger
16b59f6c67 try stop bad pages. 2020-11-04 16:34:31 +13:00
Jason Schwarzenberger
939f4775a7 better settings example. 2020-11-04 15:52:34 +13:00
Jason Schwarzenberger
9bfc6fc6fa scraper settings, ordering and loop. 2020-11-04 15:47:12 +13:00
Jason Schwarzenberger
6ea9844d00 remove useless try blocks. 2020-11-04 15:37:19 +13:00
Jason Schwarzenberger
1318259d3d imply referrer is substack. 2020-11-04 15:21:07 +13:00
Jason Schwarzenberger
98a0c2257c increase declutter timeout. 2020-11-04 15:15:00 +13:00
Jason Schwarzenberger
e6976db25d fix tabs 2020-11-04 15:04:20 +13:00
Jason Schwarzenberger
9edc8b7cca move scraping for article content to files. 2020-11-04 15:00:58 +13:00
Jason Schwarzenberger
33e21e7f30 fix mistake. 2020-11-04 12:45:01 +13:00
Jason Schwarzenberger
892a99eca6 add + expander in place of collapser. 2020-11-04 12:43:15 +13:00
Jason Schwarzenberger
d718d05a04 fix dates for newsroom. 2020-11-04 11:53:16 +13:00
Jason Schwarzenberger
d1795eb1b8 add radionz and newsroom logos. 2020-11-04 11:30:56 +13:00
Jason Schwarzenberger
9f4ff4acf0 remove unnecessary sitemap.xml request. 2020-11-04 11:22:15 +13:00
Jason Schwarzenberger
db6aad84ec fix mistake. 2020-11-04 11:12:01 +13:00
Jason Schwarzenberger
29f8a8b8cc add news site categories feed. 2020-11-04 11:08:50 +13:00
Jason
abf8589e02 fix sitemap 2020-11-03 10:53:40 +00:00
Jason
b759f46582 use extruct for opengraph/json-ld/microdata of articles 2020-11-03 10:31:36 +00:00
Jason Schwarzenberger
736cdc8576 fix mistake. 2020-11-03 17:04:46 +13:00
Jason Schwarzenberger
244d416f6e settings config of sitemap/substack publications. 2020-11-03 17:01:29 +13:00
Jason Schwarzenberger
5f98a2e76a Merge remote-tracking branch 'tanner/master' into master
And adding relevant setings.py.example/etc.
2020-11-03 16:44:02 +13:00
Jason Schwarzenberger
0567cdfd9b move sort to render. 2020-11-03 16:30:22 +13:00
Jason Schwarzenberger
4f90671cec order feed by reverse chronological 2020-11-03 16:21:23 +13:00
Jason Schwarzenberger
e63a1456a5 add logos. 2020-11-03 16:07:07 +13:00
Jason Schwarzenberger
76f1d57702 sitemap based feed. 2020-11-03 16:00:03 +13:00
Jason Schwarzenberger
de80389ed0 add logos. 2020-11-03 12:48:19 +13:00
Jason Schwarzenberger
4e64cf682a add the bulletin. 2020-11-03 12:41:16 +13:00
Jason Schwarzenberger
c5fe5d25a0 add substack.py top sites, replacing webworm.py 2020-11-03 12:28:39 +13:00
Jason
283a2b1545 fix webworm comments 2020-11-02 22:06:43 +00:00
Jason Schwarzenberger
0d6a86ace2 fix webworm dates. 2020-11-03 10:31:14 +13:00
Jason Schwarzenberger
f23bf628e0 add webworm/substack as a feed. 2020-11-02 17:09:59 +13:00
54 changed files with 5016 additions and 7126 deletions

1
.gitignore vendored
View File

@@ -1 +0,0 @@
.aider*

View File

@@ -109,5 +109,4 @@ settings.py
data.db data.db
data.db.bak data.db.bak
data/archive/* data/archive/*
data/backup/*
qotnews.sqlite qotnews.sqlite

View File

@@ -1,11 +1,11 @@
import json from datetime import datetime, timedelta
from sqlalchemy import create_engine, Column, String, ForeignKey, Integer from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import IntegrityError from sqlalchemy.exc import IntegrityError
from sqlalchemy.types import JSON
engine = create_engine('sqlite:///data/qotnews.sqlite', connect_args={'timeout': 360}) engine = create_engine('sqlite:///data/qotnews.sqlite')
Session = sessionmaker(bind=engine) Session = sessionmaker(bind=engine)
Base = declarative_base() Base = declarative_base()
@@ -15,8 +15,8 @@ class Story(Base):
sid = Column(String(16), primary_key=True) sid = Column(String(16), primary_key=True)
ref = Column(String(16), unique=True) ref = Column(String(16), unique=True)
meta_json = Column(String) meta = Column(JSON)
full_json = Column(String) data = Column(JSON)
title = Column(String) title = Column(String)
class Reflist(Base): class Reflist(Base):
@@ -36,19 +36,21 @@ def get_story(sid):
def put_story(story): def put_story(story):
story = story.copy() story = story.copy()
full_json = json.dumps(story) data = {}
data.update(story)
story.pop('text', None) meta = {}
story.pop('comments', None) meta.update(story)
meta_json = json.dumps(story) meta.pop('text', None)
meta.pop('comments', None)
try: try:
session = Session() session = Session()
s = Story( s = Story(
sid=story['id'], sid=story['id'],
ref=story['ref'], ref=story['ref'],
full_json=full_json, data=data,
meta_json=meta_json, meta=meta,
title=story.get('title', None), title=story.get('title', None),
) )
session.merge(s) session.merge(s)
@@ -63,19 +65,26 @@ def get_story_by_ref(ref):
session = Session() session = Session()
return session.query(Story).filter(Story.ref==ref).first() return session.query(Story).filter(Story.ref==ref).first()
def get_reflist(amount): def get_stories_by_url(url):
session = Session() session = Session()
q = session.query(Reflist).order_by(Reflist.rid.desc()).limit(amount) return session.query(Story).\
filter(Story.title != None).\
filter(Story.meta['url'].as_string() == url).\
order_by(Story.meta['date'].desc())
def get_reflist():
session = Session()
q = session.query(Reflist).order_by(Reflist.rid.desc())
return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()] return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]
def get_stories(amount, skip=0): def get_stories(maxage=60*60*24*2):
time = datetime.now().timestamp() - maxage
session = Session() session = Session()
q = session.query(Reflist, Story.meta_json).\ q = session.query(Reflist, Story.meta).\
order_by(Reflist.rid.desc()).\
join(Story).\ join(Story).\
filter(Story.title != None).\ filter(Story.title != None).\
offset(skip).\ filter(Story.meta['date'].as_integer() > time).\
limit(amount) order_by(Story.meta['date'].desc())
return [x[1] for x in q] return [x[1] for x in q]
def put_ref(ref, sid, source): def put_ref(ref, sid, source):
@@ -101,22 +110,7 @@ def del_ref(ref):
finally: finally:
session.close() session.close()
def count_stories():
try:
session = Session()
return session.query(Story).count()
finally:
session.close()
def get_story_list():
try:
session = Session()
return session.query(Story.sid).all()
finally:
session.close()
if __name__ == '__main__': if __name__ == '__main__':
init() init()
#print(get_story_by_ref('hgi3sy')) print(get_story_by_ref('hgi3sy'))
print(len(get_reflist(99999)))

View File

@@ -1,8 +1,6 @@
import database import database
import search import search
import sys import sys
import settings
import logging
import json import json
import requests import requests
@@ -23,7 +21,7 @@ def database_del_story(sid):
def search_del_story(sid): def search_del_story(sid):
try: try:
r = requests.delete(settings.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2) r = requests.delete(search.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
if r.status_code != 202: if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code)) raise Exception('Bad response code ' + str(r.status_code))
return r.json() return r.json()

View File

@@ -6,84 +6,117 @@ logging.basicConfig(
import requests import requests
import time import time
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import itertools
import settings import settings
from feeds import hackernews, reddit, tildes, manual, lobsters from feeds import hackernews, reddit, tildes, substack, manual, news
import utils from scrapers import outline, declutter, local
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com', 'sec.gov'] INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
TWO_DAYS = 60*60*24*2
substacks = {}
for key, value in settings.SUBSTACK.items():
substacks[key] = substack.Publication(value['url'])
categories = {}
for key, value in settings.CATEGORY.items():
categories[key] = news.Category(value['url'], value.get('tz'))
sitemaps = {}
for key, value in settings.SITEMAP.items():
sitemaps[key] = news.Sitemap(value['url'], value.get('tz'))
def get_list():
feeds = {}
def list():
feed = []
if settings.NUM_HACKERNEWS: if settings.NUM_HACKERNEWS:
feed += [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]] feeds['hackernews'] = [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
if settings.NUM_LOBSTERS:
feed += [(x, 'lobsters') for x in lobsters.feed()[:settings.NUM_LOBSTERS]]
if settings.NUM_REDDIT: if settings.NUM_REDDIT:
feed += [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]] feeds['reddit'] = [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
if settings.NUM_TILDES: if settings.NUM_TILDES:
feed += [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]] feeds['tildes'] = [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
if settings.NUM_SUBSTACK:
feeds['substack'] = [(x, 'substack') for x in substack.top.feed()[:settings.NUM_SUBSTACK]]
for key, publication in substacks.items():
count = settings.SUBSTACK[key]['count']
feeds[key] = [(x, key) for x in publication.feed()[:count]]
for key, sites in categories.items():
count = settings.CATEGORY[key].get('count') or 0
excludes = settings.CATEGORY[key].get('excludes')
tz = settings.CATEGORY[key].get('tz')
feeds[key] = [(x, key) for x in sites.feed(excludes)[:count]]
for key, sites in sitemaps.items():
count = settings.SITEMAP[key].get('count') or 0
excludes = settings.SITEMAP[key].get('excludes')
feeds[key] = [(x, key) for x in sites.feed(excludes)[:count]]
values = feeds.values()
feed = itertools.chain.from_iterable(itertools.zip_longest(*values, fillvalue=None))
feed = list(filter(None, feed))
return feed return feed
def get_article(url): def get_article(url):
if not settings.READER_URL: scrapers = {
logging.info('Readerserver not configured, aborting.') 'declutter': declutter,
return '' 'outline': outline,
'local': local,
if url.startswith('https://twitter.com'): }
logging.info('Replacing twitter.com url with nitter.net') available = settings.SCRAPERS or ['local']
url = url.replace('twitter.com', 'nitter.net') if 'local' not in available:
available += ['local']
for scraper in available:
if scraper not in scrapers.keys():
continue
try: try:
r = requests.post(settings.READER_URL, data=dict(url=url), timeout=20) html = scrapers[scraper].get_html(url)
if r.status_code != 200: if html:
raise Exception('Bad response code ' + str(r.status_code)) return html
return r.text
except KeyboardInterrupt: except KeyboardInterrupt:
raise raise
except BaseException as e: except:
logging.error('Problem getting article: {}'.format(str(e))) pass
return '' return ''
def get_content_type(url): def get_content_type(url):
try:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
return requests.get(url, headers=headers, timeout=5).headers['content-type']
except:
return ''
try: try:
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', 'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
'X-Forwarded-For': '66.249.66.1', 'X-Forwarded-For': '66.249.66.1',
} }
return requests.get(url, headers=headers, timeout=10).headers['content-type'] return requests.get(url, headers=headers, timeout=5).headers['content-type']
except: except:
pass pass
try:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
return requests.get(url, headers=headers, timeout=10).headers['content-type']
except:
return ''
def update_story(story, is_manual=False): def update_story(story, is_manual=False):
res = {} res = {}
try:
if story['source'] == 'hackernews': if story['source'] == 'hackernews':
res = hackernews.story(story['ref']) res = hackernews.story(story['ref'])
elif story['source'] == 'lobsters':
res = lobsters.story(story['ref'])
elif story['source'] == 'reddit': elif story['source'] == 'reddit':
res = reddit.story(story['ref']) res = reddit.story(story['ref'])
elif story['source'] == 'tildes': elif story['source'] == 'tildes':
res = tildes.story(story['ref']) res = tildes.story(story['ref'])
elif story['source'] == 'substack':
res = substack.top.story(story['ref'])
elif story['source'] in categories.keys():
res = categories[story['source']].story(story['ref'])
elif story['source'] in sitemaps.keys():
res = sitemaps[story['source']].story(story['ref'])
elif story['source'] in substacks.keys():
res = substacks[story['source']].story(story['ref'])
elif story['source'] == 'manual': elif story['source'] == 'manual':
res = manual.story(story['ref']) res = manual.story(story['ref'])
except BaseException as e:
utils.alert_tanner('Problem updating {} story, ref {}: {}'.format(story['source'], story['ref'], str(e)))
logging.exception(e)
return False
if res: if res:
story.update(res) # join dicts story.update(res) # join dicts
@@ -91,8 +124,8 @@ def update_story(story, is_manual=False):
logging.info('Story not ready yet') logging.info('Story not ready yet')
return False return False
if story['date'] and not is_manual and story['date'] + TWO_DAYS < time.time(): if story['date'] and not is_manual and story['date'] + settings.MAX_STORY_AGE < time.time():
logging.info('Story too old, removing. Date: {}'.format(story['date'])) logging.info('Story too old, removing')
return False return False
if story.get('url', '') and not story.get('text', ''): if story.get('url', '') and not story.get('text', ''):
@@ -106,12 +139,6 @@ def update_story(story, is_manual=False):
logging.info(story['url']) logging.info(story['url'])
return False return False
if 'trump' in story['title'].lower() or 'musk' in story['title'].lower() or 'Removed by moderator' in story['title']:
logging.info('Trump / Musk / removed story, skipping')
logging.info(story['url'])
return False
logging.info('Getting article ' + story['url']) logging.info('Getting article ' + story['url'])
story['text'] = get_article(story['url']) story['text'] = get_article(story['url'])
if not story['text']: return False if not story['text']: return False
@@ -129,7 +156,7 @@ if __name__ == '__main__':
#print(get_article('https://www.bloomberg.com/news/articles/2019-09-23/xi-s-communists-under-pressure-as-high-prices-hit-china-workers')) #print(get_article('https://www.bloomberg.com/news/articles/2019-09-23/xi-s-communists-under-pressure-as-high-prices-hit-china-workers'))
a = get_content_type('https://tefkos.comminfo.rutgers.edu/Courses/e530/Readings/Beal%202008%20full%20text%20searching.pdf') a = get_article('https://blog.joinmastodon.org/2019/10/mastodon-3.0/')
print(a) print(a)
print('done') print('done')

View File

@@ -12,8 +12,7 @@ import requests
from utils import clean from utils import clean
API_TOPSTORIES = lambda x: 'https://hacker-news.firebaseio.com/v0/topstories.json' API_TOPSTORIES = lambda x: 'https://hacker-news.firebaseio.com/v0/topstories.json'
ALG_API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x) API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
BHN_API_ITEM = lambda x : 'https://api.hnpwa.com/v0/item/{}.json'.format(x)
SITE_LINK = lambda x : 'https://news.ycombinator.com/item?id={}'.format(x) SITE_LINK = lambda x : 'https://news.ycombinator.com/item?id={}'.format(x)
SITE_AUTHOR_LINK = lambda x : 'https://news.ycombinator.com/user?id={}'.format(x) SITE_AUTHOR_LINK = lambda x : 'https://news.ycombinator.com/user?id={}'.format(x)
@@ -43,7 +42,7 @@ def api(route, ref=None):
def feed(): def feed():
return [str(x) for x in api(API_TOPSTORIES) or []] return [str(x) for x in api(API_TOPSTORIES) or []]
def alg_comment(i): def comment(i):
if 'author' not in i: if 'author' not in i:
return False return False
@@ -52,25 +51,21 @@ def alg_comment(i):
c['score'] = i.get('points', 0) c['score'] = i.get('points', 0)
c['date'] = i.get('created_at_i', 0) c['date'] = i.get('created_at_i', 0)
c['text'] = clean(i.get('text', '') or '') c['text'] = clean(i.get('text', '') or '')
c['comments'] = [alg_comment(j) for j in i['children']] c['comments'] = [comment(j) for j in i['children']]
c['comments'] = list(filter(bool, c['comments'])) c['comments'] = list(filter(bool, c['comments']))
return c return c
def alg_comment_count(i): def comment_count(i):
alive = 1 if i['author'] else 0 alive = 1 if i['author'] else 0
return sum([alg_comment_count(c) for c in i['comments']]) + alive return sum([comment_count(c) for c in i['comments']]) + alive
def alg_story(ref): def story(ref):
r = api(ALG_API_ITEM, ref) r = api(API_ITEM, ref)
if not r: if not r: return False
logging.info('Bad Algolia Hackernews API response.')
return None
if 'deleted' in r: if 'deleted' in r:
logging.info('Story was deleted.')
return False return False
elif r.get('type', '') != 'story': elif r.get('type', '') != 'story':
logging.info('Type "{}" is not "story".'.format(r.get('type', '')))
return False return False
s = {} s = {}
@@ -81,88 +76,17 @@ def alg_story(ref):
s['title'] = r.get('title', '') s['title'] = r.get('title', '')
s['link'] = SITE_LINK(ref) s['link'] = SITE_LINK(ref)
s['url'] = r.get('url', '') s['url'] = r.get('url', '')
s['comments'] = [alg_comment(i) for i in r['children']] s['comments'] = [comment(i) for i in r['children']]
s['comments'] = list(filter(bool, s['comments'])) s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = alg_comment_count(s) - 1 s['num_comments'] = comment_count(s) - 1
if 'text' in r and r['text']: if 'text' in r and r['text']:
s['text'] = clean(r['text'] or '') s['text'] = clean(r['text'] or '')
return s return s
def bhn_comment(i):
if 'user' not in i:
return False
c = {}
c['author'] = i.get('user', '')
c['score'] = 0 # Not present?
c['date'] = i.get('time', 0)
c['text'] = clean(i.get('content', '') or '')
c['comments'] = [bhn_comment(j) for j in i['comments']]
c['comments'] = list(filter(bool, c['comments']))
return c
def bhn_story(ref):
r = api(BHN_API_ITEM, ref)
if not r:
logging.info('Bad BetterHN Hackernews API response.')
return None
if 'deleted' in r: # TODO: verify
logging.info('Story was deleted.')
return False
elif r.get('dead', False):
logging.info('Story was deleted.')
return False
elif r.get('type', '') != 'link':
logging.info('Type "{}" is not "link".'.format(r.get('type', '')))
return False
s = {}
s['author'] = r.get('user', '')
s['author_link'] = SITE_AUTHOR_LINK(r.get('user', ''))
s['score'] = r.get('points', 0)
s['date'] = r.get('time', 0)
s['title'] = r.get('title', '')
s['link'] = SITE_LINK(ref)
s['url'] = r.get('url', '')
if s['url'].startswith('item'):
s['url'] = SITE_LINK(ref)
s['comments'] = [bhn_comment(i) for i in r['comments']]
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = r.get('comments_count', 0)
if 'content' in r and r['content']:
s['text'] = clean(r['content'] or '')
return s
def story(ref):
s = alg_story(ref)
if s is None:
s = bhn_story(ref)
if not s:
return False
if not s['title']:
return False
if s['score'] < 25 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False
return s
# scratchpad so I can quickly develop the parser # scratchpad so I can quickly develop the parser
if __name__ == '__main__': if __name__ == '__main__':
print(feed()) print(feed())
#print(story(20763961)) #print(story(20763961))
#print(story(20802050)) #print(story(20802050))
#print(story(42899834)) # type "job"
#print(story(42900076)) # Ask HN
#print(story(42898201)) # Show HN
#print(story(42899703)) # normal
print(story(42902678)) # bad title?

View File

@@ -1,120 +0,0 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
if __name__ == '__main__':
import sys
sys.path.insert(0,'.')
import requests
from datetime import datetime
from utils import clean
API_HOTTEST = lambda x: 'https://lobste.rs/hottest.json'
API_ITEM = lambda x : 'https://lobste.rs/s/{}.json'.format(x)
SITE_LINK = lambda x : 'https://lobste.rs/s/{}'.format(x)
SITE_AUTHOR_LINK = lambda x : 'https://lobste.rs/u/{}'.format(x)
def api(route, ref=None):
try:
r = requests.get(route(ref), timeout=5)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting lobsters API: {}, trying again'.format(str(e)))
try:
r = requests.get(route(ref), timeout=15)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting lobsters API: {}'.format(str(e)))
return False
def feed():
return [x['short_id'] for x in api(API_HOTTEST) or []]
def unix(date_str):
date_str = date_str.replace(':', '')
return int(datetime.strptime(date_str, '%Y-%m-%dT%H%M%S.%f%z').timestamp())
def make_comment(i):
c = {}
try:
c['author'] = i['commenting_user']
except KeyError:
c['author'] = ''
c['score'] = i.get('score', 0)
try:
c['date'] = unix(i['created_at'])
except KeyError:
c['date'] = 0
c['text'] = clean(i.get('comment', '') or '')
c['comments'] = []
return c
def iter_comments(flat_comments):
nested_comments = []
parent_stack = []
for comment in flat_comments:
c = make_comment(comment)
indent = comment['depth']
if indent == 0:
nested_comments.append(c)
parent_stack = [c]
else:
parent_stack = parent_stack[:indent]
p = parent_stack[-1]
p['comments'].append(c)
parent_stack.append(c)
return nested_comments
def story(ref):
r = api(API_ITEM, ref)
if not r:
logging.info('Bad Lobsters API response.')
return False
s = {}
try:
s['author'] = r['submitter_user']
s['author_link'] = SITE_AUTHOR_LINK(s['author'])
except KeyError:
s['author'] = ''
s['author_link'] = ''
s['score'] = r.get('score', 0)
try:
s['date'] = unix(r['created_at'])
except KeyError:
s['date'] = 0
s['title'] = r.get('title', '')
s['link'] = SITE_LINK(ref)
s['url'] = r.get('url', '')
s['comments'] = iter_comments(r['comments'])
s['num_comments'] = r['comment_count']
if s['score'] < 15 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False
if 'description' in r and r['description']:
s['text'] = clean(r['description'] or '')
return s
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
#print(feed())
import json
print(json.dumps(story('fzvd1v'), indent=4))
#print(json.dumps(story('ixyv5u'), indent=4))

View File

@@ -27,9 +27,7 @@ def api(route):
def story(ref): def story(ref):
html = api(ref) html = api(ref)
if not html: if not html: return False
logging.info('Bad http GET response.')
return False
soup = BeautifulSoup(html, features='html.parser') soup = BeautifulSoup(html, features='html.parser')

244
apiserver/feeds/news.py Normal file
View File

@@ -0,0 +1,244 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
if __name__ == '__main__':
import sys
sys.path.insert(0,'.')
import requests
from datetime import datetime
from bs4 import BeautifulSoup
from scrapers import declutter
import dateutil.parser
import extruct
import pytz
from utils import clean
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'
#USER_AGENT = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
def unix(date_str, tz=None):
try:
dt = dateutil.parser.parse(date_str)
if tz:
dt = pytz.timezone(tz).localize(dt)
return int(dt.timestamp())
except:
pass
return 0
def xml(route, ref=None):
try:
headers = {'User-Agent': USER_AGENT, 'X-Forwarded-For': '66.249.66.1'}
r = requests.get(route(ref), headers=headers, timeout=5)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.text
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting URL: {}'.format(str(e)))
return False
def parse_extruct(s, data):
for rdfa in data['rdfa']:
for key, props in rdfa.items():
if 'http://ogp.me/ns#title' in props:
for values in props['http://ogp.me/ns#title']:
s['title'] = values['@value']
if 'http://ogp.me/ns/article#modified_time' in props:
for values in props['http://ogp.me/ns/article#modified_time']:
s['date'] = values['@value']
if 'http://ogp.me/ns/article#published_time' in props:
for values in props['http://ogp.me/ns/article#published_time']:
s['date'] = values['@value']
for og in data['opengraph']:
titles = list(filter(None, [value if 'og:title' in key else None for key, value in og['properties']]))
modified = list(filter(None, [value if 'article:modified_time' in key else None for key, value in og['properties']]))
published = list(filter(None, [value if 'article:published_time' in key else None for key, value in og['properties']]))
if len(modified):
s['date'] = modified[0]
if len(published):
s['date'] = published[0]
if len(titles):
s['title'] = titles[0]
for md in data['microdata']:
if md['type'] == 'https://schema.org/NewsArticle':
props = md['properties']
s['title'] = props['headline']
if props['dateModified']:
s['date'] = props['dateModified']
if props['datePublished']:
s['date'] = props['datePublished']
if 'author' in props and props['author']:
s['author'] = props['author']['properties']['name']
for ld in data['json-ld']:
if '@type' in ld and ld['@type'] in ['Article', 'NewsArticle']:
s['title'] = ld['headline']
if ld['dateModified']:
s['date'] = ld['dateModified']
if ld['datePublished']:
s['date'] = ld['datePublished']
if 'author' in ld and ld['author']:
s['author'] = ld['author']['name']
if '@graph' in ld:
for gld in ld['@graph']:
if '@type' in gld and gld['@type'] in ['Article', 'NewsArticle']:
s['title'] = gld['headline']
if gld['dateModified']:
s['date'] = gld['dateModified']
if gld['datePublished']:
s['date'] = gld['datePublished']
return s
def comment(i):
if 'author' not in i:
return False
c = {}
c['author'] = i.get('author', '')
c['score'] = i.get('points', 0)
c['date'] = unix(i.get('date', 0))
c['text'] = clean(i.get('text', '') or '')
c['comments'] = [comment(j) for j in i['children']]
c['comments'] = list(filter(bool, c['comments']))
return c
def comment_count(i):
alive = 1 if i['author'] else 0
return sum([comment_count(c) for c in i['comments']]) + alive
class _Base:
def __init__(url, tz=None):
self.url = url
self.tz = tz
def feed(self, excludes=None):
return []
def story(self, ref):
markup = xml(lambda x: ref)
if not markup:
return False
s = {}
s['author_link'] = ''
s['score'] = 0
s['comments'] = []
s['num_comments'] = 0
s['link'] = ref
s['url'] = ref
s['date'] = 0
soup = BeautifulSoup(markup, features='html.parser')
icon32 = soup.find_all('link', rel="icon", href=True, sizes="32x32")
icon16 = soup.find_all('link', rel="icon", href=True, sizes="16x16")
favicon = soup.find_all('link', rel="shortcut icon", href=True)
others = soup.find_all('link', rel="icon", href=True)
icons = icon32 + icon16 + favicon + others
base_url = '/'.join(ref.split('/')[:3])
icons = list(set([i.get('href') for i in icons]))
icons = [i if i.startswith('http') else base_url + i for i in icons]
if icons:
s['icon'] = icons[0]
data = extruct.extract(markup)
s = parse_extruct(s, data)
if s['date']:
s['date'] = unix(s['date'], tz=self.tz)
if 'disqus' in markup:
try:
s['comments'] = declutter.get_comments(ref)
c['comments'] = list(filter(bool, c['comments']))
s['num_comments'] = comment_count(s['comments'])
except KeyboardInterrupt:
raise
except:
pass
if not s['date']:
return False
return s
def get_sitemap_date(a):
if a.find('lastmod'):
return a.find('lastmod').text
if a.find('news:publication_date'):
return a.find('news:publication_date').text
if a.find('ns2:publication_date'):
return a.find('ns2:publication_date').text
return ''
class Sitemap(_Base):
def __init__(self, url, tz=None):
self.tz = tz
self.sitemap_url = url
def feed(self, excludes=None):
markup = xml(lambda x: self.sitemap_url)
if not markup: return []
soup = BeautifulSoup(markup, features='lxml')
sitemap = soup.find('urlset').findAll('url')
links = list(filter(None, [a if a.find('loc') else None for a in sitemap]))
links = list(filter(None, [a if get_sitemap_date(a) else None for a in links]))
links.sort(key=lambda a: unix(get_sitemap_date(a)), reverse=True)
links = [x.find('loc').text for x in links] or []
links = list(set(links))
if excludes:
links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
return links
class Category(_Base):
def __init__(self, url, tz=None):
self.tz = tz
self.category_url = url
self.base_url = '/'.join(url.split('/')[:3])
def feed(self, excludes=None):
markup = xml(lambda x: self.category_url)
if not markup: return []
soup = BeautifulSoup(markup, features='html.parser')
links = soup.find_all('a', href=True)
links = [link.get('href') for link in links]
links = [f"{self.base_url}{link}" if link.startswith('/') else link for link in links]
links = list(filter(None, [link if link.startswith(self.category_url) else None for link in links]))
links = list(filter(None, [link if link != self.category_url else None for link in links]))
links = list(set(links))
if excludes:
links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
return links
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
print("Sitemap: Stuff")
site = Sitemap("https://www.stuff.co.nz/sitemap/news/sitemap.xml")
posts = site.feed()
print(posts[:5])
print(site.story(posts[0]))
print("Category: RadioNZ Te Ao Māori")
site = Category("https://www.rnz.co.nz/news/te-manu-korihi/")
posts = site.feed()
print(posts[:5])
print(site.story(posts[0]))
print("Sitemap: Newsroom")
site = Sitemap("https://www.newsroom.co.nz/sitemap.xml")
posts = site.feed()
print(posts[:5])
print(site.story(posts[0]))

View File

@@ -32,8 +32,11 @@ def feed():
return [x.id for x in reddit.subreddit(subs).hot()] return [x.id for x in reddit.subreddit(subs).hot()]
except KeyboardInterrupt: except KeyboardInterrupt:
raise raise
except BaseException as e: except PRAWException as e:
logging.critical('Problem hitting reddit API: {}'.format(str(e))) logging.error('Problem hitting reddit API: {}'.format(str(e)))
return []
except PrawcoreException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
return [] return []
def comment(i): def comment(i):
@@ -56,9 +59,7 @@ def comment(i):
def story(ref): def story(ref):
try: try:
r = reddit.submission(ref) r = reddit.submission(ref)
if not r: if not r: return False
logging.info('Bad Reddit API response.')
return False
s = {} s = {}
s['author'] = r.author.name if r.author else '[Deleted]' s['author'] = r.author.name if r.author else '[Deleted]'
@@ -73,7 +74,6 @@ def story(ref):
s['num_comments'] = r.num_comments s['num_comments'] = r.num_comments
if s['score'] < 25 and s['num_comments'] < 10: if s['score'] < 25 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False return False
if r.selftext: if r.selftext:
@@ -84,10 +84,10 @@ def story(ref):
except KeyboardInterrupt: except KeyboardInterrupt:
raise raise
except PRAWException as e: except PRAWException as e:
logging.critical('Problem hitting reddit API: {}'.format(str(e))) logging.error('Problem hitting reddit API: {}'.format(str(e)))
return False return False
except PrawcoreException as e: except PrawcoreException as e:
logging.critical('Problem hitting reddit API: {}'.format(str(e))) logging.error('Problem hitting reddit API: {}'.format(str(e)))
return False return False
# scratchpad so I can quickly develop the parser # scratchpad so I can quickly develop the parser

165
apiserver/feeds/substack.py Normal file
View File

@@ -0,0 +1,165 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
if __name__ == '__main__':
import sys
sys.path.insert(0,'.')
import requests
from datetime import datetime
from utils import clean
SUBSTACK_REFERER = 'https://substack.com'
SUBSTACK_API_TOP_POSTS = lambda x: "https://substack.com/api/v1/reader/top-posts"
def author_link(author_id, base_url):
    """Build the public profile URL for a Substack author."""
    return '{}/people/{}'.format(base_url, author_id)
def api_comments(post_id, base_url):
    """API route returning the full, best-first comment tree for a post."""
    return '{}/api/v1/post/{}/comments?all_comments=true&sort=best_first'.format(base_url, post_id)
def api_stories(x, base_url):
    """API route listing the 100 newest archive posts.

    x is unused; it exists so this matches the route(ref) calling
    convention used by api().
    """
    return base_url + '/api/v1/archive?sort=new&search=&offset=0&limit=100'
def unix(date_str):
    """Convert a Substack ISO-8601 UTC timestamp to a unix epoch int.

    date_str looks like '2020-11-10T01:30:00.000Z'; the trailing 'Z'
    means UTC, so the parsed datetime must be made timezone-aware before
    calling timestamp() — a naive datetime would be interpreted in the
    server's local timezone, skewing every date on non-UTC hosts.
    """
    from datetime import timezone  # module header only imports datetime
    dt = datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%fZ')
    return int(dt.replace(tzinfo=timezone.utc).timestamp())
def api(route, ref=None, referer=None):
    """GET a Substack API route and return the parsed JSON, or False.

    route: callable taking ref and returning a URL (see api_stories/api_comments).
    referer: optional Referer header (Substack requires it on some routes).
    Tries once with a 10s timeout, then retries once at 20s before giving up.
    """
    headers = {'Referer': referer} if referer else None
    # second attempt doubles the timeout; only the first failure says "trying again"
    timeouts = [10, 20]
    for attempt, timeout in enumerate(timeouts):
        try:
            r = requests.get(route(ref), headers=headers, timeout=timeout)
            if r.status_code != 200:
                raise Exception('Bad response code ' + str(r.status_code))
            return r.json()
        except KeyboardInterrupt:
            raise
        except BaseException as e:
            if attempt + 1 < len(timeouts):
                logging.error('Problem hitting Substack API: {}, trying again'.format(str(e)))
            else:
                logging.error('Problem hitting Substack API: {}'.format(str(e)))
    return False
def comment(i):
    """Normalise one Substack comment dict (recursively), or False if empty.

    Returns {'date','author','score','text','comments'}; child comments that
    normalise to False are filtered out.
    """
    if 'body' not in i:
        return False

    c = {}
    c['date'] = unix(i.get('date'))
    c['author'] = i.get('name', '')
    # 'reactions' may be absent/None; the '' key holds the like count
    c['score'] = (i.get('reactions') or {}).get('')
    c['text'] = clean(i.get('body', '') or '')
    # 'children' may be absent/None for leaf comments
    c['comments'] = [comment(j) for j in i.get('children') or []]
    c['comments'] = list(filter(bool, c['comments']))
    return c
class Publication:
    """Scraper for a single Substack publication's archive."""

    def __init__(self, domain):
        self.BASE_DOMAIN = domain

    def feed(self):
        """Return ids (as strings) of publicly readable archive posts."""
        stories = api(lambda x: api_stories(x, self.BASE_DOMAIN), referer=self.BASE_DOMAIN)
        if not stories: return []
        stories = [i for i in stories if i.get("audience") == "everyone"]
        return [str(i.get("id")) for i in stories]

    def story(self, ref):
        """Fetch full story data (with comments) for post id ref, or False."""
        stories = api(lambda x: api_stories(x, self.BASE_DOMAIN), referer=self.BASE_DOMAIN)
        if not stories: return False
        stories = [i for i in stories if i.get("audience") == "everyone"]
        stories = [i for i in stories if str(i.get('id')) == ref]
        if len(stories) == 0:
            return False
        r = stories[0]
        if not r:
            return False

        s = {}
        s['author'] = ''
        s['author_link'] = ''
        s['date'] = unix(r.get('post_date'))
        # 'reactions' may be None; the '' key holds the like count
        s['score'] = (r.get('reactions') or {}).get('')
        s['title'] = r.get('title', '')
        s['link'] = r.get('canonical_url', '')
        s['url'] = r.get('canonical_url', '')

        comments = api(lambda x: api_comments(x, self.BASE_DOMAIN), r.get('id'), referer=self.BASE_DOMAIN)
        # api() returns False on failure — don't crash on a bad comments response
        comment_list = comments.get('comments') if comments else []
        s['comments'] = list(filter(bool, [comment(i) for i in comment_list]))
        s['num_comments'] = r.get('comment_count', 0)

        # 'publishedBylines' may be absent/None
        authors = list(filter(None, [self._bylines(b) for b in r.get('publishedBylines') or []]))
        if len(authors):
            s['author'] = authors[0].get('name')
            s['author_link'] = authors[0].get('link')
        return s

    def _bylines(self, b):
        """Map one byline record to {'name','link'}; None if it has no id."""
        if 'id' not in b:
            return None
        a = {}
        a['name'] = b.get('name')
        a['link'] = author_link(b.get('id'), self.BASE_DOMAIN)
        return a
class Top:
    """Scraper for Substack's global top-posts feed."""

    def feed(self):
        """Return ids (as strings) of publicly readable top posts."""
        stories = api(SUBSTACK_API_TOP_POSTS, referer=SUBSTACK_REFERER)
        if not stories: return []
        stories = [i for i in stories if i.get("audience") == "everyone"]
        return [str(i.get("id")) for i in stories]

    def story(self, ref):
        """Fetch full story data (with comments) for post id ref, or False."""
        stories = api(SUBSTACK_API_TOP_POSTS, referer=SUBSTACK_REFERER)
        if not stories: return False
        stories = [i for i in stories if i.get("audience") == "everyone"]
        stories = [i for i in stories if str(i.get('id')) == ref]
        if len(stories) == 0:
            return False
        r = stories[0]
        if not r:
            return False

        # each top post embeds its publication record
        pub = r.get('pub') or {}
        base_url = pub.get('base_url')

        s = {}
        s['author'] = pub.get('author_name')
        s['author_link'] = author_link(pub.get('author_id'), base_url)
        s['date'] = unix(r.get('post_date'))
        s['score'] = r.get('score')
        s['title'] = r.get('title', '')
        s['link'] = r.get('canonical_url', '')
        s['url'] = r.get('canonical_url', '')

        comments = api(lambda x: api_comments(x, base_url), r.get('id'), referer=SUBSTACK_REFERER)
        # api() returns False on failure — don't crash on a bad comments response
        comment_list = comments.get('comments') if comments else []
        s['comments'] = list(filter(bool, [comment(i) for i in comment_list]))
        s['num_comments'] = r.get('comment_count', 0)
        return s
# module-level singleton for the global top-posts feed
top = Top()

# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
    refs = top.feed()
    print(top.story(refs[0]))

    pub = Publication("https://www.webworm.co/")
    pub_refs = pub.feed()
    print(pub.story(pub_refs[0]))

View File

@@ -34,7 +34,7 @@ def api(route):
except KeyboardInterrupt: except KeyboardInterrupt:
raise raise
except BaseException as e: except BaseException as e:
logging.critical('Problem hitting tildes website: {}'.format(str(e))) logging.error('Problem hitting tildes website: {}'.format(str(e)))
return False return False
def feed(): def feed():
@@ -71,15 +71,11 @@ def story(ref):
html = api(SITE_LINK(group_lookup[ref], ref)) html = api(SITE_LINK(group_lookup[ref], ref))
else: else:
html = api(API_ITEM(ref)) html = api(API_ITEM(ref))
if not html: if not html: return False
logging.info('Bad Tildes API response.')
return False
soup = BeautifulSoup(html, features='html.parser') soup = BeautifulSoup(html, features='html.parser')
a = soup.find('article', class_='topic-full') a = soup.find('article', class_='topic-full')
if a is None: if a is None: return False
logging.info('Tildes <article> element not found.')
return False
h = a.find('header') h = a.find('header')
lu = h.find('a', class_='link-user') lu = h.find('a', class_='link-user')
@@ -87,7 +83,6 @@ def story(ref):
error = a.find('div', class_='text-error') error = a.find('div', class_='text-error')
if error: if error:
if 'deleted' in error.string or 'removed' in error.string: if 'deleted' in error.string or 'removed' in error.string:
logging.info('Article was deleted or removed.')
return False return False
s = {} s = {}
@@ -107,21 +102,7 @@ def story(ref):
ch = a.find('header', class_='topic-comments-header') ch = a.find('header', class_='topic-comments-header')
s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0 s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
if s['group'].split('.')[0] not in [ if s['score'] < 8 and s['num_comments'] < 6:
'~arts',
'~comp',
'~creative',
'~design',
'~engineering',
'~finance',
'~science',
'~tech',
]:
logging.info('Group ({}) not in whitelist.'.format(s['group']))
return False
if s['score'] < 15 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False return False
td = a.find('div', class_='topic-full-text') td = a.find('div', class_='topic-full-text')
@@ -132,7 +113,7 @@ def story(ref):
# scratchpad so I can quickly develop the parser # scratchpad so I can quickly develop the parser
if __name__ == '__main__': if __name__ == '__main__':
print(feed()) #print(feed())
#normal = story('gxt') #normal = story('gxt')
#print(normal) #print(normal)
#no_comments = story('gxr') #no_comments = story('gxr')
@@ -141,8 +122,8 @@ if __name__ == '__main__':
#print(self_post) #print(self_post)
#li_comment = story('gqx') #li_comment = story('gqx')
#print(li_comment) #print(li_comment)
#broken = story('q4y') broken = story('q4y')
#print(broken) print(broken)
# make sure there's no self-reference # make sure there's no self-reference
#import copy #import copy

View File

@@ -4,19 +4,21 @@ certifi==2020.6.20
chardet==3.0.4 chardet==3.0.4
click==7.1.2 click==7.1.2
commonmark==0.9.1 commonmark==0.9.1
extruct==0.10.0
Flask==1.1.2 Flask==1.1.2
Flask-Cors==3.0.8 Flask-Cors==3.0.8
gevent==20.6.2 gevent==20.6.2
greenlet==0.4.16 greenlet==0.4.16
humanize==4.10.0
idna==2.10 idna==2.10
itsdangerous==1.1.0 itsdangerous==1.1.0
Jinja2==2.11.2 Jinja2==2.11.2
lxml==4.6.1
MarkupSafe==1.1.1 MarkupSafe==1.1.1
packaging==20.4 packaging==20.4
praw==6.4.0 praw==6.4.0
prawcore==1.4.0 prawcore==1.4.0
pyparsing==2.4.7 pyparsing==2.4.7
pytz==2020.4
requests==2.24.0 requests==2.24.0
six==1.15.0 six==1.15.0
soupsieve==2.0.1 soupsieve==2.0.1
@@ -28,3 +30,4 @@ websocket-client==0.57.0
Werkzeug==1.0.1 Werkzeug==1.0.1
zope.event==4.4 zope.event==4.4
zope.interface==5.1.0 zope.interface==5.1.0
python-dateutil==2.8.1

View File

@@ -0,0 +1,41 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
import requests
DECLUTTER_API = 'https://declutter.1j.nz/details'
DECLUTTER_COMMENT_API = 'https://declutter.1j.nz/comments'
TIMEOUT = 30
def get_html(url):
    """Return the decluttered article content for url, or '' on failure."""
    logging.info(f"Declutter Scraper: {url}")
    details = get_details(url)
    return details['content'] if details else ''
def get_details(url):
    """POST url to the declutter service; return its JSON dict, or None."""
    try:
        response = requests.post(DECLUTTER_API, data=dict(url=url), timeout=TIMEOUT)
        if response.status_code != 200:
            raise Exception('Bad response code ' + str(response.status_code))
        return response.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem decluttering article: {}'.format(str(e)))
        return None
def get_comments(url):
    """POST url to the declutter comments endpoint; return JSON, or None."""
    try:
        response = requests.post(DECLUTTER_COMMENT_API, data=dict(url=url), timeout=TIMEOUT)
        if response.status_code != 200:
            raise Exception('Bad response code ' + str(response.status_code))
        return response.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem getting comments for article: {}'.format(str(e)))
        return None

View File

@@ -0,0 +1,27 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
import requests
READ_API = 'http://127.0.0.1:33843/details'
TIMEOUT = 20
def get_html(url):
    """Return the locally-scraped article content for url, or '' on failure."""
    logging.info(f"Local Scraper: {url}")
    details = get_details(url)
    return details['content'] if details else ''
def get_details(url):
    """POST url to the local reader server; return its JSON dict, or None."""
    try:
        response = requests.post(READ_API, data=dict(url=url), timeout=TIMEOUT)
        if response.status_code != 200:
            raise Exception('Bad response code ' + str(response.status_code))
        return response.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem getting article: {}'.format(str(e)))
        return None

View File

@@ -0,0 +1,37 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
import requests
OUTLINE_REFERER = 'https://outline.com/'
OUTLINE_API = 'https://api.outline.com/v3/parse_article'
TIMEOUT = 20
def get_html(url):
    """Return the Outline-rendered HTML for url, or '' on failure."""
    details = get_details(url)
    return details['html'] if details else ''
def get_details(url):
    """Parse an article through the Outline API; return its data dict, or None.

    Sleeps 30s and skips when rate-limited (HTTP 429). Returns None on any
    failure, including Outline reporting the URL as unsupported.
    """
    import time  # not imported at module level; needed for rate-limit backoff
    try:
        logging.info(f"Outline Scraper: {url}")
        params = {'source_url': url}
        headers = {'Referer': OUTLINE_REFERER}
        r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=TIMEOUT)
        if r.status_code == 429:
            logging.info('Rate limited by outline, sleeping 30s and skipping...')
            time.sleep(30)
            return None
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        data = r.json()['data']
        if 'URL is not supported by Outline' in data['html']:
            raise Exception('URL not supported by Outline')
        return data
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem outlining article: {}'.format(str(e)))
        return None

View File

@@ -1,58 +0,0 @@
import time
import json
import logging
import feed
import database
import search
database.init()
def fix_gzip_bug(story_list):
    """Scan stored stories for gzip-corruption artifacts and re-fetch bad ones.

    story_list: iterable of 1-tuples of story ids (raw rows from the database).
    For each story whose text contains enough corruption markers per 1000
    characters, re-runs the feed updater and re-indexes the story.
    Side effects only (database + search writes, console output).
    """
    # corruption markers per 1000 chars above which a story gets re-fetched
    FIX_THRESHOLD = 150

    count = 1
    for sid in story_list:
        try:
            sid = sid[0]  # rows come back as 1-tuples
            story = database.get_story(sid)
            full_json = json.loads(story.full_json)
            meta_json = json.loads(story.meta_json)
            text = full_json.get('text', '')
            # NOTE(review): '<EFBFBD>' looks like a scrape-mangled U+FFFD
            # replacement character (UTF-8 bytes EF BF BD) — confirm against
            # the original source before relying on this literal.
            count = text.count('<EFBFBD>')
            if not count: continue

            ratio = count / len(text) * 1000  # markers per 1000 characters
            print('Bad story:', sid, 'Num ?:', count, 'Ratio:', ratio)
            if ratio < FIX_THRESHOLD: continue

            print('Attempting to fix...')
            valid = feed.update_story(meta_json, is_manual=True)
            if valid:
                database.put_story(meta_json)
                search.put_story(meta_json)
                print('Success')
            else:
                print('Story was not valid')

            time.sleep(3)  # throttle re-fetches to be polite to upstream sites
        except KeyboardInterrupt:
            raise
        except BaseException as e:
            logging.exception(e)
            # deliberate: drop into the debugger on any unexpected failure
            breakpoint()
if __name__ == '__main__':
    # interactive confirmation before touching every stored story
    total = database.count_stories()
    print('Fix {} stories?'.format(total))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()
    fix_gzip_bug(database.get_story_list())

View File

@@ -1,62 +0,0 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.INFO)
import database
from sqlalchemy import select
import search
import sys
import time
import json
import requests
database.init()
search.init()
BATCH_SIZE = 5000
def put_stories(stories):
    """Push a batch of story documents into the MeiliSearch index."""
    route = 'indexes/qotnews/documents'
    return search.meili_api(requests.post, route, stories)
def get_update(update_id):
    """Fetch the status of a MeiliSearch task by its id."""
    return search.meili_api(requests.get, f'tasks/{update_id}')
if __name__ == '__main__':
    # Re-push every stored story into MeiliSearch in batches, waiting for
    # each batch's indexing task to finish before sending the next.
    num_stories = database.count_stories()
    print('Reindex {} stories?'.format(num_stories))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()

    story_list = database.get_story_list()

    count = 1
    while len(story_list):
        # drain up to BATCH_SIZE ids off the end of the list
        stories = []
        for _ in range(BATCH_SIZE):
            try:
                sid = story_list.pop()
            except IndexError:
                break  # list exhausted mid-batch
            # NOTE(review): sid appears to be a 1-tuple row (sid[0] below);
            # get_story() is passed the tuple itself — confirm that is intended.
            story = database.get_story(sid)
            print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
            story_obj = json.loads(story.meta_json)
            stories.append(story_obj)
            count += 1

        res = put_stories(stories)
        update_id = res['uid']

        # poll the MeiliSearch task until the batch is fully indexed
        print('Waiting for processing', end='')
        while get_update(update_id)['status'] != 'succeeded':
            time.sleep(0.5)
            print('.', end='', flush=True)
        print()

    print('Done.')

View File

@@ -1,23 +0,0 @@
import time
import requests
def test_search_api():
    """Benchmark the local search endpoint and print the mean request latency."""
    num_tests = 100
    total_time = 0
    for _ in range(num_tests):
        start = time.time()
        res = requests.get('http://127.0.0.1:33842/api/search?q=iphone')
        res.raise_for_status()
        total_time += time.time() - start
    print('Average search time:', total_time / num_tests)

if __name__ == '__main__':
    test_search_api()

View File

@@ -4,62 +4,83 @@ logging.basicConfig(
level=logging.DEBUG) level=logging.DEBUG)
import requests import requests
import settings
SEARCH_ENABLED = bool(settings.MEILI_URL) MEILI_URL = 'http://127.0.0.1:7700/'
def meili_api(method, route, json=None, params=None, parse_json=True): def create_index():
try: try:
r = method(settings.MEILI_URL + route, json=json, params=params, timeout=4) json = dict(name='qotnews', uid='qotnews')
if r.status_code > 299: r = requests.post(MEILI_URL + 'indexes', json=json, timeout=2)
if r.status_code != 201:
raise Exception('Bad response code ' + str(r.status_code)) raise Exception('Bad response code ' + str(r.status_code))
if parse_json:
return r.json() return r.json()
else:
r.encoding = 'utf-8'
return r.text
except KeyboardInterrupt: except KeyboardInterrupt:
raise raise
except BaseException as e: except BaseException as e:
logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e)) logging.error('Problem creating MeiliSearch index: {}'.format(str(e)))
return False return False
def create_index():
json = dict(uid='qotnews', primaryKey='id')
return meili_api(requests.post, 'indexes', json=json)
def update_rankings(): def update_rankings():
json = ['typo', 'words', 'proximity', 'date:desc', 'exactness'] try:
return meili_api(requests.post, 'indexes/qotnews/settings/ranking-rules', json=json) json = ['typo', 'words', 'proximity', 'attribute', 'desc(date)', 'wordsPosition', 'exactness']
r = requests.post(MEILI_URL + 'indexes/qotnews/settings/ranking-rules', json=json, timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem setting MeiliSearch ranking rules: {}'.format(str(e)))
return False
def update_attributes(): def update_attributes():
json = ['title', 'url', 'author'] try:
r = meili_api(requests.post, 'indexes/qotnews/settings/searchable-attributes', json=json) json = ['title', 'url', 'author', 'link', 'id']
json = ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments'] r = requests.post(MEILI_URL + 'indexes/qotnews/settings/searchable-attributes', json=json, timeout=2)
r = meili_api(requests.post, 'indexes/qotnews/settings/displayed-attributes', json=json) if r.status_code != 202:
return r raise Exception('Bad response code ' + str(r.status_code))
requests.delete(MEILI_URL + 'indexes/qotnews/settings/displayed-attributes', timeout=2)
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem setting MeiliSearch searchable attributes: {}'.format(str(e)))
return False
def init(): def init():
if not SEARCH_ENABLED: create_index()
logging.info('Search is not enabled, skipping init.')
return
print(create_index())
update_rankings() update_rankings()
update_attributes() update_attributes()
def put_story(story): def put_story(story):
if not SEARCH_ENABLED: return story = story.copy()
return meili_api(requests.post, 'indexes/qotnews/documents', [story]) story.pop('text', None)
story.pop('comments', None)
try:
r = requests.post(MEILI_URL + 'indexes/qotnews/documents', json=[story], timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem putting MeiliSearch story: {}'.format(str(e)))
return False
def search(q): def search(q):
if not SEARCH_ENABLED: return [] try:
params = dict(q=q, limit=settings.FEED_LENGTH) params = dict(q=q, limit=250)
r = meili_api(requests.get, 'indexes/qotnews/search', params=params, parse_json=False) r = requests.get(MEILI_URL + 'indexes/qotnews/search', params=params, timeout=2)
return r if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()['hits']
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem searching MeiliSearch: {}'.format(str(e)))
return False
if __name__ == '__main__': if __name__ == '__main__':
init() create_index()
print(update_rankings()) print(search('the'))
print(search('facebook'))

View File

@@ -1,8 +1,7 @@
import os, logging import logging
DEBUG = os.environ.get('DEBUG')
logging.basicConfig( logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG if DEBUG else logging.INFO) level=logging.INFO)
import gevent import gevent
from gevent import monkey from gevent import monkey
@@ -14,146 +13,53 @@ import json
import threading import threading
import traceback import traceback
import time import time
import datetime
import humanize
import urllib.request
from urllib.parse import urlparse, parse_qs from urllib.parse import urlparse, parse_qs
import settings import settings
import database import database
import search import search
import feed import feed
from utils import gen_rand_id, NUM_ID_CHARS from utils import gen_rand_id
from flask import abort, Flask, request, render_template, stream_with_context, Response from flask import abort, Flask, request, render_template, stream_with_context, Response
from werkzeug.exceptions import NotFound from werkzeug.exceptions import NotFound
from flask_cors import CORS from flask_cors import CORS
smallweb_set = set()
def load_smallweb_list():
EXCLUDED = [
'github.com',
]
global smallweb_set
try:
url = 'https://raw.githubusercontent.com/kagisearch/smallweb/refs/heads/main/smallweb.txt'
with urllib.request.urlopen(url, timeout=10) as response:
urls = response.read().decode('utf-8').splitlines()
hosts = {urlparse(u).hostname for u in urls if u and urlparse(u).hostname}
smallweb_set = {h.replace('www.', '') for h in hosts if h not in EXCLUDED}
logging.info('Loaded {} smallweb domains.'.format(len(smallweb_set)))
except Exception as e:
logging.error('Failed to load smallweb list: {}'.format(e))
load_smallweb_list()
database.init() database.init()
search.init() search.init()
news_index = 0
ref_list = []
current_item = {}
def new_id(): def new_id():
nid = gen_rand_id() nid = gen_rand_id()
while database.get_story(nid): while database.get_story(nid):
nid = gen_rand_id() nid = gen_rand_id()
return nid return nid
build_folder = '../webclient/build'
def fromnow(ts):
return humanize.naturaltime(datetime.datetime.fromtimestamp(ts))
build_folder = './build'
flask_app = Flask(__name__, template_folder=build_folder, static_folder=build_folder, static_url_path='') flask_app = Flask(__name__, template_folder=build_folder, static_folder=build_folder, static_url_path='')
flask_app.jinja_env.filters['fromnow'] = fromnow
cors = CORS(flask_app) cors = CORS(flask_app)
@flask_app.route('/api') @flask_app.route('/api')
def api(): def api():
skip = request.args.get('skip', 0) stories = database.get_stories(settings.MAX_STORY_AGE)
limit = request.args.get('limit', settings.FEED_LENGTH) res = Response(json.dumps({"stories": stories}))
if request.args.get('smallweb') == 'true' and smallweb_set:
limit = int(limit)
skip = int(skip)
filtered_stories = []
current_skip = skip
while len(filtered_stories) < limit:
stories_batch = database.get_stories(limit, current_skip)
if not stories_batch:
break
for story_str in stories_batch:
story = json.loads(story_str)
story_url = story.get('url') or story.get('link') or ''
if not story_url:
continue
hostname = urlparse(story_url).hostname
if hostname:
hostname = hostname.replace('www.', '')
if hostname in smallweb_set:
filtered_stories.append(story_str)
if len(filtered_stories) == limit:
break
if len(filtered_stories) == limit:
break
current_skip += limit
stories = filtered_stories
else:
stories = database.get_stories(limit, skip)
# hacky nested json
res = Response('{"stories":[' + ','.join(stories) + ']}')
res.headers['content-type'] = 'application/json' res.headers['content-type'] = 'application/json'
return res return res
@flask_app.route('/api/stats', strict_slashes=False)
def apistats():
stats = {
'news_index': news_index,
'ref_list': ref_list,
'len_ref_list': len(ref_list),
'current_item': current_item,
'total_stories': database.count_stories(),
'id_space': 26**NUM_ID_CHARS,
}
return stats
@flask_app.route('/api/search', strict_slashes=False) @flask_app.route('/api/search', strict_slashes=False)
def apisearch(): def apisearch():
q = request.args.get('q', '') q = request.args.get('q', '')
if len(q) >= 3: if len(q) >= 3:
results = search.search(q) results = search.search(q)
else: else:
results = '[]' results = []
res = Response(results) return dict(results=results)
res.headers['content-type'] = 'application/json'
return res
@flask_app.route('/api/submit', methods=['POST'], strict_slashes=False) @flask_app.route('/api/submit', methods=['POST'], strict_slashes=False)
def submit(): def submit():
try: try:
url = request.form['url'] url = request.form['url']
for prefix in ['http://', 'https://']:
if url.lower().startswith(prefix):
break
else: # for
url = 'http://' + url
nid = new_id() nid = new_id()
logging.info('Manual submission: ' + url)
parse = urlparse(url) parse = urlparse(url)
if 'news.ycombinator.com' in parse.hostname: if 'news.ycombinator.com' in parse.hostname:
source = 'hackernews' source = 'hackernews'
@@ -161,9 +67,6 @@ def submit():
elif 'tildes.net' in parse.hostname and '~' in url: elif 'tildes.net' in parse.hostname and '~' in url:
source = 'tildes' source = 'tildes'
ref = parse.path.split('/')[2] ref = parse.path.split('/')[2]
elif 'lobste.rs' in parse.hostname and '/s/' in url:
source = 'lobsters'
ref = parse.path.split('/')[2]
elif 'reddit.com' in parse.hostname and 'comments' in url: elif 'reddit.com' in parse.hostname and 'comments' in url:
source = 'reddit' source = 'reddit'
ref = parse.path.split('/')[4] ref = parse.path.split('/')[4]
@@ -174,11 +77,6 @@ def submit():
ref = url ref = url
existing = database.get_story_by_ref(ref) existing = database.get_story_by_ref(ref)
if existing and DEBUG:
ref = ref + '#' + str(time.time())
existing = False
if existing: if existing:
return {'nid': existing.sid} return {'nid': existing.sid}
else: else:
@@ -187,28 +85,23 @@ def submit():
if valid: if valid:
database.put_story(story) database.put_story(story)
search.put_story(story) search.put_story(story)
if DEBUG:
logging.info('Adding manual ref: {}, id: {}, source: {}'.format(ref, nid, source))
database.put_ref(ref, nid, source)
return {'nid': nid} return {'nid': nid}
else: else:
raise Exception('Invalid article') raise Exception('Invalid article')
except Exception as e: except BaseException as e:
msg = 'Problem with article submission: {} - {}'.format(e.__class__.__name__, str(e)) logging.error('Problem with article submission: {} - {}'.format(e.__class__.__name__, str(e)))
logging.error(msg)
print(traceback.format_exc()) print(traceback.format_exc())
return {'error': msg.split('\n')[0]}, 400 abort(400)
@flask_app.route('/api/<sid>') @flask_app.route('/api/<sid>')
def story(sid): def story(sid):
story = database.get_story(sid) story = database.get_story(sid)
if story: if story:
# hacky nested json related = database.get_stories_by_url(story.meta['url'])
res = Response('{"story":' + story.full_json + '}') related = [r.meta for r in related]
res = Response(json.dumps({"story": story.data, "related": related}))
res.headers['content-type'] = 'application/json' res.headers['content-type'] = 'application/json'
return res return res
else: else:
@@ -217,19 +110,10 @@ def story(sid):
@flask_app.route('/') @flask_app.route('/')
@flask_app.route('/search') @flask_app.route('/search')
def index(): def index():
stories_json = database.get_stories(settings.FEED_LENGTH, 0)
stories = [json.loads(s) for s in stories_json]
for s in stories:
url = urlparse(s.get('url') or s.get('link') or '').hostname or ''
s['hostname'] = url.replace('www.', '')
return render_template('index.html', return render_template('index.html',
title='QotNews', title='Feed',
url='news.t0.vc', url='news.t0.vc',
description='Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode', description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')
robots='index',
stories=stories,
)
@flask_app.route('/<sid>', strict_slashes=False) @flask_app.route('/<sid>', strict_slashes=False)
@flask_app.route('/<sid>/c', strict_slashes=False) @flask_app.route('/<sid>/c', strict_slashes=False)
@@ -239,9 +123,9 @@ def static_story(sid):
except NotFound: except NotFound:
pass pass
story_obj = database.get_story(sid) story = database.get_story(sid)
if not story_obj: return abort(404) if not story: return abort(404)
story = json.loads(story_obj.full_json) story = story.data
score = story['score'] score = story['score']
num_comments = story['num_comments'] num_comments = story['num_comments']
@@ -250,77 +134,69 @@ def static_story(sid):
score, 's' if score != 1 else '', score, 's' if score != 1 else '',
num_comments, 's' if num_comments != 1 else '', num_comments, 's' if num_comments != 1 else '',
source) source)
url = urlparse(story.get('url') or story.get('link') or '').hostname or '' url = urlparse(story['url']).hostname or urlparse(story['link']).hostname or ''
url = url.replace('www.', '') url = url.replace('www.', '')
return render_template('index.html', return render_template('index.html',
title=story['title'] + ' | QotNews', title=story['title'],
url=url, url=url,
description=description, description=description)
robots='noindex',
story=story,
show_comments=request.path.endswith('/c'),
)
http_server = WSGIServer(('', 33842), flask_app) http_server = WSGIServer(('', 33842), flask_app)
def feed_thread(): def _add_new_refs():
global news_index, ref_list, current_item for ref, source in feed.get_list():
try:
while True:
# onboard new stories
if news_index == 0:
for ref, source in feed.list():
if database.get_story_by_ref(ref): if database.get_story_by_ref(ref):
continue continue
try: try:
nid = new_id() nid = new_id()
logging.info('Adding ref: {}, id: {}, source: {}'.format(ref, nid, source))
database.put_ref(ref, nid, source) database.put_ref(ref, nid, source)
logging.info('Added ref ' + ref)
except database.IntegrityError: except database.IntegrityError:
logging.info('Already have ID / ref, skipping.')
continue continue
ref_list = database.get_reflist(settings.FEED_LENGTH) def _update_current_story(item):
# update current stories
if news_index < len(ref_list):
current_item = ref_list[news_index]
try: try:
story_json = database.get_story(current_item['sid']).full_json story = database.get_story(item['sid']).data
story = json.loads(story_json)
except AttributeError: except AttributeError:
story = dict(id=current_item['sid'], ref=current_item['ref'], source=current_item['source']) story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
logging.info('Updating {} story: {}, index: {}'.format(story['source'], story['ref'], news_index)) logging.info('Updating story: {}'.format(str(story['ref'])))
valid = feed.update_story(story) valid = feed.update_story(story)
if valid: if valid:
database.put_story(story) database.put_story(story)
search.put_story(story) search.put_story(story)
else: else:
database.del_ref(current_item['ref']) database.del_ref(item['ref'])
logging.info('Removed ref {}'.format(current_item['ref'])) logging.info('Removed ref {}'.format(item['ref']))
else:
logging.info('Skipping index: ' + str(news_index)) def feed_thread():
ref_list = []
try:
while True:
# onboard new stories
if not len(ref_list):
_add_new_refs()
ref_list = database.get_reflist()
# update current stories
if len(ref_list):
item = ref_list.pop(0)
_update_current_story(item)
gevent.sleep(6) gevent.sleep(6)
news_index += 1
if news_index == settings.FEED_LENGTH: news_index = 0
except KeyboardInterrupt: except KeyboardInterrupt:
logging.info('Ending feed thread...') logging.info('Ending feed thread...')
except ValueError as e: except ValueError as e:
logging.critical('feed_thread error: {} {}'.format(e.__class__.__name__, e)) logging.error('feed_thread error: {} {}'.format(e.__class__.__name__, e))
http_server.stop() http_server.stop()
logging.info('Starting Feed thread...') print('Starting Feed thread...')
gevent.spawn(feed_thread) gevent.spawn(feed_thread)
logging.info('Starting HTTP thread...') print('Starting HTTP thread...')
try: try:
http_server.serve_forever() http_server.serve_forever()
except KeyboardInterrupt: except KeyboardInterrupt:

View File

@@ -1,23 +1,28 @@
# QotNews settings # QotNews settings
# edit this file and save it as settings.py # edit this file and save it as settings.py
MAX_STORY_AGE = 3*24*60*60
# Feed Lengths # Feed Lengths
# Number of top items from each site to pull # Number of top items from each site to pull
# set to 0 to disable that site # set to 0 to disable that site
FEED_LENGTH = 75
NUM_HACKERNEWS = 15 NUM_HACKERNEWS = 15
NUM_LOBSTERS = 10 NUM_REDDIT = 10
NUM_REDDIT = 15
NUM_TILDES = 5 NUM_TILDES = 5
NUM_SUBSTACK = 10
# Meilisearch server URL SITEMAP = {}
# Leave blank if not using search # SITEMAP['nzherald'] = { 'url': "https://www.nzherald.co.nz/arcio/news-sitemap/", 'count': 10},
#MEILI_URL = 'http://127.0.0.1:7700/' # SITEMAP['stuff'] = { 'url': "https://www.stuff.co.nz/sitemap.xml", 'count': 10},
MEILI_URL = ''
# Readerserver URL SUBSTACK = {}
# Leave blank if not using, but that defeats the whole point # SUBSTACK['webworm'] = { 'url': "https://www.webworm.co", 'count': 10},
READER_URL = 'http://127.0.0.1:33843/' # SUBSTACK['the bulletin'] = { 'url': "https://thespinoff.substack.com", 'count': 10},
CATEGORY = {}
# CATEGORY['rnz national'] = { 'url': "https://www.rnz.co.nz/news/national", 'count': 10},
SCRAPERS = ['declutter', 'outline', 'local']
# Reddit account info # Reddit account info
# leave blank if not using Reddit # leave blank if not using Reddit
@@ -33,9 +38,13 @@ SUBREDDITS = [
'HistoryofIdeas', 'HistoryofIdeas',
'LaymanJournals', 'LaymanJournals',
'PhilosophyofScience', 'PhilosophyofScience',
'PoliticsPDFs',
'Scholar',
'StateOfTheUnion', 'StateOfTheUnion',
'TheAgora', 'TheAgora',
'TrueFilm',
'TrueReddit', 'TrueReddit',
'UniversityofReddit',
'culturalstudies', 'culturalstudies',
'hardscience', 'hardscience',
'indepthsports', 'indepthsports',
@@ -44,7 +53,4 @@ SUBREDDITS = [
'neurophilosophy', 'neurophilosophy',
'resilientcommunities', 'resilientcommunities',
'worldevents', 'worldevents',
'StallmanWasRight',
'EverythingScience',
'longevity',
] ]

View File

@@ -8,17 +8,8 @@ import string
from bleach.sanitizer import Cleaner from bleach.sanitizer import Cleaner
def alert_tanner(message):
try:
logging.info('Alerting Tanner: ' + message)
params = dict(qotnews=message)
requests.get('https://tbot.tannercollin.com/message', params=params, timeout=4)
except BaseException as e:
logging.error('Problem alerting Tanner: ' + str(e))
NUM_ID_CHARS = 4
def gen_rand_id(): def gen_rand_id():
return ''.join(random.choice(string.ascii_uppercase) for _ in range(NUM_ID_CHARS)) return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))
def render_md(md): def render_md(md):
if md: if md:

View File

@@ -0,0 +1,4 @@
// Shared HTTP headers used by every scraper request.
// We masquerade as Googlebot — both via the User-Agent string and by
// claiming a Google crawler source IP in X-Forwarded-For — because many
// paywalled / bot-gated news sites serve full article content to crawlers.
module.exports.headers = {
'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)',
'X-Forwarded-For': '66.249.66.1',
};

View File

@@ -1,53 +1,29 @@
const port = 33843;
const express = require('express'); const express = require('express');
const app = express(); const app = express();
const port = 33843; const simple = require('./scraper/simple');
const request = require('request');
const JSDOM = require('jsdom').JSDOM;
const { Readability } = require('readability');
app.use(express.urlencoded({ extended: true })); app.use(express.urlencoded({ extended: true }));
app.get('/', (req, res) => { app.get('/', (req, res) => {
res.send('<form method="POST" accept-charset="UTF-8"><input name="url"><button type="submit">SUBMIT</button></form>'); // const routes = ['/', '/details', '/browser', '/browser/details', '/browser/comments'];
}); const routes = ['/', '/details'];
const requestCallback = (url, res) => (error, response, body) => { const html = routes.map(route => `
if (!error && response.statusCode == 200) { <form method="POST" action="${route}" accept-charset="UTF-8">
console.log('Response OK.'); <fieldset>
<legend>route: POST ${route}</legend>
const doc = new JSDOM(body, {url: url}); <input name="url">
const reader = new Readability(doc.window.document); <button type="submit">SUBMIT</button>
const article = reader.parse(); </fieldset>
</form>`).join('<hr />');
if (article && article.content) { res.send(html);
res.send(article.content);
} else {
res.sendStatus(404);
}
} else {
console.log('Response error:', error ? error.toString() : response.statusCode);
res.sendStatus(response ? response.statusCode : 404);
}
};
app.post('/', (req, res) => {
const url = req.body.url;
const requestOptions = {
url: url,
gzip: true,
//headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'},
//headers: {'User-Agent': 'Twitterbot/1.0'},
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0',
'X-Forwarded-For': '66.249.66.1',
},
};
console.log('Parse request for:', url);
request(requestOptions, requestCallback(url, res));
}); });
app.post('/', simple.scrape);
app.post('/details', simple.details);
// app.post('/browser', browser.scrape);
// app.post('/browser/details', browser.details);
// app.post('/browser/comments', browser.comments);
app.listen(port, () => { app.listen(port, () => {
console.log(`Example app listening on port ${port}!`); console.log(`Example app listening on port ${port}!`);

View File

@@ -0,0 +1,41 @@
const request = require('request');
const JSDOM = require('jsdom').JSDOM;
const { Readability } = require('readability');
const { headers } = require('../constants');
// Build the `request` options for a URL, attaching the shared
// Googlebot-spoofing headers from ../constants.
const options = (url) => ({ url, headers });
// Parse an HTML document with Mozilla Readability.
// `body` is the raw HTML fetched from `url` (the URL is needed so JSDOM
// can resolve relative links). Returns Readability's article object, or
// null when no readable article could be extracted.
const extract = (url, body) => {
const dom = new JSDOM(body, { url });
return new Readability(dom.window.document).parse();
};
// Shared request pipeline for both endpoints: fetch req.body.url with the
// spoofed headers, map transport / non-200 failures to an HTTP status, run
// Readability, then hand the parsed article to `respond` for the success path.
// Factored out because scrape and details previously duplicated this verbatim.
const handleScrape = (respond) => (req, res) =>
request(options(req.body.url), (error, response, body) => {
	// Use strict comparison; `response` is undefined on transport errors,
	// so fall back to 404 when there is no status code to forward.
	if (error || response.statusCode !== 200) {
		console.log('Response error:', error ? error.toString() : response.statusCode);
		return res.sendStatus(response ? response.statusCode : 404);
	}
	const article = extract(req.body.url, body);
	if (!article) {
		return res.sendStatus(404);
	}
	return respond(article, res);
});

// POST / — respond with just the readable article HTML; 404 when
// Readability found a document but no usable content.
module.exports.scrape = handleScrape((article, res) =>
article.content ? res.send(article.content) : res.sendStatus(404));

// POST /details — respond with the full Readability result object
// (title, content, and whatever else the parser produced).
module.exports.details = handleScrape((article, res) => res.send(article));

File diff suppressed because it is too large Load Diff

View File

@@ -3,7 +3,7 @@
Download MeiliSearch with: Download MeiliSearch with:
``` ```
wget https://github.com/meilisearch/meilisearch/releases/download/v0.27.0/meilisearch-linux-amd64 wget https://github.com/meilisearch/MeiliSearch/releases/download/v0.11.1/meilisearch-linux-amd64
chmod +x meilisearch-linux-amd64 chmod +x meilisearch-linux-amd64
``` ```

View File

Before

Width:  |  Height:  |  Size: 538 B

After

Width:  |  Height:  |  Size: 538 B

View File

Before

Width:  |  Height:  |  Size: 6.5 KiB

After

Width:  |  Height:  |  Size: 6.5 KiB

View File

Before

Width:  |  Height:  |  Size: 5.4 KiB

After

Width:  |  Height:  |  Size: 5.4 KiB

View File

Before

Width:  |  Height:  |  Size: 500 B

After

Width:  |  Height:  |  Size: 500 B

View File

@@ -4,14 +4,12 @@
"private": true, "private": true,
"dependencies": { "dependencies": {
"abort-controller": "^3.0.0", "abort-controller": "^3.0.0",
"katex": "^0.16.25",
"localforage": "^1.7.3", "localforage": "^1.7.3",
"moment": "^2.24.0", "moment": "^2.24.0",
"query-string": "^6.8.3", "query-string": "^6.8.3",
"react": "^16.9.0", "react": "^16.9.0",
"react-dom": "^16.9.0", "react-dom": "^16.9.0",
"react-helmet": "^5.2.1", "react-helmet": "^5.2.1",
"react-latex-next": "^3.0.0",
"react-router-dom": "^5.0.1", "react-router-dom": "^5.0.1",
"react-router-hash-link": "^1.2.2", "react-router-hash-link": "^1.2.2",
"react-scripts": "3.1.1" "react-scripts": "3.1.1"

View File

@@ -8,8 +8,6 @@
content="{{ description }}" content="{{ description }}"
/> />
<meta content="{{ url }}" name="og:site_name"> <meta content="{{ url }}" name="og:site_name">
<meta name="robots" content="{{ robots }}">
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png"> <link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
@@ -28,112 +26,26 @@
work correctly both with client-side routing and a non-root public URL. work correctly both with client-side routing and a non-root public URL.
Learn how to configure a non-root public URL by running `npm run build`. Learn how to configure a non-root public URL by running `npm run build`.
--> -->
<title>{{ title }}</title> <title>{{ title }} - QotNews</title>
<style> <style>
html { html {
overflow-y: scroll; overflow-y: scroll;
} }
body { body {
background: #eeeeee; background: #000;
}
.nojs {
color: white;
} }
</style> </style>
</head> </head>
<body> <body>
<div id="root"> <div class="nojs">
<div class="container menu"> <noscript>You need to enable JavaScript to run this app.</noscript>
<p>
<a href="/">QotNews</a>
<br />
<span class="slogan">Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.</span>
</p>
</div>
{% if story %}
<div class="{% if show_comments %}container{% else %}article-container{% endif %}">
<div class="article">
<h1>{{ story.title }}</h1>
{% if show_comments %}
<div class="info">
<a href="/{{ story.id }}">View article</a>
</div>
{% else %}
<div class="info">
Source: <a class="source" href="{{ story.url or story.link }}">{{ url }}</a>
</div>
{% endif %}
<div class="info">
{{ story.score }} points
by <a href="{{ story.author_link }}">{{ story.author }}</a>
{{ story.date | fromnow }}
on <a href="{{ story.link }}">{{ story.source }}</a> |
<a href="/{{ story.id }}/c">
{{ story.num_comments }} comment{{ 's' if story.num_comments != 1 }}
</a>
</div>
{% if not show_comments and story.text %}
<div class="story-text">{{ story.text | safe }}</div>
{% elif show_comments %}
{% macro render_comment(comment, level) %}
<dt></dt>
<dd class="comment{% if level > 0 %} lined{% endif %}">
<div class="info">
<p>
{% if comment.author == story.author %}[OP] {% endif %}{{ comment.author or '[Deleted]' }} | <a href="#{{ comment.author }}{{ comment.date }}" id="{{ comment.author }}{{ comment.date }}">{{ comment.date | fromnow }}</a>
</p>
</div>
<div class="text">{{ (comment.text | safe) if comment.text else '<p>[Empty / deleted comment]</p>' }}</div>
{% if comment.comments %}
<dl>
{% for reply in comment.comments %}
{{ render_comment(reply, level + 1) }}
{% endfor %}
</dl>
{% endif %}
</dd>
{% endmacro %}
<dl class="comments">
{% for comment in story.comments %}{{ render_comment(comment, 0) }}{% endfor %}
</dl>
{% endif %}
</div>
<div class='dot toggleDot'>
<div class='button'>
<a href="/{{ story.id }}{{ '/c' if not show_comments else '' }}">
{{ '' if not show_comments else '' }}
</a>
</div>
</div>
</div>
{% elif stories %}
<div class="container">
{% for story in stories %}
<div class='item'>
<div class='title'>
<a class='link' href='/{{ story.id }}'>
<img class='source-logo' src='/logos/{{ story.source }}.png' alt='{{ story.source }}:' /> {{ story.title }}
</a>
<span class='source'>
(<a class='source' href='{{ story.url or story.link }}'>{{ story.hostname }}</a>)
</span>
</div>
<div class='info'>
{{ story.score }} points
by <a href="{{ story.author_link }}">{{ story.author }}</a>
{{ story.date | fromnow }}
on <a href="{{ story.link }}">{{ story.source }}</a> |
<a class="{{ 'hot' if story.num_comments > 99 else '' }}" href="/{{ story.id }}/c">
{{ story.num_comments }} comment{{ 's' if story.num_comments != 1 }}
</a>
</div>
</div>
{% endfor %}
</div>
{% endif %}
</div> </div>
<div id="root"></div>
<!-- <!--
This HTML file is a template. This HTML file is a template.
If you open it directly in the browser, you will see an empty page. If you open it directly in the browser, you will see an empty page.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 981 B

View File

@@ -1,12 +1,10 @@
import React, { useState, useEffect, useRef, useCallback } from 'react'; import React from 'react';
import { BrowserRouter as Router, Route, Link, Switch } from 'react-router-dom'; import { BrowserRouter as Router, Route, Link, Switch } from 'react-router-dom';
import localForage from 'localforage'; import localForage from 'localforage';
import './Style-light.css'; import './Style-light.css';
import './Style-dark.css'; import './Style-dark.css';
import './Style-black.css';
import './Style-red.css';
import './fonts/Fonts.css'; import './fonts/Fonts.css';
import { BackwardDot, ForwardDot } from './utils.js'; import { ForwardDot } from './utils.js';
import Feed from './Feed.js'; import Feed from './Feed.js';
import Article from './Article.js'; import Article from './Article.js';
import Comments from './Comments.js'; import Comments from './Comments.js';
@@ -15,109 +13,65 @@ import Submit from './Submit.js';
import Results from './Results.js'; import Results from './Results.js';
import ScrollToTop from './ScrollToTop.js'; import ScrollToTop from './ScrollToTop.js';
function App() { class App extends React.Component {
const [theme, setTheme] = useState(localStorage.getItem('theme') || ''); constructor(props) {
const cache = useRef({}); super(props);
const [isFullScreen, setIsFullScreen] = useState(!!document.fullscreenElement);
const updateCache = useCallback((key, value) => { this.state = {
cache.current[key] = value; theme: localStorage.getItem('theme') || '',
}, []); };
const light = () => { this.cache = {};
setTheme(''); }
updateCache = (key, value) => {
this.cache[key] = value;
}
light() {
this.setState({ theme: '' });
localStorage.setItem('theme', ''); localStorage.setItem('theme', '');
}; }
const dark = () => { dark() {
setTheme('dark'); this.setState({ theme: 'dark' });
localStorage.setItem('theme', 'dark'); localStorage.setItem('theme', 'dark');
}; }
const black = () => { componentDidMount() {
setTheme('black'); if (!this.cache.length) {
localStorage.setItem('theme', 'black');
};
const red = () => {
setTheme('red');
localStorage.setItem('theme', 'red');
};
useEffect(() => {
if (Object.keys(cache.current).length === 0) {
localForage.iterate((value, key) => { localForage.iterate((value, key) => {
updateCache(key, value); this.updateCache(key, value);
}).then(() => {
console.log('loaded cache from localforage');
}); });
console.log('loaded cache from localforage');
} }
}, [updateCache]);
const goFullScreen = () => {
if ('wakeLock' in navigator) {
navigator.wakeLock.request('screen');
} }
document.body.requestFullscreen({ navigationUI: 'hide' });
};
const exitFullScreen = () => { render() {
document.exitFullscreen(); const theme = this.state.theme;
}; document.body.style.backgroundColor = theme === 'dark' ? '#000' : '#eeeeee';
useEffect(() => {
const onFullScreenChange = () => setIsFullScreen(!!document.fullscreenElement);
document.addEventListener('fullscreenchange', onFullScreenChange);
return () => document.removeEventListener('fullscreenchange', onFullScreenChange);
}, []);
useEffect(() => {
if (theme === 'dark') {
document.body.style.backgroundColor = '#1a1a1a';
} else if (theme === 'black') {
document.body.style.backgroundColor = '#000';
} else if (theme === 'red') {
document.body.style.backgroundColor = '#000';
} else {
document.body.style.backgroundColor = '#eeeeee';
}
}, [theme]);
const fullScreenAvailable = document.fullscreenEnabled ||
document.mozFullscreenEnabled ||
document.webkitFullscreenEnabled ||
document.msFullscreenEnabled;
return ( return (
<div className={theme}> <div className={theme}>
<Router> <Router>
<div className='container menu'> <div className='container menu'>
<p> <p>
<Link to='/'>QotNews</Link> <Link to='/'>QotNews - Feed</Link>
<span className='theme'>Theme: <a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a></span>
<span className='theme'><a href='#' onClick={() => light()}>Light</a> - <a href='#' onClick={() => dark()}>Dark</a> - <a href='#' onClick={() => black()}>Black</a> - <a href='#' onClick={() => red()}>Red</a></span>
<br /> <br />
<span className='slogan'>Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.</span> <span className='slogan'>Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode.</span>
</p> </p>
{fullScreenAvailable &&
<Route path='/(|search)' render={() => !isFullScreen ?
<button className='fullscreen' onClick={() => goFullScreen()}>Enter Fullscreen</button>
:
<button className='fullscreen' onClick={() => exitFullScreen()}>Exit Fullscreen</button>
} />
}
<Route path='/(|search)' component={Search} /> <Route path='/(|search)' component={Search} />
<Route path='/(|search)' component={Submit} /> <Route path='/(|search)' component={Submit} />
</div> </div>
<Route path='/' exact render={(props) => <Feed {...props} updateCache={updateCache} />} /> <Route path='/' exact render={(props) => <Feed {...props} updateCache={this.updateCache} />} />
<Switch> <Switch>
<Route path='/search' component={Results} /> <Route path='/search' component={Results} />
<Route path='/:id' exact render={(props) => <Article {...props} cache={cache.current} />} /> <Route path='/:id' exact render={(props) => <Article {...props} cache={this.cache} />} />
</Switch> </Switch>
<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={cache.current} />} /> <Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} />} />
<BackwardDot />
<ForwardDot /> <ForwardDot />
<ScrollToTop /> <ScrollToTop />
@@ -125,5 +79,6 @@ function App() {
</div> </div>
); );
} }
}
export default App; export default App;

View File

@@ -1,237 +1,112 @@
import React, { useState, useEffect } from 'react'; import React from 'react';
import { useParams } from 'react-router-dom';
import { Helmet } from 'react-helmet'; import { Helmet } from 'react-helmet';
import localForage from 'localforage'; import localForage from 'localforage';
import { sourceLink, similarLink, infoLine, ToggleDot } from './utils.js'; import { sourceLink, infoLine, ToggleDot } from './utils.js';
import Latex from 'react-latex-next';
import 'katex/dist/katex.min.css';
const VOID_ELEMENTS = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']; class Article extends React.Component {
const DANGEROUS_TAGS = ['svg', 'math']; constructor(props) {
super(props);
const latexDelimiters = [ const id = this.props.match ? this.props.match.params.id : 'CLOL';
{ left: '$$', right: '$$', display: true }, const cache = this.props.cache;
{ left: '\\[', right: '\\]', display: true },
{ left: '$', right: '$', display: false },
{ left: '\\(', right: '\\)', display: false }
];
function Article({ cache }) {
const { id } = useParams();
if (id in cache) console.log('cache hit'); if (id in cache) console.log('cache hit');
const [story, setStory] = useState(cache[id] || false); this.state = {
const [error, setError] = useState(''); story: cache[id] || false,
const [pConv, setPConv] = useState([]); error: false,
const [copyButtonText, setCopyButtonText] = useState('\ue92c'); pConv: [],
};
}
componentDidMount() {
const id = this.props.match ? this.props.match.params.id : 'CLOL';
useEffect(() => {
localForage.getItem(id) localForage.getItem(id)
.then( .then(
(value) => { (value) => {
if (value) { if (value) {
setStory(value); this.setState({ story: value });
} }
} }
); );
fetch('/api/' + id) fetch('/api/' + id)
.then(res => { .then(res => res.json())
if (!res.ok) {
throw new Error(`Server responded with ${res.status} ${res.statusText}`);
}
return res.json();
})
.then( .then(
(result) => { (result) => {
setStory(result.story); this.setState({ story: result.story });
localForage.setItem(id, result.story); localForage.setItem(id, result.story);
}, },
(error) => { (error) => {
const errorMessage = `Failed to fetch new article content (ID: ${id}). Your connection may be down or the server might be experiencing issues. ${error.toString()}.`; this.setState({ error: true });
setError(errorMessage);
} }
); );
}, [id]);
const copyLink = () => {
navigator.clipboard.writeText(`${story.title}:\n${window.location.href}`).then(() => {
setCopyButtonText('\uea10');
setTimeout(() => setCopyButtonText('\ue92c'), 2000);
}, () => {
setCopyButtonText('\uea0f');
setTimeout(() => setCopyButtonText('\ue92c'), 2000);
});
};
const pConvert = (n) => {
setPConv(prevPConv => [...prevPConv, n]);
};
const isCodeBlock = (v) => {
if (v.localName === 'pre') {
return true;
} }
if (v.localName === 'code') { pConvert = (n) => {
if (v.closest('p')) { this.setState({ pConv: [...this.state.pConv, n]});
return false;
}
const parent = v.parentElement;
if (parent) {
const nonWhitespaceChildren = Array.from(parent.childNodes).filter(n => {
return n.nodeType !== Node.TEXT_NODE || n.textContent.trim() !== '';
});
if (nonWhitespaceChildren.length === 1 && nonWhitespaceChildren[0] === v) {
return true;
}
}
}
return false;
};
const renderNodes = (nodes, keyPrefix = '') => {
return Array.from(nodes).map((v, k) => {
const key = `${keyPrefix}${k}`;
if (pConv.includes(key)) {
return (
<React.Fragment key={key}>
{v.textContent.split('\n\n').map((x, i) =>
<p key={i}>{x}</p>
)}
</React.Fragment>
);
} }
if (v.nodeName === '#text') { render() {
const text = v.data; const id = this.props.match ? this.props.match.params.id : 'CLOL';
if (text.includes('\\[') || text.includes('\\(') || text.includes('$$') || /\$(?:[^$]*[^\s$])\$/.test(text)) { const story = this.state.story;
return <Latex key={key} delimiters={latexDelimiters}>{text}</Latex>; const error = this.state.error;
} const pConv = this.state.pConv;
let nodes = null;
// Only wrap top-level text nodes in <p> if (story.text) {
if (keyPrefix === '' && v.data.trim() !== '') {
return <p key={key}>{v.data}</p>;
}
return v.data;
}
if (v.nodeType !== Node.ELEMENT_NODE) {
return null;
}
if (DANGEROUS_TAGS.includes(v.localName)) {
return <span key={key} dangerouslySetInnerHTML={{ __html: v.outerHTML }} />;
}
const Tag = v.localName;
if (isCodeBlock(v)) {
return (
<React.Fragment key={key}>
<Tag dangerouslySetInnerHTML={{ __html: v.innerHTML }} />
<button onClick={() => pConvert(key)}>Convert Code to Paragraph</button>
</React.Fragment>
);
}
const textContent = v.textContent.trim();
const isMath = (textContent.startsWith('\\(') && textContent.endsWith('\\)')) ||
(textContent.startsWith('\\[') && textContent.endsWith('\\]')) ||
(textContent.startsWith('$$') && textContent.endsWith('$$')) ||
(textContent.startsWith('$') && textContent.endsWith('$') && textContent.indexOf('$') !== textContent.lastIndexOf('$') && !/\s/.test(textContent.charAt(textContent.length - 2)));
const props = { key: key };
if (v.hasAttributes()) {
for (const attr of v.attributes) {
const name = attr.name === 'class' ? 'className' : attr.name;
props[name] = attr.value;
}
}
if (isMath) {
let mathContent = v.textContent;
// align environment requires display math mode
if (mathContent.includes('\\begin{align')) {
const trimmed = mathContent.trim();
if (trimmed.startsWith('\\(')) {
// Replace \( and \) with \[ and \] to switch to display mode
const firstParen = mathContent.indexOf('\\(');
const lastParen = mathContent.lastIndexOf('\\)');
mathContent = mathContent.substring(0, firstParen) + '\\[' + mathContent.substring(firstParen + 2, lastParen) + '\\]' + mathContent.substring(lastParen + 2);
} else if (trimmed.startsWith('$') && !trimmed.startsWith('$$')) {
// Replace $ with $$
const firstDollar = mathContent.indexOf('$');
const lastDollar = mathContent.lastIndexOf('$');
if (firstDollar !== lastDollar) {
mathContent = mathContent.substring(0, firstDollar) + '$$' + mathContent.substring(firstDollar + 1, lastDollar) + '$$' + mathContent.substring(lastDollar + 1);
}
}
}
return <Tag {...props}><Latex delimiters={latexDelimiters}>{mathContent}</Latex></Tag>;
}
if (VOID_ELEMENTS.includes(Tag)) {
return <Tag {...props} />;
}
return (
<Tag {...props}>
{renderNodes(v.childNodes, `${key}-`)}
</Tag>
);
});
};
const nodes = (s) => {
if (s && s.text) {
let div = document.createElement('div'); let div = document.createElement('div');
div.innerHTML = s.text; div.innerHTML = story.text;
return div.childNodes; nodes = div.childNodes;
} }
return null;
};
const storyNodes = nodes(story);
return ( return (
<div className='article-container'> <div className='article-container'>
{error && {error && <p>Connection error?</p>}
<details style={{marginBottom: '1rem'}}>
<summary>Connection error? Click to expand.</summary>
<p>{error}</p>
{story && <p>Loaded article from cache.</p>}
</details>
}
{story ? {story ?
<div className='article'> <div className='article'>
<Helmet> <Helmet>
<title>{story.title} | QotNews</title> <title>{story.title} - QotNews</title>
<meta name="robots" content="noindex" />
</Helmet> </Helmet>
<h1>{story.title} <button className='copy-button' onClick={copyLink}>{copyButtonText}</button></h1> <h1>{story.title}</h1>
<div className='info'> <div className='info'>
Source: {sourceLink(story)} | {similarLink(story)} Source: {sourceLink(story)}
</div> </div>
{infoLine(story)} {infoLine(story)}
{storyNodes ? {nodes ?
<div className='story-text'> <div className='story-text'>
{renderNodes(storyNodes)} {Object.entries(nodes).map(([k, v]) =>
pConv.includes(k) ?
v.innerHTML.split('\n\n').map(x =>
<p dangerouslySetInnerHTML={{ __html: x }} />
)
:
(v.nodeName === '#text' ?
<p>{v.data}</p>
:
<>
<v.localName dangerouslySetInnerHTML={v.innerHTML ? { __html: v.innerHTML } : null} />
{v.localName == 'pre' && <button onClick={() => this.pConvert(k)}>Convert Code to Paragraph</button>}
</>
)
)}
</div> </div>
: :
<p>Problem getting article :(</p> <p>Problem getting article :(</p>
} }
</div> </div>
: :
<p>Loading...</p> <p>loading...</p>
} }
<ToggleDot id={id} article={false} /> <ToggleDot id={id} article={false} />
</div> </div>
); );
} }
}
export default Article; export default Article;

View File

@@ -1,80 +1,83 @@
import React, { useState, useEffect } from 'react'; import React from 'react';
import { Link, useParams } from 'react-router-dom'; import { Link } from 'react-router-dom';
import { HashLink } from 'react-router-hash-link'; import { HashLink } from 'react-router-hash-link';
import { Helmet } from 'react-helmet'; import { Helmet } from 'react-helmet';
import moment from 'moment'; import moment from 'moment';
import localForage from 'localforage'; import localForage from 'localforage';
import { infoLine, ToggleDot } from './utils.js'; import { infoLine, ToggleDot } from './utils.js';
function countComments(c) { class Article extends React.Component {
return c.comments.reduce((sum, x) => sum + countComments(x), 1); constructor(props) {
} super(props);
function Comments({ cache }) { const id = this.props.match.params.id;
const { id } = useParams(); const cache = this.props.cache;
if (id in cache) console.log('cache hit'); if (id in cache) console.log('cache hit');
const [story, setStory] = useState(cache[id] || false); this.state = {
const [error, setError] = useState(''); story: cache[id] || false,
const [collapsed, setCollapsed] = useState([]); error: false,
const [expanded, setExpanded] = useState([]); collapsed: [],
expanded: [],
};
}
componentDidMount() {
const id = this.props.match.params.id;
useEffect(() => {
localForage.getItem(id) localForage.getItem(id)
.then( .then(
(value) => { (value) => {
if (value) { this.setState({ story: value });
setStory(value);
}
} }
); );
fetch('/api/' + id) fetch('/api/' + id)
.then(res => { .then(res => res.json())
if (!res.ok) {
throw new Error(`Server responded with ${res.status} ${res.statusText}`);
}
return res.json();
})
.then( .then(
(result) => { (result) => {
setStory(result.story); this.setState({ story: result.story }, () => {
localForage.setItem(id, result.story);
const hash = window.location.hash.substring(1); const hash = window.location.hash.substring(1);
if (hash) { if (hash) {
setTimeout(() => { document.getElementById(hash).scrollIntoView();
const element = document.getElementById(hash);
if (element) {
element.scrollIntoView();
}
}, 0);
} }
});
localForage.setItem(id, result.story);
}, },
(error) => { (error) => {
const errorMessage = `Failed to fetch comments (ID: ${id}). Your connection may be down or the server might be experiencing issues. ${error.toString()}.`; this.setState({ error: true });
setError(errorMessage);
} }
); );
}, [id]); }
const collapseComment = (cid) => { collapseComment(cid) {
setCollapsed(prev => [...prev, cid]); this.setState(prevState => ({
setExpanded(prev => prev.filter(x => x !== cid)); ...prevState,
}; collapsed: [...prevState.collapsed, cid],
expanded: prevState.expanded.filter(x => x !== cid),
}));
}
const expandComment = (cid) => { expandComment(cid) {
setCollapsed(prev => prev.filter(x => x !== cid)); this.setState(prevState => ({
setExpanded(prev => [...prev, cid]); ...prevState,
}; collapsed: prevState.collapsed.filter(x => x !== cid),
expanded: [...prevState.expanded, cid],
}));
}
const displayComment = (story, c, level) => { countComments(c) {
return c.comments.reduce((sum, x) => sum + this.countComments(x), 1);
}
displayComment(story, c, level) {
const cid = c.author + c.date; const cid = c.author + c.date;
const isCollapsed = collapsed.includes(cid); const collapsed = this.state.collapsed.includes(cid);
const isExpanded = expanded.includes(cid); const expanded = this.state.expanded.includes(cid);
const hidden = isCollapsed || (level == 4 && !isExpanded); const hidden = collapsed || (level == 4 && !expanded);
const hasChildren = c.comments.length !== 0; const hasChildren = c.comments.length !== 0;
return ( return (
@@ -84,37 +87,38 @@ function Comments({ cache }) {
{c.author === story.author ? '[OP]' : ''} {c.author || '[Deleted]'} {c.author === story.author ? '[OP]' : ''} {c.author || '[Deleted]'}
{' '} | <HashLink to={'#' + cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink> {' '} | <HashLink to={'#' + cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
{hidden || hasChildren && {hasChildren && (
<button className='collapser pointer' onClick={() => collapseComment(cid)}></button> hidden ?
} <span className='collapser expander pointer' onClick={() => this.expandComment(cid)}>+</span>
:
<span className='collapser pointer' onClick={() => this.collapseComment(cid)}></span>
)}
</p> </p>
</div> </div>
<div className={isCollapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text || '<p>[Empty / deleted comment]</p>'}} /> <div className={collapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text }} />
{hidden && hasChildren ? {hidden && hasChildren ?
<button className='comment lined info pointer' onClick={() => expandComment(cid)}>[show {countComments(c)-1} more]</button> <div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c) - 1} more]</div>
: :
c.comments.map(i => displayComment(story, i, level + 1)) c.comments.map(i => this.displayComment(story, i, level + 1))
} }
</div> </div>
); );
}; }
render() {
const id = this.props.match.params.id;
const story = this.state.story;
const error = this.state.error;
return ( return (
<div className='container'> <div className='container'>
{error && {error && <p>Connection error?</p>}
<details style={{marginBottom: '1rem'}}>
<summary>Connection error? Click to expand.</summary>
<p>{error}</p>
{story && <p>Loaded comments from cache.</p>}
</details>
}
{story ? {story ?
<div className='article'> <div className='article'>
<Helmet> <Helmet>
<title>{story.title} | QotNews</title> <title>{story.title} - QotNews Comments</title>
<meta name="robots" content="noindex" />
</Helmet> </Helmet>
<h1>{story.title}</h1> <h1>{story.title}</h1>
@@ -126,7 +130,7 @@ function Comments({ cache }) {
{infoLine(story)} {infoLine(story)}
<div className='comments'> <div className='comments'>
{story.comments.map(c => displayComment(story, c, 0))} {story.comments.map(c => this.displayComment(story, c, 0))}
</div> </div>
</div> </div>
: :
@@ -136,5 +140,6 @@ function Comments({ cache }) {
</div> </div>
); );
} }
}
export default Comments; export default Article;

View File

@@ -1,141 +1,68 @@
import React, { useState, useEffect } from 'react'; import React from 'react';
import { Link } from 'react-router-dom'; import { Link } from 'react-router-dom';
import { Helmet } from 'react-helmet'; import { Helmet } from 'react-helmet';
import localForage from 'localforage'; import localForage from 'localforage';
import { sourceLink, infoLine, logos } from './utils.js'; import { sourceLink, infoLine, getLogoUrl } from './utils.js';
function Feed({ updateCache }) { class Feed extends React.Component {
const [stories, setStories] = useState(() => JSON.parse(localStorage.getItem('stories')) || false); constructor(props) {
const [error, setError] = useState(''); super(props);
const [loadingStatus, setLoadingStatus] = useState(null);
const [filterSmallweb, setFilterSmallweb] = useState(() => localStorage.getItem('filterSmallweb') === 'true');
const handleFilterChange = e => { this.state = {
const isChecked = e.target.checked; stories: JSON.parse(localStorage.getItem('stories')) || false,
setStories(false); error: false,
setFilterSmallweb(isChecked);
localStorage.setItem('filterSmallweb', isChecked);
}; };
useEffect(() => {
const controller = new AbortController();
fetch(filterSmallweb ? '/api?smallweb=true' : '/api', { signal: controller.signal })
.then(res => {
if (!res.ok) {
throw new Error(`Server responded with ${res.status} ${res.statusText}`);
} }
return res.json();
}) componentDidMount() {
fetch('/api')
.then(res => res.json())
.then( .then(
async (result) => { (result) => {
const newApiStories = result.stories; const updated = !this.state.stories || this.state.stories[0].id !== result.stories[0].id;
console.log('updated:', updated);
const updated = !stories || !stories.length || stories[0].id !== newApiStories[0].id; const { stories } = result;
console.log('New stories available:', updated); this.setState({ stories });
localStorage.setItem('stories', JSON.stringify(stories));
if (!updated) return; if (updated) {
localForage.clear();
setLoadingStatus({ current: 0, total: newApiStories.length }); stories.forEach((x, i) => {
fetch('/api/' + x.id)
let currentStories = Array.isArray(stories) ? [...stories] : []; .then(res => res.json())
let preloadedCount = 0; .then(({ story }) => {
localForage.setItem(x.id, story)
for (const [index, newStory] of newApiStories.entries()) { .then(console.log('preloaded', x.id, x.title));
if (controller.signal.aborted) { this.props.updateCache(x.id, story);
break; }, error => { }
);
});
} }
try {
const storyFetchController = new AbortController();
const timeoutId = setTimeout(() => storyFetchController.abort(), 10000); // 10-second timeout
const storyRes = await fetch('/api/' + newStory.id, { signal: storyFetchController.signal });
clearTimeout(timeoutId);
if (!storyRes.ok) {
throw new Error(`Server responded with ${storyRes.status} ${storyRes.statusText}`);
}
const storyResult = await storyRes.json();
const fullStory = storyResult.story;
await localForage.setItem(fullStory.id, fullStory);
console.log('Preloaded story:', fullStory.id, fullStory.title);
updateCache(fullStory.id, fullStory);
preloadedCount++;
setLoadingStatus({ current: preloadedCount, total: newApiStories.length });
const existingStoryIndex = currentStories.findIndex(s => s.id === newStory.id);
if (existingStoryIndex > -1) {
currentStories.splice(existingStoryIndex, 1);
}
currentStories.splice(index, 0, newStory);
localStorage.setItem('stories', JSON.stringify(currentStories));
setStories(currentStories);
} catch (error) {
let errorMessage;
if (error.name === 'AbortError') {
errorMessage = `The request to fetch story '${newStory.title}' (${newStory.id}) timed out after 10 seconds. Your connection may be unstable. (${preloadedCount} / ${newApiStories.length} stories preloaded)`;
console.log('Fetch timed out for story:', newStory.id);
} else {
errorMessage = `An error occurred while fetching story '${newStory.title}' (ID: ${newStory.id}): ${error.toString()}. (${preloadedCount} / ${newApiStories.length} stories preloaded)`;
console.log('Fetch failed for story:', newStory.id, error);
}
setError(errorMessage);
break;
}
}
const finalStories = currentStories.slice(0, newApiStories.length);
const removedStories = currentStories.slice(newApiStories.length);
for (const story of removedStories) {
console.log('Removed story:', story.id, story.title);
localForage.removeItem(story.id);
}
localStorage.setItem('stories', JSON.stringify(finalStories));
setStories(finalStories);
setLoadingStatus(null);
}, },
(error) => { (error) => {
if (error.name === 'AbortError') { this.setState({ error: true });
console.log('Feed fetch aborted.');
return;
}
const errorMessage = `Failed to fetch the main story list from the API. Your connection may be down or the server might be experiencing issues. ${error.toString()}.`;
setError(errorMessage);
} }
); );
}
return () => controller.abort(); render() {
}, [updateCache, filterSmallweb]); const stories = this.state.stories;
const error = this.state.error;
return ( return (
<div className='container'> <div className='container'>
<Helmet> <Helmet>
<title>QotNews</title> <title>Feed - QotNews</title>
<meta name="robots" content="index" />
</Helmet> </Helmet>
{error && <p>Connection error?</p>}
<div style={{marginBottom: '1rem'}}>
<input type="checkbox" id="filter-smallweb" className="checkbox" checked={filterSmallweb} onChange={handleFilterChange} />
<label htmlFor="filter-smallweb">Only Smallweb</label>
</div>
{error &&
<details style={{marginBottom: '1rem'}}>
<summary>Connection error? Click to expand.</summary>
<p>{error}</p>
{stories && <p>Loaded feed from cache.</p>}
</details>
}
{stories ? {stories ?
<div> <div>
{stories.map(x => {stories.map(x =>
<div className='item' key={x.id}> <div className='item' key={x.id}>
<div className='title'> <div className='title'>
<Link className='link' to={'/' + x.id}> <Link className='link' to={'/' + x.id}>
<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title} <img className='source-logo' src={getLogoUrl(x)} alt='source logo' /> {x.title}
</Link> </Link>
<span className='source'> <span className='source'>
@@ -148,12 +75,11 @@ function Feed({ updateCache }) {
)} )}
</div> </div>
: :
<p>Loading...</p> <p>loading...</p>
} }
{loadingStatus && <p>Preloading stories {loadingStatus.current} / {loadingStatus.total}...</p>}
</div> </div>
); );
} }
}
export default Feed; export default Feed;

View File

@@ -1,41 +1,62 @@
import React, { useState, useEffect } from 'react'; import React from 'react';
import { Link, useLocation } from 'react-router-dom'; import { Link } from 'react-router-dom';
import { Helmet } from 'react-helmet'; import { Helmet } from 'react-helmet';
import { sourceLink, infoLine, logos } from './utils.js'; import { sourceLink, infoLine, getLogoUrl } from './utils.js';
import AbortController from 'abort-controller'; import AbortController from 'abort-controller';
function Results() { class Results extends React.Component {
const [stories, setStories] = useState(false); constructor(props) {
const [error, setError] = useState(false); super(props);
const location = useLocation();
useEffect(() => { this.state = {
const controller = new AbortController(); stories: false,
const signal = controller.signal; error: false,
};
const search = location.search; this.controller = null;
}
performSearch = () => {
if (this.controller) {
this.controller.abort();
}
this.controller = new AbortController();
const signal = this.controller.signal;
const search = this.props.location.search;
fetch('/api/search' + search, { method: 'get', signal: signal }) fetch('/api/search' + search, { method: 'get', signal: signal })
.then(res => res.json()) .then(res => res.json())
.then( .then(
(result) => { (result) => {
setStories(result.hits); this.setState({ stories: result.results });
}, },
(error) => { (error) => {
if (error.message !== 'The operation was aborted. ') { if (error.message !== 'The operation was aborted. ') {
setError(true); this.setState({ error: true });
} }
} }
); );
}
return () => { componentDidMount() {
controller.abort(); this.performSearch();
}; }
}, [location.search]);
componentDidUpdate(prevProps) {
if (this.props.location.search !== prevProps.location.search) {
this.performSearch();
}
}
render() {
const stories = this.state.stories;
const error = this.state.error;
return ( return (
<div className='container'> <div className='container'>
<Helmet> <Helmet>
<title>Search Results | QotNews</title> <title>Feed - QotNews</title>
</Helmet> </Helmet>
{error && <p>Connection error?</p>} {error && <p>Connection error?</p>}
{stories ? {stories ?
@@ -47,7 +68,7 @@ function Results() {
<div className='item' key={x.id}> <div className='item' key={x.id}>
<div className='title'> <div className='title'>
<Link className='link' to={'/' + x.id}> <Link className='link' to={'/' + x.id}>
<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title} <img className='source-logo' src={getLogoUrl(x)} alt='source logo' /> {x.title}
</Link> </Link>
<span className='source'> <span className='source'>
@@ -69,5 +90,6 @@ function Results() {
</div> </div>
); );
} }
}
export default Results; export default Results;

View File

@@ -15,7 +15,6 @@ class ScrollToTop extends React.Component {
} }
window.scrollTo(0, 0); window.scrollTo(0, 0);
document.body.scrollTop = 0;
} }
render() { render() {

View File

@@ -1,46 +1,51 @@
import React, { useState, useRef } from 'react'; import React, { Component } from 'react';
import { useHistory, useLocation } from 'react-router-dom'; import { withRouter } from 'react-router-dom';
import queryString from 'query-string'; import queryString from 'query-string';
const getSearch = location => queryString.parse(location.search).q || ''; const getSearch = props => queryString.parse(props.location.search).q;
function Search() { class Search extends Component {
const history = useHistory(); constructor(props) {
const location = useLocation(); super(props);
const [search, setSearch] = useState(getSearch(location)); this.state = {search: getSearch(this.props)};
const inputRef = useRef(null); this.inputRef = React.createRef();
}
const searchArticles = (event) => { searchArticles = (event) => {
const newSearch = event.target.value; const search = event.target.value;
setSearch(newSearch); this.setState({search: search});
if (newSearch.length >= 3) { if (search.length >= 3) {
const searchQuery = queryString.stringify({ 'q': newSearch }); const searchQuery = queryString.stringify({ 'q': search });
history.replace('/search?' + searchQuery); this.props.history.replace('/search?' + searchQuery);
} else { } else {
history.replace('/'); this.props.history.replace('/');
} }
} }
const searchAgain = (event) => { searchAgain = (event) => {
event.preventDefault(); event.preventDefault();
const searchString = queryString.stringify({ 'q': event.target[0].value }); const searchString = queryString.stringify({ 'q': event.target[0].value });
history.push('/search?' + searchString); this.props.history.push('/search?' + searchString);
inputRef.current.blur(); this.inputRef.current.blur();
} }
render() {
const search = this.state.search;
return ( return (
<span className='search'> <span className='search'>
<form onSubmit={searchAgain}> <form onSubmit={this.searchAgain}>
<input <input
placeholder='Search...' placeholder='Search... (fixed)'
value={search} value={search}
onChange={searchArticles} onChange={this.searchArticles}
ref={inputRef} ref={this.inputRef}
/> />
</form> </form>
</span> </span>
); );
} }
}
export default Search; export default withRouter(Search);

View File

@@ -1,77 +0,0 @@
.black {
color: #ddd;
}
.black a {
color: #ddd;
}
.black input {
color: #ddd;
border: 1px solid #828282;
}
.black .menu button,
.black .story-text button {
background-color: #444444;
border-color: #bbb;
color: #ddd;
}
.black .item {
color: #828282;
}
.black .item .source-logo {
filter: grayscale(1);
}
.black .item a {
color: #828282;
}
.black .item a.link {
color: #ddd;
}
.black .item a.link:visited {
color: #828282;
}
.black .item .info a.hot {
color: #cccccc;
}
.black .article a {
border-bottom: 1px solid #aaaaaa;
}
.black .article u {
border-bottom: 1px solid #aaaaaa;
text-decoration: none;
}
.black .story-text video,
.black .story-text img {
filter: brightness(50%);
}
.black .article .info {
color: #828282;
}
.black .article .info a {
border-bottom: none;
color: #828282;
}
.black .comment.lined {
border-left: 1px solid #444444;
}
.black .checkbox:checked + label::after {
border-color: #eee;
}
.black .copy-button {
color: #828282;
}

View File

@@ -11,17 +11,14 @@
border: 1px solid #828282; border: 1px solid #828282;
} }
.dark .menu button,
.dark .story-text button {
background-color: #444444;
border-color: #bbb;
color: #ddd;
}
.dark .item { .dark .item {
color: #828282; color: #828282;
} }
.dark .item .source-logo {
filter: grayscale(1);
}
.dark .item a { .dark .item a {
color: #828282; color: #828282;
} }
@@ -46,7 +43,6 @@
text-decoration: none; text-decoration: none;
} }
.dark .story-text video,
.dark .story-text img { .dark .story-text img {
filter: brightness(50%); filter: brightness(50%);
} }
@@ -63,11 +59,3 @@
.dark .comment.lined { .dark .comment.lined {
border-left: 1px solid #444444; border-left: 1px solid #444444;
} }
.dark .checkbox:checked + label::after {
border-color: #eee;
}
.dark .copy-button {
color: #828282;
}

View File

@@ -2,30 +2,9 @@ body {
text-rendering: optimizeLegibility; text-rendering: optimizeLegibility;
font: 1rem/1.3 sans-serif; font: 1rem/1.3 sans-serif;
color: #000000; color: #000000;
margin-bottom: 100vh;
word-break: break-word; word-break: break-word;
font-kerning: normal; font-kerning: normal;
margin: 0;
}
::backdrop {
background-color: rgba(0,0,0,0);
}
body:fullscreen {
overflow-y: scroll !important;
}
body:-ms-fullscreen {
overflow-y: scroll !important;
}
body:-webkit-full-screen {
overflow-y: scroll !important;
}
body:-moz-full-screen {
overflow-y: scroll !important;
}
#root {
margin: 8px 8px 100vh 8px !important;
} }
a { a {
@@ -43,21 +22,10 @@ input {
border-radius: 4px; border-radius: 4px;
} }
.fullscreen {
margin: 0.25rem;
padding: 0.25rem;
}
pre { pre {
overflow: auto; overflow: auto;
} }
.comments pre {
overflow: auto;
white-space: pre-wrap;
overflow-wrap: break-word;
}
.container { .container {
margin: 1rem auto; margin: 1rem auto;
max-width: 64rem; max-width: 64rem;
@@ -126,13 +94,6 @@ span.source {
border-bottom: 1px solid #222222; border-bottom: 1px solid #222222;
} }
.article-title {
display: flex;
align-items: center;
margin-top: 0.67em;
margin-bottom: 0.67em;
}
.article h1 { .article h1 {
font-size: 1.6rem; font-size: 1.6rem;
} }
@@ -189,13 +150,6 @@ span.source {
.comments { .comments {
margin-left: -1.25rem; margin-left: -1.25rem;
margin-top: 0;
margin-bottom: 0;
padding: 0;
}
.comments dl, .comments dd {
margin: 0;
} }
.comment { .comment {
@@ -208,11 +162,6 @@ span.source {
.comment .text { .comment .text {
margin-top: -0.5rem; margin-top: -0.5rem;
margin-bottom: 1rem;
}
.comment .text > * {
margin-bottom: 0;
} }
.comment .text.hidden > p { .comment .text.hidden > p {
@@ -232,49 +181,20 @@ span.source {
padding-right: 1.5rem; padding-right: 1.5rem;
} }
button.collapser {
background: transparent;
border: none;
margin: 0;
padding-top: 0;
padding-bottom: 0;
font: inherit;
color: inherit;
}
button.comment {
background: transparent;
border-top: none;
border-right: none;
border-bottom: none;
margin: 0;
padding-top: 0;
padding-right: 0;
padding-bottom: 0;
font: inherit;
color: inherit;
text-align: left;
width: 100%;
}
.comment .pointer { .comment .pointer {
cursor: pointer; cursor: pointer;
} }
.dot { .toggleDot {
cursor: pointer;
position: fixed; position: fixed;
bottom: 1rem;
left: 1rem;
height: 3rem; height: 3rem;
width: 3rem; width: 3rem;
background-color: #828282; background-color: #828282;
border-radius: 50%; border-radius: 50%;
} }
.toggleDot {
bottom: 1rem;
left: 1rem;
}
.toggleDot .button { .toggleDot .button {
font: 2rem/1 'icomoon'; font: 2rem/1 'icomoon';
position: relative; position: relative;
@@ -283,110 +203,23 @@ button.comment {
} }
.forwardDot { .forwardDot {
cursor: pointer;
position: fixed;
bottom: 1rem; bottom: 1rem;
right: 1rem; right: 1rem;
height: 3rem;
width: 3rem;
background-color: #828282;
border-radius: 50%;
} }
.forwardDot .button { .forwardDot .button {
font: 2rem/1 'icomoon'; font: 2.5rem/1 'icomoon';
position: relative; position: relative;
top: 0.5rem; top: 0.25rem;
left: 0.5rem; left: 0.3rem;
}
.backwardDot {
bottom: 1rem;
right: 5rem;
}
.backwardDot .button {
font: 2rem/1 'icomoon';
position: relative;
top: 0.5rem;
left: 0.5rem;
} }
.search form { .search form {
display: inline; display: inline;
} }
.copy-button {
font: 1.5rem/1 'icomoon2';
color: #828282;
background: transparent;
border: none;
cursor: pointer;
vertical-align: middle;
}
.checkbox {
-webkit-appearance: none;
appearance: none;
position: absolute;
opacity: 0;
cursor: pointer;
height: 0;
width: 0;
}
.checkbox + label {
position: relative;
cursor: pointer;
padding-left: 1.75rem;
user-select: none;
}
.checkbox + label::before {
content: '';
position: absolute;
left: 0;
top: 0.1em;
width: 1rem;
height: 1rem;
border: 1px solid #828282;
background-color: transparent;
border-radius: 3px;
}
.checkbox:checked + label::after {
content: "";
position: absolute;
left: 0.35rem;
top: 0.2em;
width: 0.3rem;
height: 0.6rem;
border-style: solid;
border-color: #000;
border-width: 0 2px 2px 0;
transform: rotate(45deg);
}
.tooltip .tooltiptext {
visibility: hidden;
width: 140px;
background-color: #555;
color: #fff;
text-align: center;
border-radius: 6px;
padding: 5px 0;
position: absolute;
z-index: 1;
bottom: 110%;
left: 50%;
margin-left: -70px;
opacity: 0;
transition: opacity 0.2s;
font-size: 0.9rem;
line-height: 1.3;
}
.forwardDot .tooltiptext {
left: auto;
right: 0;
margin-left: 0;
}
.tooltip.show-tooltip .tooltiptext {
visibility: visible;
opacity: 1;
}

View File

@@ -1,95 +0,0 @@
.red {
color: #b00;
scrollbar-color: #b00 #440000;
}
.red a {
color: #b00;
}
.red input {
color: #b00;
border: 1px solid #690000;
}
.red input::placeholder {
color: #690000;
}
.red hr {
background-color: #690000;
}
.red .menu button,
.red .story-text button {
background-color: #440000;
border-color: #b00;
color: #b00;
}
.red .item,
.red .slogan {
color: #690000;
}
.red .item .source-logo {
display: none;
}
.red .item a {
color: #690000;
}
.red .item a.link {
color: #b00;
}
.red .item a.link:visited {
color: #690000;
}
.red .item .info a.hot {
color: #cc0000;
}
.red .article a {
border-bottom: 1px solid #aa0000;
}
.red .article u {
border-bottom: 1px solid #aa0000;
text-decoration: none;
}
.red .story-text video,
.red .story-text img {
filter: grayscale(100%) brightness(20%) sepia(100%) hue-rotate(-50deg) saturate(600%) contrast(0.8);
}
.red .article .info {
color: #690000;
}
.red .article .info a {
border-bottom: none;
color: #690000;
}
.red .comment.lined {
border-left: 1px solid #440000;
}
.red .dot {
background-color: #440000;
}
.red .checkbox + label::before {
border: 1px solid #690000;
}
.red .checkbox:checked + label::after {
border-color: #dd0000;
}
.red .copy-button {
color: #690000;
}

View File

@@ -1,53 +1,54 @@
import React, { useState, useRef } from 'react'; import React, { Component } from 'react';
import { useHistory } from 'react-router-dom'; import { withRouter } from 'react-router-dom';
function Submit() { class Submit extends Component {
const [progress, setProgress] = useState(null); constructor(props) {
const inputRef = useRef(null); super(props);
const history = useHistory();
const submitArticle = async (event) => { this.state = {
progress: null,
};
this.inputRef = React.createRef();
}
submitArticle = (event) => {
event.preventDefault(); event.preventDefault();
const url = event.target[0].value; const url = event.target[0].value;
inputRef.current.blur(); this.inputRef.current.blur();
setProgress('Submitting...'); this.setState({ progress: 'Submitting...' });
let data = new FormData(); let data = new FormData();
data.append('url', url); data.append('url', url);
try { fetch('/api/submit', { method: 'POST', body: data })
const res = await fetch('/api/submit', { method: 'POST', body: data }); .then(res => res.json())
.then(
if (res.ok) { (result) => {
const result = await res.json(); this.props.history.replace('/' + result.nid);
history.replace('/' + result.nid); },
} else { (error) => {
let errorData; this.setState({ progress: 'Error' });
try {
errorData = await res.json();
} catch (jsonError) {
// Not a JSON error from our API, so it's a server issue
throw new Error(`Server responded with ${res.status} ${res.statusText}`);
} }
setProgress(errorData.error || 'An unknown error occurred.');
}
} catch (error) {
setProgress(`Error: ${error.toString()}`);
}
}
return (
<span className='search'>
<form onSubmit={submitArticle}>
<input
placeholder='Submit URL'
ref={inputRef}
/>
</form>
{progress && <p>{progress}</p>}
</span>
); );
} }
export default Submit; render() {
const progress = this.state.progress;
return (
<span className='search'>
<form onSubmit={this.submitArticle}>
<input
placeholder='Submit Article'
ref={this.inputRef}
/>
</form>
{progress ? progress : ''}
</span>
);
}
}
export default withRouter(Submit);

View File

@@ -26,8 +26,3 @@
font-family: 'Icomoon'; font-family: 'Icomoon';
src: url('icomoon.ttf') format('truetype'); src: url('icomoon.ttf') format('truetype');
} }
@font-face {
font-family: 'Icomoon2';
src: url('icomoon2.ttf') format('truetype');
}

Binary file not shown.

Binary file not shown.

View File

@@ -8,4 +8,4 @@ ReactDOM.render(<App />, document.getElementById('root'));
// If you want your app to work offline and load faster, you can change // If you want your app to work offline and load faster, you can change
// // unregister() to register() below. Note this comes with some pitfalls. // // unregister() to register() below. Note this comes with some pitfalls.
// // Learn more about service workers: https://bit.ly/CRA-PWA // // Learn more about service workers: https://bit.ly/CRA-PWA
serviceWorker.unregister(); serviceWorker.register();

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff