Compare commits

..

49 Commits

Author SHA1 Message Date
Jason Schwarzenberger
bfa4108a8e Merge remote-tracking branch 'tanner/master' 2020-11-09 16:08:28 +13:00
Jason Schwarzenberger
0bd0d40a31 use json type in sqlite. 2020-11-09 15:45:10 +13:00
Jason Schwarzenberger
4e04595415 fix search. 2020-11-09 15:44:44 +13:00
Jason
006db2960c change to 3 days 2020-11-09 01:36:51 +00:00
Jason Schwarzenberger
1f063f0dac undo log level change 2020-11-06 11:20:34 +13:00
Jason Schwarzenberger
1658346aa9 fix news.py feed. 2020-11-06 10:37:43 +13:00
Jason Schwarzenberger
2dbc702b40 switch to python-dateutil for parser, reverse sort xml feeds. 2020-11-06 10:02:39 +13:00
Jason Schwarzenberger
1c4764e67d sort sitemap feed by lastmod time. 2020-11-06 09:30:15 +13:00
Jason
ee49d2021e newsroom 2020-11-05 20:28:55 +00:00
Jason
c391c50ab1 use localize 2020-11-05 04:15:31 +00:00
Jason Schwarzenberger
095f0d549a use replace. 2020-11-05 16:57:08 +13:00
Jason Schwarzenberger
c21c71667e fix date issue. 2020-11-05 16:41:15 +13:00
Jason Schwarzenberger
c3a2c91a11 update requirements.txt 2020-11-05 16:33:50 +13:00
Jason Schwarzenberger
0f39446a61 tz aware for use in settings. 2020-11-05 16:30:55 +13:00
Jason Schwarzenberger
351059aab1 fix excludes. 2020-11-05 15:59:13 +13:00
Jason Schwarzenberger
4488e2c292 add an excludes list of substrings for urls in the settings for sitemap/category. 2020-11-05 15:51:59 +13:00
Jason Schwarzenberger
afda5b635c disqus test. 2020-11-05 14:23:51 +13:00
Jason Schwarzenberger
0fc1a44d2b fix issue in substack. 2020-11-04 17:40:29 +13:00
Jason Schwarzenberger
9fff1b9e46 avoid duplicate articles listed on the category page 2020-11-04 17:14:42 +13:00
Jason Schwarzenberger
16b59f6c67 try stop bad pages. 2020-11-04 16:34:31 +13:00
Jason Schwarzenberger
939f4775a7 better settings example. 2020-11-04 15:52:34 +13:00
Jason Schwarzenberger
9bfc6fc6fa scraper settings, ordering and loop. 2020-11-04 15:47:12 +13:00
Jason Schwarzenberger
6ea9844d00 remove useless try blocks. 2020-11-04 15:37:19 +13:00
Jason Schwarzenberger
1318259d3d imply referrer is substack. 2020-11-04 15:21:07 +13:00
Jason Schwarzenberger
98a0c2257c increase declutter timeout. 2020-11-04 15:15:00 +13:00
Jason Schwarzenberger
e6976db25d fix tabs 2020-11-04 15:04:20 +13:00
Jason Schwarzenberger
9edc8b7cca move scraping for article content to files. 2020-11-04 15:00:58 +13:00
Jason Schwarzenberger
33e21e7f30 fix mistake. 2020-11-04 12:45:01 +13:00
Jason Schwarzenberger
892a99eca6 add + expander in place of collapser. 2020-11-04 12:43:15 +13:00
Jason Schwarzenberger
d718d05a04 fix dates for newsroom. 2020-11-04 11:53:16 +13:00
Jason Schwarzenberger
d1795eb1b8 add radionz and newsroom logos. 2020-11-04 11:30:56 +13:00
Jason Schwarzenberger
9f4ff4acf0 remove unnecessary sitemap.xml request. 2020-11-04 11:22:15 +13:00
Jason Schwarzenberger
db6aad84ec fix mistake. 2020-11-04 11:12:01 +13:00
Jason Schwarzenberger
29f8a8b8cc add news site categories feed. 2020-11-04 11:08:50 +13:00
Jason
abf8589e02 fix sitemap 2020-11-03 10:53:40 +00:00
Jason
b759f46582 use extruct for opengraph/json-ld/microdata of articles 2020-11-03 10:31:36 +00:00
Jason Schwarzenberger
736cdc8576 fix mistake. 2020-11-03 17:04:46 +13:00
Jason Schwarzenberger
244d416f6e settings config of sitemap/substack publications. 2020-11-03 17:01:29 +13:00
Jason Schwarzenberger
5f98a2e76a Merge remote-tracking branch 'tanner/master' into master
And adding relevant setings.py.example/etc.
2020-11-03 16:44:02 +13:00
Jason Schwarzenberger
0567cdfd9b move sort to render. 2020-11-03 16:30:22 +13:00
Jason Schwarzenberger
4f90671cec order feed by reverse chronological 2020-11-03 16:21:23 +13:00
Jason Schwarzenberger
e63a1456a5 add logos. 2020-11-03 16:07:07 +13:00
Jason Schwarzenberger
76f1d57702 sitemap based feed. 2020-11-03 16:00:03 +13:00
Jason Schwarzenberger
de80389ed0 add logos. 2020-11-03 12:48:19 +13:00
Jason Schwarzenberger
4e64cf682a add the bulletin. 2020-11-03 12:41:16 +13:00
Jason Schwarzenberger
c5fe5d25a0 add substack.py top sites, replacing webworm.py 2020-11-03 12:28:39 +13:00
Jason
283a2b1545 fix webworm comments 2020-11-02 22:06:43 +00:00
Jason Schwarzenberger
0d6a86ace2 fix webworm dates. 2020-11-03 10:31:14 +13:00
Jason Schwarzenberger
f23bf628e0 add webworm/substack as a feed. 2020-11-02 17:09:59 +13:00
53 changed files with 4940 additions and 7022 deletions

1
.gitignore vendored
View File

@@ -1 +0,0 @@
.aider*

View File

@@ -109,5 +109,4 @@ settings.py
data.db
data.db.bak
data/archive/*
data/backup/*
qotnews.sqlite

View File

@@ -4,8 +4,9 @@ from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import IntegrityError
from sqlalchemy.types import JSON
engine = create_engine('sqlite:///data/qotnews.sqlite', connect_args={'timeout': 360})
engine = create_engine('sqlite:///data/qotnews.sqlite')
Session = sessionmaker(bind=engine)
Base = declarative_base()
@@ -15,8 +16,8 @@ class Story(Base):
sid = Column(String(16), primary_key=True)
ref = Column(String(16), unique=True)
meta_json = Column(String)
full_json = Column(String)
meta = Column(JSON)
data = Column(JSON)
title = Column(String)
class Reflist(Base):
@@ -36,19 +37,21 @@ def get_story(sid):
def put_story(story):
story = story.copy()
full_json = json.dumps(story)
data = {}
data.update(story)
story.pop('text', None)
story.pop('comments', None)
meta_json = json.dumps(story)
meta = {}
meta.update(story)
meta.pop('text', None)
meta.pop('comments', None)
try:
session = Session()
s = Story(
sid=story['id'],
ref=story['ref'],
full_json=full_json,
meta_json=meta_json,
data=data,
meta=meta,
title=story.get('title', None),
)
session.merge(s)
@@ -68,13 +71,12 @@ def get_reflist(amount):
q = session.query(Reflist).order_by(Reflist.rid.desc()).limit(amount)
return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]
def get_stories(amount, skip=0):
def get_stories(amount):
session = Session()
q = session.query(Reflist, Story.meta_json).\
order_by(Reflist.rid.desc()).\
q = session.query(Reflist, Story.meta).\
join(Story).\
filter(Story.title != None).\
offset(skip).\
order_by(Story.meta['date'].desc()).\
limit(amount)
return [x[1] for x in q]
@@ -101,22 +103,7 @@ def del_ref(ref):
finally:
session.close()
def count_stories():
try:
session = Session()
return session.query(Story).count()
finally:
session.close()
def get_story_list():
try:
session = Session()
return session.query(Story.sid).all()
finally:
session.close()
if __name__ == '__main__':
init()
#print(get_story_by_ref('hgi3sy'))
print(len(get_reflist(99999)))
print(get_story_by_ref('hgi3sy'))

View File

@@ -1,8 +1,6 @@
import database
import search
import sys
import settings
import logging
import json
import requests
@@ -23,7 +21,7 @@ def database_del_story(sid):
def search_del_story(sid):
try:
r = requests.delete(settings.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
r = requests.delete(search.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()

View File

@@ -8,82 +8,115 @@ import time
from bs4 import BeautifulSoup
import settings
from feeds import hackernews, reddit, tildes, manual, lobsters
import utils
from feeds import hackernews, reddit, tildes, substack, manual, news
from scrapers import outline, declutter, local
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com', 'sec.gov']
TWO_DAYS = 60*60*24*2
ONE_HOUR = 60*60
ONE_DAY = 24*ONE_HOUR
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
MAX_AGE_IN_DAYS = 3*ONE_DAY
substacks = {}
for key, value in settings.SUBSTACK.items():
substacks[key] = substack.Publication(value['url'])
categories = {}
for key, value in settings.CATEGORY.items():
categories[key] = news.Category(value['url'], value.get('tz'))
sitemaps = {}
for key, value in settings.SITEMAP.items():
sitemaps[key] = news.Sitemap(value['url'], value.get('tz'))
def list():
feed = []
if settings.NUM_HACKERNEWS:
feed += [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
if settings.NUM_LOBSTERS:
feed += [(x, 'lobsters') for x in lobsters.feed()[:settings.NUM_LOBSTERS]]
if settings.NUM_REDDIT:
feed += [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
if settings.NUM_TILDES:
feed += [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
if settings.NUM_SUBSTACK:
feed += [(x, 'substack') for x in substack.top.feed()[:settings.NUM_SUBSTACK]]
for key, publication in substacks.items():
count = settings.SUBSTACK[key]['count']
feed += [(x, key) for x in publication.feed()[:count]]
for key, sites in categories.items():
count = settings.CATEGORY[key].get('count') or 0
excludes = settings.CATEGORY[key].get('excludes')
tz = settings.CATEGORY[key].get('tz')
feed += [(x, key) for x in sites.feed(excludes)[:count]]
for key, sites in sitemaps.items():
count = settings.SITEMAP[key].get('count') or 0
excludes = settings.SITEMAP[key].get('excludes')
feed += [(x, key) for x in sites.feed(excludes)[:count]]
return feed
def get_article(url):
if not settings.READER_URL:
logging.info('Readerserver not configured, aborting.')
return ''
scrapers = {
'declutter': declutter,
'outline': outline,
'local': local,
}
available = settings.SCRAPERS or ['local']
if 'local' not in available:
available += ['local']
if url.startswith('https://twitter.com'):
logging.info('Replacing twitter.com url with nitter.net')
url = url.replace('twitter.com', 'nitter.net')
try:
r = requests.post(settings.READER_URL, data=dict(url=url), timeout=20)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.text
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem getting article: {}'.format(str(e)))
return ''
for scraper in available:
if scraper not in scrapers.keys():
continue
try:
html = scrapers[scraper].get_html(url)
if html:
return html
except KeyboardInterrupt:
raise
except:
pass
return ''
def get_content_type(url):
try:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
return requests.get(url, headers=headers, timeout=5).headers['content-type']
except:
return ''
try:
headers = {
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
'X-Forwarded-For': '66.249.66.1',
}
return requests.get(url, headers=headers, timeout=10).headers['content-type']
return requests.get(url, headers=headers, timeout=5).headers['content-type']
except:
pass
try:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
return requests.get(url, headers=headers, timeout=10).headers['content-type']
except:
return ''
def update_story(story, is_manual=False):
res = {}
try:
if story['source'] == 'hackernews':
res = hackernews.story(story['ref'])
elif story['source'] == 'lobsters':
res = lobsters.story(story['ref'])
elif story['source'] == 'reddit':
res = reddit.story(story['ref'])
elif story['source'] == 'tildes':
res = tildes.story(story['ref'])
elif story['source'] == 'manual':
res = manual.story(story['ref'])
except BaseException as e:
utils.alert_tanner('Problem updating {} story, ref {}: {}'.format(story['source'], story['ref'], str(e)))
logging.exception(e)
return False
if story['source'] == 'hackernews':
res = hackernews.story(story['ref'])
elif story['source'] == 'reddit':
res = reddit.story(story['ref'])
elif story['source'] == 'tildes':
res = tildes.story(story['ref'])
elif story['source'] == 'substack':
res = substack.top.story(story['ref'])
elif story['source'] in categories.keys():
res = categories[story['source']].story(story['ref'])
elif story['source'] in sitemaps.keys():
res = sitemaps[story['source']].story(story['ref'])
elif story['source'] in substacks.keys():
res = substacks[story['source']].story(story['ref'])
elif story['source'] == 'manual':
res = manual.story(story['ref'])
if res:
story.update(res) # join dicts
@@ -91,8 +124,8 @@ def update_story(story, is_manual=False):
logging.info('Story not ready yet')
return False
if story['date'] and not is_manual and story['date'] + TWO_DAYS < time.time():
logging.info('Story too old, removing. Date: {}'.format(story['date']))
if story['date'] and not is_manual and story['date'] + MAX_AGE_IN_DAYS < time.time():
logging.info('Story too old, removing')
return False
if story.get('url', '') and not story.get('text', ''):
@@ -106,12 +139,6 @@ def update_story(story, is_manual=False):
logging.info(story['url'])
return False
if 'trump' in story['title'].lower() or 'musk' in story['title'].lower() or 'Removed by moderator' in story['title']:
logging.info('Trump / Musk / removed story, skipping')
logging.info(story['url'])
return False
logging.info('Getting article ' + story['url'])
story['text'] = get_article(story['url'])
if not story['text']: return False
@@ -129,7 +156,7 @@ if __name__ == '__main__':
#print(get_article('https://www.bloomberg.com/news/articles/2019-09-23/xi-s-communists-under-pressure-as-high-prices-hit-china-workers'))
a = get_content_type('https://tefkos.comminfo.rutgers.edu/Courses/e530/Readings/Beal%202008%20full%20text%20searching.pdf')
a = get_article('https://blog.joinmastodon.org/2019/10/mastodon-3.0/')
print(a)
print('done')

View File

@@ -12,8 +12,7 @@ import requests
from utils import clean
API_TOPSTORIES = lambda x: 'https://hacker-news.firebaseio.com/v0/topstories.json'
ALG_API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
BHN_API_ITEM = lambda x : 'https://api.hnpwa.com/v0/item/{}.json'.format(x)
API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
SITE_LINK = lambda x : 'https://news.ycombinator.com/item?id={}'.format(x)
SITE_AUTHOR_LINK = lambda x : 'https://news.ycombinator.com/user?id={}'.format(x)
@@ -43,7 +42,7 @@ def api(route, ref=None):
def feed():
return [str(x) for x in api(API_TOPSTORIES) or []]
def alg_comment(i):
def comment(i):
if 'author' not in i:
return False
@@ -52,25 +51,21 @@ def alg_comment(i):
c['score'] = i.get('points', 0)
c['date'] = i.get('created_at_i', 0)
c['text'] = clean(i.get('text', '') or '')
c['comments'] = [alg_comment(j) for j in i['children']]
c['comments'] = [comment(j) for j in i['children']]
c['comments'] = list(filter(bool, c['comments']))
return c
def alg_comment_count(i):
def comment_count(i):
alive = 1 if i['author'] else 0
return sum([alg_comment_count(c) for c in i['comments']]) + alive
return sum([comment_count(c) for c in i['comments']]) + alive
def alg_story(ref):
r = api(ALG_API_ITEM, ref)
if not r:
logging.info('Bad Algolia Hackernews API response.')
return None
def story(ref):
r = api(API_ITEM, ref)
if not r: return False
if 'deleted' in r:
logging.info('Story was deleted.')
return False
elif r.get('type', '') != 'story':
logging.info('Type "{}" is not "story".'.format(r.get('type', '')))
return False
s = {}
@@ -81,88 +76,17 @@ def alg_story(ref):
s['title'] = r.get('title', '')
s['link'] = SITE_LINK(ref)
s['url'] = r.get('url', '')
s['comments'] = [alg_comment(i) for i in r['children']]
s['comments'] = [comment(i) for i in r['children']]
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = alg_comment_count(s) - 1
s['num_comments'] = comment_count(s) - 1
if 'text' in r and r['text']:
s['text'] = clean(r['text'] or '')
return s
def bhn_comment(i):
if 'user' not in i:
return False
c = {}
c['author'] = i.get('user', '')
c['score'] = 0 # Not present?
c['date'] = i.get('time', 0)
c['text'] = clean(i.get('content', '') or '')
c['comments'] = [bhn_comment(j) for j in i['comments']]
c['comments'] = list(filter(bool, c['comments']))
return c
def bhn_story(ref):
r = api(BHN_API_ITEM, ref)
if not r:
logging.info('Bad BetterHN Hackernews API response.')
return None
if 'deleted' in r: # TODO: verify
logging.info('Story was deleted.')
return False
elif r.get('dead', False):
logging.info('Story was deleted.')
return False
elif r.get('type', '') != 'link':
logging.info('Type "{}" is not "link".'.format(r.get('type', '')))
return False
s = {}
s['author'] = r.get('user', '')
s['author_link'] = SITE_AUTHOR_LINK(r.get('user', ''))
s['score'] = r.get('points', 0)
s['date'] = r.get('time', 0)
s['title'] = r.get('title', '')
s['link'] = SITE_LINK(ref)
s['url'] = r.get('url', '')
if s['url'].startswith('item'):
s['url'] = SITE_LINK(ref)
s['comments'] = [bhn_comment(i) for i in r['comments']]
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = r.get('comments_count', 0)
if 'content' in r and r['content']:
s['text'] = clean(r['content'] or '')
return s
def story(ref):
s = alg_story(ref)
if s is None:
s = bhn_story(ref)
if not s:
return False
if not s['title']:
return False
if s['score'] < 25 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False
return s
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
print(feed())
#print(story(20763961))
#print(story(20802050))
#print(story(42899834)) # type "job"
#print(story(42900076)) # Ask HN
#print(story(42898201)) # Show HN
#print(story(42899703)) # normal
print(story(42902678)) # bad title?

View File

@@ -1,120 +0,0 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
if __name__ == '__main__':
import sys
sys.path.insert(0,'.')
import requests
from datetime import datetime
from utils import clean
API_HOTTEST = lambda x: 'https://lobste.rs/hottest.json'
API_ITEM = lambda x : 'https://lobste.rs/s/{}.json'.format(x)
SITE_LINK = lambda x : 'https://lobste.rs/s/{}'.format(x)
SITE_AUTHOR_LINK = lambda x : 'https://lobste.rs/u/{}'.format(x)
def api(route, ref=None):
try:
r = requests.get(route(ref), timeout=5)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting lobsters API: {}, trying again'.format(str(e)))
try:
r = requests.get(route(ref), timeout=15)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting lobsters API: {}'.format(str(e)))
return False
def feed():
return [x['short_id'] for x in api(API_HOTTEST) or []]
def unix(date_str):
date_str = date_str.replace(':', '')
return int(datetime.strptime(date_str, '%Y-%m-%dT%H%M%S.%f%z').timestamp())
def make_comment(i):
c = {}
try:
c['author'] = i['commenting_user']
except KeyError:
c['author'] = ''
c['score'] = i.get('score', 0)
try:
c['date'] = unix(i['created_at'])
except KeyError:
c['date'] = 0
c['text'] = clean(i.get('comment', '') or '')
c['comments'] = []
return c
def iter_comments(flat_comments):
nested_comments = []
parent_stack = []
for comment in flat_comments:
c = make_comment(comment)
indent = comment['depth']
if indent == 0:
nested_comments.append(c)
parent_stack = [c]
else:
parent_stack = parent_stack[:indent]
p = parent_stack[-1]
p['comments'].append(c)
parent_stack.append(c)
return nested_comments
def story(ref):
r = api(API_ITEM, ref)
if not r:
logging.info('Bad Lobsters API response.')
return False
s = {}
try:
s['author'] = r['submitter_user']
s['author_link'] = SITE_AUTHOR_LINK(s['author'])
except KeyError:
s['author'] = ''
s['author_link'] = ''
s['score'] = r.get('score', 0)
try:
s['date'] = unix(r['created_at'])
except KeyError:
s['date'] = 0
s['title'] = r.get('title', '')
s['link'] = SITE_LINK(ref)
s['url'] = r.get('url', '')
s['comments'] = iter_comments(r['comments'])
s['num_comments'] = r['comment_count']
if s['score'] < 15 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False
if 'description' in r and r['description']:
s['text'] = clean(r['description'] or '')
return s
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
#print(feed())
import json
print(json.dumps(story('fzvd1v'), indent=4))
#print(json.dumps(story('ixyv5u'), indent=4))

View File

@@ -27,9 +27,7 @@ def api(route):
def story(ref):
html = api(ref)
if not html:
logging.info('Bad http GET response.')
return False
if not html: return False
soup = BeautifulSoup(html, features='html.parser')

229
apiserver/feeds/news.py Normal file
View File

@@ -0,0 +1,229 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
if __name__ == '__main__':
import sys
sys.path.insert(0,'.')
import requests
from datetime import datetime
from bs4 import BeautifulSoup
from scrapers import declutter
import dateutil.parser
import extruct
import pytz
from utils import clean
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'
#USER_AGENT = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
def unix(date_str, tz=None):
try:
dt = dateutil.parser.parse(date_str)
if tz:
dt = pytz.timezone(tz).localize(dt)
return int(dt.timestamp())
except:
pass
return 0
def xml(route, ref=None):
try:
headers = {'User-Agent': USER_AGENT, 'X-Forwarded-For': '66.249.66.1'}
r = requests.get(route(ref), headers=headers, timeout=5)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.text
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting URL: {}'.format(str(e)))
return False
def parse_extruct(s, data):
for rdfa in data['rdfa']:
for key, props in rdfa.items():
if 'http://ogp.me/ns#title' in props:
for values in props['http://ogp.me/ns#title']:
s['title'] = values['@value']
if 'http://ogp.me/ns/article#modified_time' in props:
for values in props['http://ogp.me/ns/article#modified_time']:
s['date'] = values['@value']
if 'http://ogp.me/ns/article#published_time' in props:
for values in props['http://ogp.me/ns/article#published_time']:
s['date'] = values['@value']
for og in data['opengraph']:
titles = list(filter(None, [value if 'og:title' in key else None for key, value in og['properties']]))
modified = list(filter(None, [value if 'article:modified_time' in key else None for key, value in og['properties']]))
published = list(filter(None, [value if 'article:published_time' in key else None for key, value in og['properties']]))
if len(modified):
s['date'] = modified[0]
if len(published):
s['date'] = published[0]
if len(titles):
s['title'] = titles[0]
for md in data['microdata']:
if md['type'] == 'https://schema.org/NewsArticle':
props = md['properties']
s['title'] = props['headline']
if props['dateModified']:
s['date'] = props['dateModified']
if props['datePublished']:
s['date'] = props['datePublished']
if 'author' in props and props['author']:
s['author'] = props['author']['properties']['name']
for ld in data['json-ld']:
if '@type' in ld and ld['@type'] in ['Article', 'NewsArticle']:
s['title'] = ld['headline']
if ld['dateModified']:
s['date'] = ld['dateModified']
if ld['datePublished']:
s['date'] = ld['datePublished']
if 'author' in ld and ld['author']:
s['author'] = ld['author']['name']
if '@graph' in ld:
for gld in ld['@graph']:
if '@type' in gld and gld['@type'] in ['Article', 'NewsArticle']:
s['title'] = gld['headline']
if gld['dateModified']:
s['date'] = gld['dateModified']
if gld['datePublished']:
s['date'] = gld['datePublished']
return s
def comment(i):
if 'author' not in i:
return False
c = {}
c['author'] = i.get('author', '')
c['score'] = i.get('points', 0)
c['date'] = unix(i.get('date', 0))
c['text'] = clean(i.get('text', '') or '')
c['comments'] = [comment(j) for j in i['children']]
c['comments'] = list(filter(bool, c['comments']))
return c
def comment_count(i):
alive = 1 if i['author'] else 0
return sum([comment_count(c) for c in i['comments']]) + alive
class _Base:
def __init__(url, tz=None):
self.url = url
self.tz = tz
def feed(self, excludes=None):
return []
def story(self, ref):
markup = xml(lambda x: ref)
if not markup:
return False
s = {}
s['author_link'] = ''
s['score'] = 0
s['comments'] = []
s['num_comments'] = 0
s['link'] = ref
s['url'] = ref
s['date'] = 0
data = extruct.extract(markup)
s = parse_extruct(s, data)
if s['date']:
s['date'] = unix(s['date'], tz=self.tz)
if 'disqus' in markup:
try:
s['comments'] = declutter.get_comments(ref)
c['comments'] = list(filter(bool, c['comments']))
s['num_comments'] = comment_count(s['comments'])
except KeyboardInterrupt:
raise
except:
pass
if not s['date']:
return False
return s
def get_sitemap_date(a):
if a.find('lastmod'):
return a.find('lastmod').text
if a.find('news:publication_date'):
return a.find('news:publication_date').text
return ''
class Sitemap(_Base):
def __init__(self, url, tz=None):
self.tz = tz
self.sitemap_url = url
def feed(self, excludes=None):
markup = xml(lambda x: self.sitemap_url)
if not markup: return []
soup = BeautifulSoup(markup, features='lxml')
sitemap = soup.find('urlset').findAll('url')
links = list(filter(None, [a if a.find('loc') else None for a in sitemap]))
links = list(filter(None, [a if get_sitemap_date(a) else None for a in links]))
links.sort(key=lambda a: unix(get_sitemap_date(a)), reverse=True)
links = [x.find('loc').text for x in links] or []
links = list(set(links))
if excludes:
links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
return links
class Category(_Base):
def __init__(self, url, tz=None):
self.tz = tz
self.category_url = url
self.base_url = '/'.join(url.split('/')[:3])
def feed(self, excludes=None):
markup = xml(lambda x: self.category_url)
if not markup: return []
soup = BeautifulSoup(markup, features='html.parser')
links = soup.find_all('a', href=True)
links = [link.get('href') for link in links]
links = [f"{self.base_url}{link}" if link.startswith('/') else link for link in links]
links = list(filter(None, [link if link.startswith(self.category_url) else None for link in links]))
links = list(filter(None, [link if link != self.category_url else None for link in links]))
links = list(set(links))
if excludes:
links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
return links
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
print("Sitemap: Stuff")
site = Sitemap("https://www.stuff.co.nz/sitemap/news/sitemap.xml")
posts = site.feed()
print(posts[:5])
print(site.story(posts[0]))
print("Category: RadioNZ Te Ao Māori")
site = Category("https://www.rnz.co.nz/news/te-manu-korihi/")
posts = site.feed()
print(posts[:5])
print(site.story(posts[0]))
print("Sitemap: Newsroom")
site = Sitemap("https://www.newsroom.co.nz/sitemap.xml")
posts = site.feed()
print(posts[:5])
print(site.story(posts[0]))

View File

@@ -32,8 +32,11 @@ def feed():
return [x.id for x in reddit.subreddit(subs).hot()]
except KeyboardInterrupt:
raise
except BaseException as e:
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
except PRAWException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
return []
except PrawcoreException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
return []
def comment(i):
@@ -56,9 +59,7 @@ def comment(i):
def story(ref):
try:
r = reddit.submission(ref)
if not r:
logging.info('Bad Reddit API response.')
return False
if not r: return False
s = {}
s['author'] = r.author.name if r.author else '[Deleted]'
@@ -73,7 +74,6 @@ def story(ref):
s['num_comments'] = r.num_comments
if s['score'] < 25 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False
if r.selftext:
@@ -84,10 +84,10 @@ def story(ref):
except KeyboardInterrupt:
raise
except PRAWException as e:
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
logging.error('Problem hitting reddit API: {}'.format(str(e)))
return False
except PrawcoreException as e:
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
logging.error('Problem hitting reddit API: {}'.format(str(e)))
return False
# scratchpad so I can quickly develop the parser

165
apiserver/feeds/substack.py Normal file
View File

@@ -0,0 +1,165 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
if __name__ == '__main__':
import sys
sys.path.insert(0,'.')
import requests
from datetime import datetime
from utils import clean
SUBSTACK_REFERER = 'https://substack.com'
SUBSTACK_API_TOP_POSTS = lambda x: "https://substack.com/api/v1/reader/top-posts"
def author_link(author_id, base_url):
return f"{base_url}/people/{author_id}"
def api_comments(post_id, base_url):
return f"{base_url}/api/v1/post/{post_id}/comments?all_comments=true&sort=best_first"
def api_stories(x, base_url):
return f"{base_url}/api/v1/archive?sort=new&search=&offset=0&limit=100"
def unix(date_str):
return int(datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%fZ').timestamp())
def api(route, ref=None, referer=None):
headers = {'Referer': referer} if referer else None
try:
r = requests.get(route(ref), headers=headers, timeout=10)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting Substack API: {}, trying again'.format(str(e)))
try:
r = requests.get(route(ref), headers=headers, timeout=20)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting Substack API: {}'.format(str(e)))
return False
def comment(i):
if 'body' not in i:
return False
c = {}
c['date'] = unix(i.get('date'))
c['author'] = i.get('name', '')
c['score'] = i.get('reactions').get('')
c['text'] = clean(i.get('body', '') or '')
c['comments'] = [comment(j) for j in i['children']]
c['comments'] = list(filter(bool, c['comments']))
return c
class Publication:
def __init__(self, domain):
self.BASE_DOMAIN = domain
def feed(self):
stories = api(lambda x: api_stories(x, self.BASE_DOMAIN), referer=self.BASE_DOMAIN)
if not stories: return []
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
return [str(i.get("id")) for i in stories or []]
def story(self, ref):
stories = api(lambda x: api_stories(x, self.BASE_DOMAIN), referer=self.BASE_DOMAIN)
if not stories: return False
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
stories = list(filter(None, [i if str(i.get('id')) == ref else None for i in stories]))
if len(stories) == 0:
return False
r = stories[0]
if not r:
return False
s = {}
s['author'] = ''
s['author_link'] = ''
s['date'] = unix(r.get('post_date'))
s['score'] = r.get('reactions').get('')
s['title'] = r.get('title', '')
s['link'] = r.get('canonical_url', '')
s['url'] = r.get('canonical_url', '')
comments = api(lambda x: api_comments(x, self.BASE_DOMAIN), r.get('id'), referer=self.BASE_DOMAIN)
s['comments'] = [comment(i) for i in comments.get('comments')]
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = r.get('comment_count', 0)
authors = list(filter(None, [self._bylines(byline) for byline in r.get('publishedBylines')]))
if len(authors):
s['author'] = authors[0].get('name')
s['author_link'] = authors[0].get('link')
return s
def _bylines(self, b):
if 'id' not in b:
return None
a = {}
a['name'] = b.get('name')
a['link'] = author_link(b.get('id'), self.BASE_DOMAIN)
return a
class Top:
def feed(self):
stories = api(SUBSTACK_API_TOP_POSTS, referer=SUBSTACK_REFERER)
if not stories: return []
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
return [str(i.get("id")) for i in stories or []]
def story(self, ref):
stories = api(SUBSTACK_API_TOP_POSTS, referer=SUBSTACK_REFERER)
if not stories: return False
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
stories = list(filter(None, [i if str(i.get('id')) == ref else None for i in stories]))
if len(stories) == 0:
return False
r = stories[0]
if not r:
return False
s = {}
pub = r.get('pub')
base_url = pub.get('base_url')
s['author'] = pub.get('author_name')
s['author_link'] = author_link(pub.get('author_id'), base_url)
s['date'] = unix(r.get('post_date'))
s['score'] = r.get('score')
s['title'] = r.get('title', '')
s['link'] = r.get('canonical_url', '')
s['url'] = r.get('canonical_url', '')
comments = api(lambda x: api_comments(x, base_url), r.get('id'), referer=SUBSTACK_REFERER)
s['comments'] = [comment(i) for i in comments.get('comments')]
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = r.get('comment_count', 0)
return s
top = Top()
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
top_posts = top.feed()
print(top.story(top_posts[0]))
webworm = Publication("https://www.webworm.co/")
posts = webworm.feed()
print(webworm.story(posts[0]))

View File

@@ -34,7 +34,7 @@ def api(route):
except KeyboardInterrupt:
raise
except BaseException as e:
logging.critical('Problem hitting tildes website: {}'.format(str(e)))
logging.error('Problem hitting tildes website: {}'.format(str(e)))
return False
def feed():
@@ -71,15 +71,11 @@ def story(ref):
html = api(SITE_LINK(group_lookup[ref], ref))
else:
html = api(API_ITEM(ref))
if not html:
logging.info('Bad Tildes API response.')
return False
if not html: return False
soup = BeautifulSoup(html, features='html.parser')
a = soup.find('article', class_='topic-full')
if a is None:
logging.info('Tildes <article> element not found.')
return False
if a is None: return False
h = a.find('header')
lu = h.find('a', class_='link-user')
@@ -87,7 +83,6 @@ def story(ref):
error = a.find('div', class_='text-error')
if error:
if 'deleted' in error.string or 'removed' in error.string:
logging.info('Article was deleted or removed.')
return False
s = {}
@@ -107,21 +102,7 @@ def story(ref):
ch = a.find('header', class_='topic-comments-header')
s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
if s['group'].split('.')[0] not in [
'~arts',
'~comp',
'~creative',
'~design',
'~engineering',
'~finance',
'~science',
'~tech',
]:
logging.info('Group ({}) not in whitelist.'.format(s['group']))
return False
if s['score'] < 15 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
if s['score'] < 8 and s['num_comments'] < 6:
return False
td = a.find('div', class_='topic-full-text')
@@ -132,7 +113,7 @@ def story(ref):
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
print(feed())
#print(feed())
#normal = story('gxt')
#print(normal)
#no_comments = story('gxr')
@@ -141,8 +122,8 @@ if __name__ == '__main__':
#print(self_post)
#li_comment = story('gqx')
#print(li_comment)
#broken = story('q4y')
#print(broken)
broken = story('q4y')
print(broken)
# make sure there's no self-reference
#import copy

View File

@@ -4,19 +4,21 @@ certifi==2020.6.20
chardet==3.0.4
click==7.1.2
commonmark==0.9.1
extruct==0.10.0
Flask==1.1.2
Flask-Cors==3.0.8
gevent==20.6.2
greenlet==0.4.16
humanize==4.10.0
idna==2.10
itsdangerous==1.1.0
Jinja2==2.11.2
lxml==4.6.1
MarkupSafe==1.1.1
packaging==20.4
praw==6.4.0
prawcore==1.4.0
pyparsing==2.4.7
pytz==2020.4
requests==2.24.0
six==1.15.0
soupsieve==2.0.1
@@ -28,3 +30,4 @@ websocket-client==0.57.0
Werkzeug==1.0.1
zope.event==4.4
zope.interface==5.1.0
python-dateutil==2.8.1

View File

@@ -0,0 +1,41 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
import requests
DECLUTTER_API = 'https://declutter.1j.nz/details'
DECLUTTER_COMMENT_API = 'https://declutter.1j.nz/comments'
TIMEOUT = 30
def get_html(url):
logging.info(f"Declutter Scraper: {url}")
details = get_details(url)
if not details:
return ''
return details['content']
def get_details(url):
try:
r = requests.post(DECLUTTER_API, data=dict(url=url), timeout=TIMEOUT)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem decluttering article: {}'.format(str(e)))
return None
def get_comments(url):
try:
r = requests.post(DECLUTTER_COMMENT_API, data=dict(url=url), timeout=TIMEOUT)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem getting comments for article: {}'.format(str(e)))
return None

View File

@@ -0,0 +1,27 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
import requests
READ_API = 'http://127.0.0.1:33843/details'
TIMEOUT = 20
def get_html(url):
logging.info(f"Local Scraper: {url}")
details = get_details(url)
if not details:
return ''
return details['content']
def get_details(url):
try:
r = requests.post(READ_API, data=dict(url=url), timeout=TIMEOUT)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem getting article: {}'.format(str(e)))
return None

View File

@@ -0,0 +1,37 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
import requests
OUTLINE_REFERER = 'https://outline.com/'
OUTLINE_API = 'https://api.outline.com/v3/parse_article'
TIMEOUT = 20
def get_html(url):
details = get_details(url)
if not details:
return ''
return details['html']
def get_details(url):
try:
logging.info(f"Outline Scraper: {url}")
params = {'source_url': url}
headers = {'Referer': OUTLINE_REFERER}
r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=TIMEOUT)
if r.status_code == 429:
logging.info('Rate limited by outline, sleeping 30s and skipping...')
time.sleep(30)
return None
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
data = r.json()['data']
if 'URL is not supported by Outline' in data['html']:
raise Exception('URL not supported by Outline')
return data
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem outlining article: {}'.format(str(e)))
return None

View File

@@ -1,58 +0,0 @@
import time
import json
import logging
import feed
import database
import search
database.init()
def fix_gzip_bug(story_list):
FIX_THRESHOLD = 150
count = 1
for sid in story_list:
try:
sid = sid[0]
story = database.get_story(sid)
full_json = json.loads(story.full_json)
meta_json = json.loads(story.meta_json)
text = full_json.get('text', '')
count = text.count('<EFBFBD>')
if not count: continue
ratio = count / len(text) * 1000
print('Bad story:', sid, 'Num ?:', count, 'Ratio:', ratio)
if ratio < FIX_THRESHOLD: continue
print('Attempting to fix...')
valid = feed.update_story(meta_json, is_manual=True)
if valid:
database.put_story(meta_json)
search.put_story(meta_json)
print('Success')
else:
print('Story was not valid')
time.sleep(3)
except KeyboardInterrupt:
raise
except BaseException as e:
logging.exception(e)
breakpoint()
if __name__ == '__main__':
num_stories = database.count_stories()
print('Fix {} stories?'.format(num_stories))
print('Press ENTER to continue, ctrl-c to cancel')
input()
story_list = database.get_story_list()
fix_gzip_bug(story_list)

View File

@@ -1,62 +0,0 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.INFO)
import database
from sqlalchemy import select
import search
import sys
import time
import json
import requests
database.init()
search.init()
BATCH_SIZE = 5000
def put_stories(stories):
return search.meili_api(requests.post, 'indexes/qotnews/documents', stories)
def get_update(update_id):
return search.meili_api(requests.get, 'tasks/{}'.format(update_id))
if __name__ == '__main__':
num_stories = database.count_stories()
print('Reindex {} stories?'.format(num_stories))
print('Press ENTER to continue, ctrl-c to cancel')
input()
story_list = database.get_story_list()
count = 1
while len(story_list):
stories = []
for _ in range(BATCH_SIZE):
try:
sid = story_list.pop()
except IndexError:
break
story = database.get_story(sid)
print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
story_obj = json.loads(story.meta_json)
stories.append(story_obj)
count += 1
res = put_stories(stories)
update_id = res['uid']
print('Waiting for processing', end='')
while get_update(update_id)['status'] != 'succeeded':
time.sleep(0.5)
print('.', end='', flush=True)
print()
print('Done.')

View File

@@ -1,23 +0,0 @@
import time
import requests
def test_search_api():
num_tests = 100
total_time = 0
for i in range(num_tests):
start = time.time()
res = requests.get('http://127.0.0.1:33842/api/search?q=iphone')
res.raise_for_status()
duration = time.time() - start
total_time += duration
avg_time = total_time / num_tests
print('Average search time:', avg_time)
if __name__ == '__main__':
test_search_api()

View File

@@ -4,62 +4,83 @@ logging.basicConfig(
level=logging.DEBUG)
import requests
import settings
SEARCH_ENABLED = bool(settings.MEILI_URL)
MEILI_URL = 'http://127.0.0.1:7700/'
def meili_api(method, route, json=None, params=None, parse_json=True):
def create_index():
try:
r = method(settings.MEILI_URL + route, json=json, params=params, timeout=4)
if r.status_code > 299:
json = dict(name='qotnews', uid='qotnews')
r = requests.post(MEILI_URL + 'indexes', json=json, timeout=2)
if r.status_code != 201:
raise Exception('Bad response code ' + str(r.status_code))
if parse_json:
return r.json()
else:
r.encoding = 'utf-8'
return r.text
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e))
logging.error('Problem creating MeiliSearch index: {}'.format(str(e)))
return False
def create_index():
json = dict(uid='qotnews', primaryKey='id')
return meili_api(requests.post, 'indexes', json=json)
def update_rankings():
json = ['typo', 'words', 'proximity', 'date:desc', 'exactness']
return meili_api(requests.post, 'indexes/qotnews/settings/ranking-rules', json=json)
try:
json = ['typo', 'words', 'proximity', 'attribute', 'desc(date)', 'wordsPosition', 'exactness']
r = requests.post(MEILI_URL + 'indexes/qotnews/settings/ranking-rules', json=json, timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem setting MeiliSearch ranking rules: {}'.format(str(e)))
return False
def update_attributes():
json = ['title', 'url', 'author']
r = meili_api(requests.post, 'indexes/qotnews/settings/searchable-attributes', json=json)
json = ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments']
r = meili_api(requests.post, 'indexes/qotnews/settings/displayed-attributes', json=json)
return r
try:
json = ['title', 'url', 'author', 'link', 'id']
r = requests.post(MEILI_URL + 'indexes/qotnews/settings/searchable-attributes', json=json, timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
requests.delete(MEILI_URL + 'indexes/qotnews/settings/displayed-attributes', timeout=2)
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem setting MeiliSearch searchable attributes: {}'.format(str(e)))
return False
def init():
if not SEARCH_ENABLED:
logging.info('Search is not enabled, skipping init.')
return
print(create_index())
create_index()
update_rankings()
update_attributes()
def put_story(story):
if not SEARCH_ENABLED: return
return meili_api(requests.post, 'indexes/qotnews/documents', [story])
story = story.copy()
story.pop('text', None)
story.pop('comments', None)
try:
r = requests.post(MEILI_URL + 'indexes/qotnews/documents', json=[story], timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem putting MeiliSearch story: {}'.format(str(e)))
return False
def search(q):
if not SEARCH_ENABLED: return []
params = dict(q=q, limit=settings.FEED_LENGTH)
r = meili_api(requests.get, 'indexes/qotnews/search', params=params, parse_json=False)
return r
try:
params = dict(q=q, limit=250)
r = requests.get(MEILI_URL + 'indexes/qotnews/search', params=params, timeout=2)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()['hits']
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem searching MeiliSearch: {}'.format(str(e)))
return False
if __name__ == '__main__':
init()
create_index()
print(update_rankings())
print(search('facebook'))
print(search('the'))

View File

@@ -1,8 +1,7 @@
import os, logging
DEBUG = os.environ.get('DEBUG')
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG if DEBUG else logging.INFO)
level=logging.INFO)
import gevent
from gevent import monkey
@@ -14,46 +13,22 @@ import json
import threading
import traceback
import time
import datetime
import humanize
import urllib.request
from urllib.parse import urlparse, parse_qs
import settings
import database
import search
import feed
from utils import gen_rand_id, NUM_ID_CHARS
from utils import gen_rand_id
from flask import abort, Flask, request, render_template, stream_with_context, Response
from werkzeug.exceptions import NotFound
from flask_cors import CORS
smallweb_set = set()
def load_smallweb_list():
EXCLUDED = [
'github.com',
]
global smallweb_set
try:
url = 'https://raw.githubusercontent.com/kagisearch/smallweb/refs/heads/main/smallweb.txt'
with urllib.request.urlopen(url, timeout=10) as response:
urls = response.read().decode('utf-8').splitlines()
hosts = {urlparse(u).hostname for u in urls if u and urlparse(u).hostname}
smallweb_set = {h.replace('www.', '') for h in hosts if h not in EXCLUDED}
logging.info('Loaded {} smallweb domains.'.format(len(smallweb_set)))
except Exception as e:
logging.error('Failed to load smallweb list: {}'.format(e))
load_smallweb_list()
database.init()
search.init()
FEED_LENGTH = 75
news_index = 0
ref_list = []
current_item = {}
def new_id():
nid = gen_rand_id()
@@ -61,99 +36,32 @@ def new_id():
nid = gen_rand_id()
return nid
def fromnow(ts):
return humanize.naturaltime(datetime.datetime.fromtimestamp(ts))
build_folder = './build'
build_folder = '../webclient/build'
flask_app = Flask(__name__, template_folder=build_folder, static_folder=build_folder, static_url_path='')
flask_app.jinja_env.filters['fromnow'] = fromnow
cors = CORS(flask_app)
@flask_app.route('/api')
def api():
skip = request.args.get('skip', 0)
limit = request.args.get('limit', settings.FEED_LENGTH)
if request.args.get('smallweb') == 'true' and smallweb_set:
limit = int(limit)
skip = int(skip)
filtered_stories = []
current_skip = skip
while len(filtered_stories) < limit:
stories_batch = database.get_stories(limit, current_skip)
if not stories_batch:
break
for story_str in stories_batch:
story = json.loads(story_str)
story_url = story.get('url') or story.get('link') or ''
if not story_url:
continue
hostname = urlparse(story_url).hostname
if hostname:
hostname = hostname.replace('www.', '')
if hostname in smallweb_set:
filtered_stories.append(story_str)
if len(filtered_stories) == limit:
break
if len(filtered_stories) == limit:
break
current_skip += limit
stories = filtered_stories
else:
stories = database.get_stories(limit, skip)
# hacky nested json
res = Response('{"stories":[' + ','.join(stories) + ']}')
stories = database.get_stories(FEED_LENGTH)
res = Response(json.dumps({"stories": stories}))
res.headers['content-type'] = 'application/json'
return res
@flask_app.route('/api/stats', strict_slashes=False)
def apistats():
stats = {
'news_index': news_index,
'ref_list': ref_list,
'len_ref_list': len(ref_list),
'current_item': current_item,
'total_stories': database.count_stories(),
'id_space': 26**NUM_ID_CHARS,
}
return stats
@flask_app.route('/api/search', strict_slashes=False)
def apisearch():
q = request.args.get('q', '')
if len(q) >= 3:
results = search.search(q)
else:
results = '[]'
res = Response(results)
res.headers['content-type'] = 'application/json'
return res
results = []
return dict(results=results)
@flask_app.route('/api/submit', methods=['POST'], strict_slashes=False)
def submit():
try:
url = request.form['url']
for prefix in ['http://', 'https://']:
if url.lower().startswith(prefix):
break
else: # for
url = 'http://' + url
nid = new_id()
logging.info('Manual submission: ' + url)
parse = urlparse(url)
if 'news.ycombinator.com' in parse.hostname:
source = 'hackernews'
@@ -161,9 +69,6 @@ def submit():
elif 'tildes.net' in parse.hostname and '~' in url:
source = 'tildes'
ref = parse.path.split('/')[2]
elif 'lobste.rs' in parse.hostname and '/s/' in url:
source = 'lobsters'
ref = parse.path.split('/')[2]
elif 'reddit.com' in parse.hostname and 'comments' in url:
source = 'reddit'
ref = parse.path.split('/')[4]
@@ -174,11 +79,6 @@ def submit():
ref = url
existing = database.get_story_by_ref(ref)
if existing and DEBUG:
ref = ref + '#' + str(time.time())
existing = False
if existing:
return {'nid': existing.sid}
else:
@@ -187,28 +87,21 @@ def submit():
if valid:
database.put_story(story)
search.put_story(story)
if DEBUG:
logging.info('Adding manual ref: {}, id: {}, source: {}'.format(ref, nid, source))
database.put_ref(ref, nid, source)
return {'nid': nid}
else:
raise Exception('Invalid article')
except Exception as e:
msg = 'Problem with article submission: {} - {}'.format(e.__class__.__name__, str(e))
logging.error(msg)
except BaseException as e:
logging.error('Problem with article submission: {} - {}'.format(e.__class__.__name__, str(e)))
print(traceback.format_exc())
return {'error': msg.split('\n')[0]}, 400
abort(400)
@flask_app.route('/api/<sid>')
def story(sid):
story = database.get_story(sid)
if story:
# hacky nested json
res = Response('{"story":' + story.full_json + '}')
res = Response(json.dumps({"story": story.data}))
res.headers['content-type'] = 'application/json'
return res
else:
@@ -217,19 +110,10 @@ def story(sid):
@flask_app.route('/')
@flask_app.route('/search')
def index():
stories_json = database.get_stories(settings.FEED_LENGTH, 0)
stories = [json.loads(s) for s in stories_json]
for s in stories:
url = urlparse(s.get('url') or s.get('link') or '').hostname or ''
s['hostname'] = url.replace('www.', '')
return render_template('index.html',
title='QotNews',
url='news.t0.vc',
description='Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode',
robots='index',
stories=stories,
)
title='Feed',
url='news.t0.vc',
description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')
@flask_app.route('/<sid>', strict_slashes=False)
@flask_app.route('/<sid>/c', strict_slashes=False)
@@ -239,9 +123,9 @@ def static_story(sid):
except NotFound:
pass
story_obj = database.get_story(sid)
if not story_obj: return abort(404)
story = json.loads(story_obj.full_json)
story = database.get_story(sid)
if not story: return abort(404)
story = story.data
score = story['score']
num_comments = story['num_comments']
@@ -250,22 +134,18 @@ def static_story(sid):
score, 's' if score != 1 else '',
num_comments, 's' if num_comments != 1 else '',
source)
url = urlparse(story.get('url') or story.get('link') or '').hostname or ''
url = urlparse(story['url']).hostname or urlparse(story['link']).hostname or ''
url = url.replace('www.', '')
return render_template('index.html',
title=story['title'] + ' | QotNews',
url=url,
description=description,
robots='noindex',
story=story,
show_comments=request.path.endswith('/c'),
)
title=story['title'],
url=url,
description=description)
http_server = WSGIServer(('', 33842), flask_app)
def feed_thread():
global news_index, ref_list, current_item
global news_index
try:
while True:
@@ -276,51 +156,49 @@ def feed_thread():
continue
try:
nid = new_id()
logging.info('Adding ref: {}, id: {}, source: {}'.format(ref, nid, source))
database.put_ref(ref, nid, source)
logging.info('Added ref ' + ref)
except database.IntegrityError:
logging.info('Already have ID / ref, skipping.')
continue
ref_list = database.get_reflist(settings.FEED_LENGTH)
ref_list = database.get_reflist(FEED_LENGTH)
# update current stories
if news_index < len(ref_list):
current_item = ref_list[news_index]
item = ref_list[news_index]
try:
story_json = database.get_story(current_item['sid']).full_json
story = json.loads(story_json)
story = database.get_story(item['sid']).data
except AttributeError:
story = dict(id=current_item['sid'], ref=current_item['ref'], source=current_item['source'])
story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
logging.info('Updating {} story: {}, index: {}'.format(story['source'], story['ref'], news_index))
logging.info('Updating story: ' + str(story['ref']) + ', index: ' + str(news_index))
valid = feed.update_story(story)
if valid:
database.put_story(story)
search.put_story(story)
else:
database.del_ref(current_item['ref'])
logging.info('Removed ref {}'.format(current_item['ref']))
database.del_ref(item['ref'])
logging.info('Removed ref {}'.format(item['ref']))
else:
logging.info('Skipping index: ' + str(news_index))
gevent.sleep(6)
news_index += 1
if news_index == settings.FEED_LENGTH: news_index = 0
if news_index == FEED_LENGTH: news_index = 0
except KeyboardInterrupt:
logging.info('Ending feed thread...')
except ValueError as e:
logging.critical('feed_thread error: {} {}'.format(e.__class__.__name__, e))
logging.error('feed_thread error: {} {}'.format(e.__class__.__name__, e))
http_server.stop()
logging.info('Starting Feed thread...')
print('Starting Feed thread...')
gevent.spawn(feed_thread)
logging.info('Starting HTTP thread...')
print('Starting HTTP thread...')
try:
http_server.serve_forever()
except KeyboardInterrupt:

View File

@@ -4,20 +4,23 @@
# Feed Lengths
# Number of top items from each site to pull
# set to 0 to disable that site
FEED_LENGTH = 75
NUM_HACKERNEWS = 15
NUM_LOBSTERS = 10
NUM_REDDIT = 15
NUM_REDDIT = 10
NUM_TILDES = 5
NUM_SUBSTACK = 10
# Meilisearch server URL
# Leave blank if not using search
#MEILI_URL = 'http://127.0.0.1:7700/'
MEILI_URL = ''
SITEMAP = {}
# SITEMAP['nzherald'] = { 'url': "https://www.nzherald.co.nz/arcio/news-sitemap/", 'count': 10},
# SITEMAP['stuff'] = { 'url': "https://www.stuff.co.nz/sitemap.xml", 'count': 10},
# Readerserver URL
# Leave blank if not using, but that defeats the whole point
READER_URL = 'http://127.0.0.1:33843/'
SUBSTACK = {}
# SUBSTACK['webworm'] = { 'url': "https://www.webworm.co", 'count': 10},
# SUBSTACK['the bulletin'] = { 'url': "https://thespinoff.substack.com", 'count': 10},
CATEGORY = {}
# CATEGORY['rnz national'] = { 'url': "https://www.rnz.co.nz/news/national", 'count': 10},
SCRAPERS = ['declutter', 'outline', 'local']
# Reddit account info
# leave blank if not using Reddit
@@ -33,9 +36,13 @@ SUBREDDITS = [
'HistoryofIdeas',
'LaymanJournals',
'PhilosophyofScience',
'PoliticsPDFs',
'Scholar',
'StateOfTheUnion',
'TheAgora',
'TrueFilm',
'TrueReddit',
'UniversityofReddit',
'culturalstudies',
'hardscience',
'indepthsports',
@@ -44,7 +51,4 @@ SUBREDDITS = [
'neurophilosophy',
'resilientcommunities',
'worldevents',
'StallmanWasRight',
'EverythingScience',
'longevity',
]

View File

@@ -8,17 +8,8 @@ import string
from bleach.sanitizer import Cleaner
def alert_tanner(message):
try:
logging.info('Alerting Tanner: ' + message)
params = dict(qotnews=message)
requests.get('https://tbot.tannercollin.com/message', params=params, timeout=4)
except BaseException as e:
logging.error('Problem alerting Tanner: ' + str(e))
NUM_ID_CHARS = 4
def gen_rand_id():
return ''.join(random.choice(string.ascii_uppercase) for _ in range(NUM_ID_CHARS))
return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))
def render_md(md):
if md:

View File

@@ -1,53 +1,14 @@
const port = 33843;
const express = require('express');
const app = express();
const port = 33843;
const request = require('request');
const JSDOM = require('jsdom').JSDOM;
const { Readability } = require('readability');
const simple = require('./simple');
app.use(express.urlencoded({ extended: true }));
app.get('/', (req, res) => {
res.send('<form method="POST" accept-charset="UTF-8"><input name="url"><button type="submit">SUBMIT</button></form>');
});
const requestCallback = (url, res) => (error, response, body) => {
if (!error && response.statusCode == 200) {
console.log('Response OK.');
const doc = new JSDOM(body, {url: url});
const reader = new Readability(doc.window.document);
const article = reader.parse();
if (article && article.content) {
res.send(article.content);
} else {
res.sendStatus(404);
}
} else {
console.log('Response error:', error ? error.toString() : response.statusCode);
res.sendStatus(response ? response.statusCode : 404);
}
};
app.post('/', (req, res) => {
const url = req.body.url;
const requestOptions = {
url: url,
gzip: true,
//headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'},
//headers: {'User-Agent': 'Twitterbot/1.0'},
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0',
'X-Forwarded-For': '66.249.66.1',
},
};
console.log('Parse request for:', url);
request(requestOptions, requestCallback(url, res));
});
app.get('/', (req, res) => res.send(simple.FORM));
app.post('/', (req, res) => simple.scrape(req, res));
app.post('/details', (req, res) => simple.details(req, res));
// app.post('/browser', (req, res) => browser.scrape(req, res));
// app.post('/browser/details', (req, res) => browser.details(req, res));
app.listen(port, () => {
console.log(`Example app listening on port ${port}!`);

43
readerserver/simple.js Normal file
View File

@@ -0,0 +1,43 @@
const request = require('request');
const JSDOM = require('jsdom').JSDOM;
const { Readability } = require('readability');
const options = url => ({
url: url,
headers: {
'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)',
'X-Forwarded-For': '66.249.66.1',
},
});
const extract = (url, body) => {
const doc = new JSDOM(body, { url: url });
const reader = new Readability(doc.window.document);
return reader.parse();
};
module.exports.FORM = '<form method="POST" action="/" accept-charset="UTF-8"><input name="url"><button type="submit">SUBMIT</button></form>';
module.exports.scrape = (req, res) => request(options(req.body.url), (error, response, body) => {
if (error || response.statusCode != 200) {
console.log('Response error:', error ? error.toString() : response.statusCode);
return res.sendStatus(response ? response.statusCode : 404);
}
const article = extract(url, body);
if (article && article.content) {
return res.send(article.content);
}
return res.sendStatus(404);
});
module.exports.details = (req, res) => request(options(req.body.url), (error, response, body) => {
if (error || response.statusCode != 200) {
console.log('Response error:', error ? error.toString() : response.statusCode);
return res.sendStatus(response ? response.statusCode : 404);
}
const article = extract(url, body);
if (article) {
return res.send(article);
}
return res.sendStatus(404);
});

File diff suppressed because it is too large Load Diff

View File

@@ -3,7 +3,7 @@
Download MeiliSearch with:
```
wget https://github.com/meilisearch/meilisearch/releases/download/v0.27.0/meilisearch-linux-amd64
wget https://github.com/meilisearch/MeiliSearch/releases/download/v0.11.1/meilisearch-linux-amd64
chmod +x meilisearch-linux-amd64
```

View File

Before

Width:  |  Height:  |  Size: 538 B

After

Width:  |  Height:  |  Size: 538 B

View File

Before

Width:  |  Height:  |  Size: 6.5 KiB

After

Width:  |  Height:  |  Size: 6.5 KiB

View File

Before

Width:  |  Height:  |  Size: 5.4 KiB

After

Width:  |  Height:  |  Size: 5.4 KiB

View File

Before

Width:  |  Height:  |  Size: 500 B

After

Width:  |  Height:  |  Size: 500 B

View File

@@ -4,14 +4,12 @@
"private": true,
"dependencies": {
"abort-controller": "^3.0.0",
"katex": "^0.16.25",
"localforage": "^1.7.3",
"moment": "^2.24.0",
"query-string": "^6.8.3",
"react": "^16.9.0",
"react-dom": "^16.9.0",
"react-helmet": "^5.2.1",
"react-latex-next": "^3.0.0",
"react-router-dom": "^5.0.1",
"react-router-hash-link": "^1.2.2",
"react-scripts": "3.1.1"

View File

@@ -8,8 +8,6 @@
content="{{ description }}"
/>
<meta content="{{ url }}" name="og:site_name">
<meta name="robots" content="{{ robots }}">
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
@@ -28,112 +26,26 @@
work correctly both with client-side routing and a non-root public URL.
Learn how to configure a non-root public URL by running `npm run build`.
-->
<title>{{ title }}</title>
<title>{{ title }} - QotNews</title>
<style>
html {
overflow-y: scroll;
}
body {
background: #eeeeee;
background: #000;
}
.nojs {
color: white;
}
</style>
</head>
<body>
<div id="root">
<div class="container menu">
<p>
<a href="/">QotNews</a>
<br />
<span class="slogan">Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.</span>
</p>
</div>
{% if story %}
<div class="{% if show_comments %}container{% else %}article-container{% endif %}">
<div class="article">
<h1>{{ story.title }}</h1>
{% if show_comments %}
<div class="info">
<a href="/{{ story.id }}">View article</a>
</div>
{% else %}
<div class="info">
Source: <a class="source" href="{{ story.url or story.link }}">{{ url }}</a>
</div>
{% endif %}
<div class="info">
{{ story.score }} points
by <a href="{{ story.author_link }}">{{ story.author }}</a>
{{ story.date | fromnow }}
on <a href="{{ story.link }}">{{ story.source }}</a> |
<a href="/{{ story.id }}/c">
{{ story.num_comments }} comment{{ 's' if story.num_comments != 1 }}
</a>
</div>
{% if not show_comments and story.text %}
<div class="story-text">{{ story.text | safe }}</div>
{% elif show_comments %}
{% macro render_comment(comment, level) %}
<dt></dt>
<dd class="comment{% if level > 0 %} lined{% endif %}">
<div class="info">
<p>
{% if comment.author == story.author %}[OP] {% endif %}{{ comment.author or '[Deleted]' }} | <a href="#{{ comment.author }}{{ comment.date }}" id="{{ comment.author }}{{ comment.date }}">{{ comment.date | fromnow }}</a>
</p>
</div>
<div class="text">{{ (comment.text | safe) if comment.text else '<p>[Empty / deleted comment]</p>' }}</div>
{% if comment.comments %}
<dl>
{% for reply in comment.comments %}
{{ render_comment(reply, level + 1) }}
{% endfor %}
</dl>
{% endif %}
</dd>
{% endmacro %}
<dl class="comments">
{% for comment in story.comments %}{{ render_comment(comment, 0) }}{% endfor %}
</dl>
{% endif %}
</div>
<div class='dot toggleDot'>
<div class='button'>
<a href="/{{ story.id }}{{ '/c' if not show_comments else '' }}">
{{ '' if not show_comments else '' }}
</a>
</div>
</div>
</div>
{% elif stories %}
<div class="container">
{% for story in stories %}
<div class='item'>
<div class='title'>
<a class='link' href='/{{ story.id }}'>
<img class='source-logo' src='/logos/{{ story.source }}.png' alt='{{ story.source }}:' /> {{ story.title }}
</a>
<span class='source'>
(<a class='source' href='{{ story.url or story.link }}'>{{ story.hostname }}</a>)
</span>
</div>
<div class='info'>
{{ story.score }} points
by <a href="{{ story.author_link }}">{{ story.author }}</a>
{{ story.date | fromnow }}
on <a href="{{ story.link }}">{{ story.source }}</a> |
<a class="{{ 'hot' if story.num_comments > 99 else '' }}" href="/{{ story.id }}/c">
{{ story.num_comments }} comment{{ 's' if story.num_comments != 1 }}
</a>
</div>
</div>
{% endfor %}
</div>
{% endif %}
<div class="nojs">
<noscript>You need to enable JavaScript to run this app.</noscript>
</div>
<div id="root"></div>
<!--
This HTML file is a template.
If you open it directly in the browser, you will see an empty page.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 981 B

View File

@@ -1,12 +1,10 @@
import React, { useState, useEffect, useRef, useCallback } from 'react';
import React from 'react';
import { BrowserRouter as Router, Route, Link, Switch } from 'react-router-dom';
import localForage from 'localforage';
import './Style-light.css';
import './Style-dark.css';
import './Style-black.css';
import './Style-red.css';
import './fonts/Fonts.css';
import { BackwardDot, ForwardDot } from './utils.js';
import { ForwardDot } from './utils.js';
import Feed from './Feed.js';
import Article from './Article.js';
import Comments from './Comments.js';
@@ -15,115 +13,72 @@ import Submit from './Submit.js';
import Results from './Results.js';
import ScrollToTop from './ScrollToTop.js';
function App() {
const [theme, setTheme] = useState(localStorage.getItem('theme') || '');
const cache = useRef({});
const [isFullScreen, setIsFullScreen] = useState(!!document.fullscreenElement);
class App extends React.Component {
constructor(props) {
super(props);
const updateCache = useCallback((key, value) => {
cache.current[key] = value;
}, []);
this.state = {
theme: localStorage.getItem('theme') || '',
};
const light = () => {
setTheme('');
this.cache = {};
}
updateCache = (key, value) => {
this.cache[key] = value;
}
light() {
this.setState({ theme: '' });
localStorage.setItem('theme', '');
};
}
const dark = () => {
setTheme('dark');
dark() {
this.setState({ theme: 'dark' });
localStorage.setItem('theme', 'dark');
};
}
const black = () => {
setTheme('black');
localStorage.setItem('theme', 'black');
};
const red = () => {
setTheme('red');
localStorage.setItem('theme', 'red');
};
useEffect(() => {
if (Object.keys(cache.current).length === 0) {
componentDidMount() {
if (!this.cache.length) {
localForage.iterate((value, key) => {
updateCache(key, value);
}).then(() => {
console.log('loaded cache from localforage');
this.updateCache(key, value);
});
console.log('loaded cache from localforage');
}
}, [updateCache]);
}
const goFullScreen = () => {
if ('wakeLock' in navigator) {
navigator.wakeLock.request('screen');
}
document.body.requestFullscreen({ navigationUI: 'hide' });
};
render() {
const theme = this.state.theme;
document.body.style.backgroundColor = theme === 'dark' ? '#000' : '#eeeeee';
const exitFullScreen = () => {
document.exitFullscreen();
};
return (
<div className={theme}>
<Router>
<div className='container menu'>
<p>
<Link to='/'>QotNews - Feed</Link>
<span className='theme'>Theme: <a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a></span>
<br />
<span className='slogan'>Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode.</span>
</p>
<Route path='/(|search)' component={Search} />
<Route path='/(|search)' component={Submit} />
</div>
useEffect(() => {
const onFullScreenChange = () => setIsFullScreen(!!document.fullscreenElement);
document.addEventListener('fullscreenchange', onFullScreenChange);
return () => document.removeEventListener('fullscreenchange', onFullScreenChange);
}, []);
<Route path='/' exact render={(props) => <Feed {...props} updateCache={this.updateCache} />} />
<Switch>
<Route path='/search' component={Results} />
<Route path='/:id' exact render={(props) => <Article {...props} cache={this.cache} />} />
</Switch>
<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} />} />
useEffect(() => {
if (theme === 'dark') {
document.body.style.backgroundColor = '#1a1a1a';
} else if (theme === 'black') {
document.body.style.backgroundColor = '#000';
} else if (theme === 'red') {
document.body.style.backgroundColor = '#000';
} else {
document.body.style.backgroundColor = '#eeeeee';
}
}, [theme]);
<ForwardDot />
const fullScreenAvailable = document.fullscreenEnabled ||
document.mozFullscreenEnabled ||
document.webkitFullscreenEnabled ||
document.msFullscreenEnabled;
return (
<div className={theme}>
<Router>
<div className='container menu'>
<p>
<Link to='/'>QotNews</Link>
<span className='theme'><a href='#' onClick={() => light()}>Light</a> - <a href='#' onClick={() => dark()}>Dark</a> - <a href='#' onClick={() => black()}>Black</a> - <a href='#' onClick={() => red()}>Red</a></span>
<br />
<span className='slogan'>Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.</span>
</p>
{fullScreenAvailable &&
<Route path='/(|search)' render={() => !isFullScreen ?
<button className='fullscreen' onClick={() => goFullScreen()}>Enter Fullscreen</button>
:
<button className='fullscreen' onClick={() => exitFullScreen()}>Exit Fullscreen</button>
} />
}
<Route path='/(|search)' component={Search} />
<Route path='/(|search)' component={Submit} />
</div>
<Route path='/' exact render={(props) => <Feed {...props} updateCache={updateCache} />} />
<Switch>
<Route path='/search' component={Results} />
<Route path='/:id' exact render={(props) => <Article {...props} cache={cache.current} />} />
</Switch>
<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={cache.current} />} />
<BackwardDot />
<ForwardDot />
<ScrollToTop />
</Router>
</div>
);
<ScrollToTop />
</Router>
</div>
);
}
}
export default App;

View File

@@ -1,228 +1,112 @@
import React, { useState, useEffect } from 'react';
import { useParams } from 'react-router-dom';
import React from 'react';
import { Helmet } from 'react-helmet';
import localForage from 'localforage';
import { sourceLink, infoLine, ToggleDot } from './utils.js';
import Latex from 'react-latex-next';
import 'katex/dist/katex.min.css';
const VOID_ELEMENTS = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'];
const DANGEROUS_TAGS = ['svg', 'math'];
class Article extends React.Component {
constructor(props) {
super(props);
const latexDelimiters = [
{ left: '$$', right: '$$', display: true },
{ left: '\\[', right: '\\]', display: true },
{ left: '\\(', right: '\\)', display: false }
];
const id = this.props.match ? this.props.match.params.id : 'CLOL';
const cache = this.props.cache;
function Article({ cache }) {
const { id } = useParams();
if (id in cache) console.log('cache hit');
if (id in cache) console.log('cache hit');
this.state = {
story: cache[id] || false,
error: false,
pConv: [],
};
}
componentDidMount() {
const id = this.props.match ? this.props.match.params.id : 'CLOL';
const [story, setStory] = useState(cache[id] || false);
const [error, setError] = useState('');
const [pConv, setPConv] = useState([]);
const [copyButtonText, setCopyButtonText] = useState('\ue92c');
useEffect(() => {
localForage.getItem(id)
.then(
(value) => {
if (value) {
setStory(value);
this.setState({ story: value });
}
}
);
fetch('/api/' + id)
.then(res => {
if (!res.ok) {
throw new Error(`Server responded with ${res.status} ${res.statusText}`);
}
return res.json();
})
.then(res => res.json())
.then(
(result) => {
setStory(result.story);
this.setState({ story: result.story });
localForage.setItem(id, result.story);
},
(error) => {
const errorMessage = `Failed to fetch new article content (ID: ${id}). Your connection may be down or the server might be experiencing issues. ${error.toString()}.`;
setError(errorMessage);
this.setState({ error: true });
}
);
}, [id]);
}
const copyLink = () => {
navigator.clipboard.writeText(`${story.title}:\n${window.location.href}`).then(() => {
setCopyButtonText('\uea10');
setTimeout(() => setCopyButtonText('\ue92c'), 2000);
}, () => {
setCopyButtonText('\uea0f');
setTimeout(() => setCopyButtonText('\ue92c'), 2000);
});
};
pConvert = (n) => {
this.setState({ pConv: [...this.state.pConv, n]});
}
const pConvert = (n) => {
setPConv(prevPConv => [...prevPConv, n]);
};
render() {
const id = this.props.match ? this.props.match.params.id : 'CLOL';
const story = this.state.story;
const error = this.state.error;
const pConv = this.state.pConv;
let nodes = null;
const isCodeBlock = (v) => {
if (v.localName === 'pre') {
return true;
}
if (v.localName === 'code') {
if (v.closest('p')) {
return false;
}
const parent = v.parentElement;
if (parent) {
const nonWhitespaceChildren = Array.from(parent.childNodes).filter(n => {
return n.nodeType !== Node.TEXT_NODE || n.textContent.trim() !== '';
});
if (nonWhitespaceChildren.length === 1 && nonWhitespaceChildren[0] === v) {
return true;
}
}
}
return false;
};
const renderNodes = (nodes, keyPrefix = '') => {
return Array.from(nodes).map((v, k) => {
const key = `${keyPrefix}${k}`;
if (pConv.includes(key)) {
return (
<React.Fragment key={key}>
{v.textContent.split('\n\n').map((x, i) =>
<p key={i}>{x}</p>
)}
</React.Fragment>
);
}
if (v.nodeName === '#text') {
const text = v.data;
if (text.includes('\\[') || text.includes('\\(') || text.includes('$$')) {
return <Latex key={key} delimiters={latexDelimiters}>{text}</Latex>;
}
// Only wrap top-level text nodes in <p>
if (keyPrefix === '' && v.data.trim() !== '') {
return <p key={key}>{v.data}</p>;
}
return v.data;
}
if (v.nodeType !== Node.ELEMENT_NODE) {
return null;
}
if (DANGEROUS_TAGS.includes(v.localName)) {
return <span key={key} dangerouslySetInnerHTML={{ __html: v.outerHTML }} />;
}
const Tag = v.localName;
if (isCodeBlock(v)) {
return (
<React.Fragment key={key}>
<Tag dangerouslySetInnerHTML={{ __html: v.innerHTML }} />
<button onClick={() => pConvert(key)}>Convert Code to Paragraph</button>
</React.Fragment>
);
}
const textContent = v.textContent.trim();
const isMath = (textContent.startsWith('\\(') && textContent.endsWith('\\)')) ||
(textContent.startsWith('\\[') && textContent.endsWith('\\]')) ||
(textContent.startsWith('$$') && textContent.endsWith('$$'));
const props = { key: key };
if (v.hasAttributes()) {
for (const attr of v.attributes) {
const name = attr.name === 'class' ? 'className' : attr.name;
props[name] = attr.value;
}
}
if (isMath) {
let mathContent = v.textContent;
// align environment requires display math mode
if (mathContent.includes('\\begin{align')) {
const trimmed = mathContent.trim();
if (trimmed.startsWith('\\(')) {
// Replace \( and \) with \[ and \] to switch to display mode
const firstParen = mathContent.indexOf('\\(');
const lastParen = mathContent.lastIndexOf('\\)');
mathContent = mathContent.substring(0, firstParen) + '\\[' + mathContent.substring(firstParen + 2, lastParen) + '\\]' + mathContent.substring(lastParen + 2);
}
}
return <Tag {...props}><Latex delimiters={latexDelimiters}>{mathContent}</Latex></Tag>;
}
if (VOID_ELEMENTS.includes(Tag)) {
return <Tag {...props} />;
}
return (
<Tag {...props}>
{renderNodes(v.childNodes, `${key}-`)}
</Tag>
);
});
};
const nodes = (s) => {
if (s && s.text) {
if (story.text) {
let div = document.createElement('div');
div.innerHTML = s.text;
return div.childNodes;
div.innerHTML = story.text;
nodes = div.childNodes;
}
return null;
};
const storyNodes = nodes(story);
return (
<div className='article-container'>
{error && <p>Connection error?</p>}
{story ?
<div className='article'>
<Helmet>
<title>{story.title} - QotNews</title>
</Helmet>
return (
<div className='article-container'>
{error &&
<details style={{marginBottom: '1rem'}}>
<summary>Connection error? Click to expand.</summary>
<p>{error}</p>
{story && <p>Loaded article from cache.</p>}
</details>
}
{story ?
<div className='article'>
<Helmet>
<title>{story.title} | QotNews</title>
<meta name="robots" content="noindex" />
</Helmet>
<h1>{story.title}</h1>
<h1>{story.title} <button className='copy-button' onClick={copyLink}>{copyButtonText}</button></h1>
<div className='info'>
Source: {sourceLink(story)}
</div>
{infoLine(story)}
{storyNodes ?
<div className='story-text'>
{renderNodes(storyNodes)}
<div className='info'>
Source: {sourceLink(story)}
</div>
:
<p>Problem getting article :(</p>
}
</div>
:
<p>Loading...</p>
}
<ToggleDot id={id} article={false} />
</div>
);
{infoLine(story)}
{nodes ?
<div className='story-text'>
{Object.entries(nodes).map(([k, v]) =>
pConv.includes(k) ?
v.innerHTML.split('\n\n').map(x =>
<p dangerouslySetInnerHTML={{ __html: x }} />
)
:
(v.nodeName === '#text' ?
<p>{v.data}</p>
:
<>
<v.localName dangerouslySetInnerHTML={v.innerHTML ? { __html: v.innerHTML } : null} />
{v.localName == 'pre' && <button onClick={() => this.pConvert(k)}>Convert Code to Paragraph</button>}
</>
)
)}
</div>
:
<p>Problem getting article :(</p>
}
</div>
:
<p>loading...</p>
}
<ToggleDot id={id} article={false} />
</div>
);
}
}
export default Article;

View File

@@ -1,80 +1,83 @@
import React, { useState, useEffect } from 'react';
import { Link, useParams } from 'react-router-dom';
import React from 'react';
import { Link } from 'react-router-dom';
import { HashLink } from 'react-router-hash-link';
import { Helmet } from 'react-helmet';
import moment from 'moment';
import localForage from 'localforage';
import { infoLine, ToggleDot } from './utils.js';
function countComments(c) {
return c.comments.reduce((sum, x) => sum + countComments(x), 1);
}
class Article extends React.Component {
constructor(props) {
super(props);
function Comments({ cache }) {
const { id } = useParams();
const id = this.props.match.params.id;
const cache = this.props.cache;
if (id in cache) console.log('cache hit');
if (id in cache) console.log('cache hit');
const [story, setStory] = useState(cache[id] || false);
const [error, setError] = useState('');
const [collapsed, setCollapsed] = useState([]);
const [expanded, setExpanded] = useState([]);
this.state = {
story: cache[id] || false,
error: false,
collapsed: [],
expanded: [],
};
}
componentDidMount() {
const id = this.props.match.params.id;
useEffect(() => {
localForage.getItem(id)
.then(
(value) => {
if (value) {
setStory(value);
}
this.setState({ story: value });
}
);
fetch('/api/' + id)
.then(res => {
if (!res.ok) {
throw new Error(`Server responded with ${res.status} ${res.statusText}`);
}
return res.json();
})
.then(res => res.json())
.then(
(result) => {
setStory(result.story);
this.setState({ story: result.story }, () => {
const hash = window.location.hash.substring(1);
if (hash) {
document.getElementById(hash).scrollIntoView();
}
});
localForage.setItem(id, result.story);
const hash = window.location.hash.substring(1);
if (hash) {
setTimeout(() => {
const element = document.getElementById(hash);
if (element) {
element.scrollIntoView();
}
}, 0);
}
},
(error) => {
const errorMessage = `Failed to fetch comments (ID: ${id}). Your connection may be down or the server might be experiencing issues. ${error.toString()}.`;
setError(errorMessage);
this.setState({ error: true });
}
);
}, [id]);
}
const collapseComment = (cid) => {
setCollapsed(prev => [...prev, cid]);
setExpanded(prev => prev.filter(x => x !== cid));
};
collapseComment(cid) {
this.setState(prevState => ({
...prevState,
collapsed: [...prevState.collapsed, cid],
expanded: prevState.expanded.filter(x => x !== cid),
}));
}
const expandComment = (cid) => {
setCollapsed(prev => prev.filter(x => x !== cid));
setExpanded(prev => [...prev, cid]);
};
expandComment(cid) {
this.setState(prevState => ({
...prevState,
collapsed: prevState.collapsed.filter(x => x !== cid),
expanded: [...prevState.expanded, cid],
}));
}
const displayComment = (story, c, level) => {
const cid = c.author+c.date;
countComments(c) {
return c.comments.reduce((sum, x) => sum + this.countComments(x), 1);
}
const isCollapsed = collapsed.includes(cid);
const isExpanded = expanded.includes(cid);
displayComment(story, c, level) {
const cid = c.author + c.date;
const hidden = isCollapsed || (level == 4 && !isExpanded);
const collapsed = this.state.collapsed.includes(cid);
const expanded = this.state.expanded.includes(cid);
const hidden = collapsed || (level == 4 && !expanded);
const hasChildren = c.comments.length !== 0;
return (
@@ -82,59 +85,61 @@ function Comments({ cache }) {
<div className='info'>
<p>
{c.author === story.author ? '[OP]' : ''} {c.author || '[Deleted]'}
{' '} | <HashLink to={'#'+cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
{' '} | <HashLink to={'#' + cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
{hidden || hasChildren &&
<button className='collapser pointer' onClick={() => collapseComment(cid)}></button>
}
{hasChildren && (
hidden ?
<span className='collapser expander pointer' onClick={() => this.expandComment(cid)}>+</span>
:
<span className='collapser pointer' onClick={() => this.collapseComment(cid)}></span>
)}
</p>
</div>
<div className={isCollapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text || '<p>[Empty / deleted comment]</p>'}} />
<div className={collapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text }} />
{hidden && hasChildren ?
<button className='comment lined info pointer' onClick={() => expandComment(cid)}>[show {countComments(c)-1} more]</button>
:
c.comments.map(i => displayComment(story, i, level + 1))
<div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c) - 1} more]</div>
:
c.comments.map(i => this.displayComment(story, i, level + 1))
}
</div>
);
};
}
return (
<div className='container'>
{error &&
<details style={{marginBottom: '1rem'}}>
<summary>Connection error? Click to expand.</summary>
<p>{error}</p>
{story && <p>Loaded comments from cache.</p>}
</details>
}
{story ?
<div className='article'>
<Helmet>
<title>{story.title} | QotNews</title>
<meta name="robots" content="noindex" />
</Helmet>
render() {
const id = this.props.match.params.id;
const story = this.state.story;
const error = this.state.error;
<h1>{story.title}</h1>
return (
<div className='container'>
{error && <p>Connection error?</p>}
{story ?
<div className='article'>
<Helmet>
<title>{story.title} - QotNews Comments</title>
</Helmet>
<div className='info'>
<Link to={'/' + story.id}>View article</Link>
<h1>{story.title}</h1>
<div className='info'>
<Link to={'/' + story.id}>View article</Link>
</div>
{infoLine(story)}
<div className='comments'>
{story.comments.map(c => this.displayComment(story, c, 0))}
</div>
</div>
{infoLine(story)}
<div className='comments'>
{story.comments.map(c => displayComment(story, c, 0))}
</div>
</div>
:
<p>loading...</p>
}
<ToggleDot id={id} article={true} />
</div>
);
:
<p>loading...</p>
}
<ToggleDot id={id} article={true} />
</div>
);
}
}
export default Comments;
export default Article;

View File

@@ -1,159 +1,85 @@
import React, { useState, useEffect } from 'react';
import React from 'react';
import { Link } from 'react-router-dom';
import { Helmet } from 'react-helmet';
import localForage from 'localforage';
import { sourceLink, infoLine, logos } from './utils.js';
function Feed({ updateCache }) {
const [stories, setStories] = useState(() => JSON.parse(localStorage.getItem('stories')) || false);
const [error, setError] = useState('');
const [loadingStatus, setLoadingStatus] = useState(null);
const [filterSmallweb, setFilterSmallweb] = useState(() => localStorage.getItem('filterSmallweb') === 'true');
class Feed extends React.Component {
constructor(props) {
super(props);
const handleFilterChange = e => {
const isChecked = e.target.checked;
setStories(false);
setFilterSmallweb(isChecked);
localStorage.setItem('filterSmallweb', isChecked);
};
this.state = {
stories: JSON.parse(localStorage.getItem('stories')) || false,
error: false,
};
}
useEffect(() => {
const controller = new AbortController();
fetch(filterSmallweb ? '/api?smallweb=true' : '/api', { signal: controller.signal })
.then(res => {
if (!res.ok) {
throw new Error(`Server responded with ${res.status} ${res.statusText}`);
}
return res.json();
})
componentDidMount() {
fetch('/api')
.then(res => res.json())
.then(
async (result) => {
const newApiStories = result.stories;
(result) => {
const updated = !this.state.stories || this.state.stories[0].id !== result.stories[0].id;
console.log('updated:', updated);
const updated = !stories || !stories.length || stories[0].id !== newApiStories[0].id;
console.log('New stories available:', updated);
const { stories } = result;
this.setState({ stories });
localStorage.setItem('stories', JSON.stringify(stories));
if (!updated) return;
setLoadingStatus({ current: 0, total: newApiStories.length });
let currentStories = Array.isArray(stories) ? [...stories] : [];
let preloadedCount = 0;
for (const [index, newStory] of newApiStories.entries()) {
if (controller.signal.aborted) {
break;
}
try {
const storyFetchController = new AbortController();
const timeoutId = setTimeout(() => storyFetchController.abort(), 10000); // 10-second timeout
const storyRes = await fetch('/api/' + newStory.id, { signal: storyFetchController.signal });
clearTimeout(timeoutId);
if (!storyRes.ok) {
throw new Error(`Server responded with ${storyRes.status} ${storyRes.statusText}`);
}
const storyResult = await storyRes.json();
const fullStory = storyResult.story;
await localForage.setItem(fullStory.id, fullStory);
console.log('Preloaded story:', fullStory.id, fullStory.title);
updateCache(fullStory.id, fullStory);
preloadedCount++;
setLoadingStatus({ current: preloadedCount, total: newApiStories.length });
const existingStoryIndex = currentStories.findIndex(s => s.id === newStory.id);
if (existingStoryIndex > -1) {
currentStories.splice(existingStoryIndex, 1);
}
currentStories.splice(index, 0, newStory);
localStorage.setItem('stories', JSON.stringify(currentStories));
setStories(currentStories);
} catch (error) {
let errorMessage;
if (error.name === 'AbortError') {
errorMessage = `The request to fetch story '${newStory.title}' (${newStory.id}) timed out after 10 seconds. Your connection may be unstable. (${preloadedCount} / ${newApiStories.length} stories preloaded)`;
console.log('Fetch timed out for story:', newStory.id);
} else {
errorMessage = `An error occurred while fetching story '${newStory.title}' (ID: ${newStory.id}): ${error.toString()}. (${preloadedCount} / ${newApiStories.length} stories preloaded)`;
console.log('Fetch failed for story:', newStory.id, error);
}
setError(errorMessage);
break;
}
if (updated) {
localForage.clear();
stories.forEach((x, i) => {
fetch('/api/' + x.id)
.then(res => res.json())
.then(({ story }) => {
localForage.setItem(x.id, story)
.then(console.log('preloaded', x.id, x.title));
this.props.updateCache(x.id, story);
}, error => { }
);
});
}
const finalStories = currentStories.slice(0, newApiStories.length);
const removedStories = currentStories.slice(newApiStories.length);
for (const story of removedStories) {
console.log('Removed story:', story.id, story.title);
localForage.removeItem(story.id);
}
localStorage.setItem('stories', JSON.stringify(finalStories));
setStories(finalStories);
setLoadingStatus(null);
},
(error) => {
if (error.name === 'AbortError') {
console.log('Feed fetch aborted.');
return;
}
const errorMessage = `Failed to fetch the main story list from the API. Your connection may be down or the server might be experiencing issues. ${error.toString()}.`;
setError(errorMessage);
this.setState({ error: true });
}
);
}
return () => controller.abort();
}, [updateCache, filterSmallweb]);
render() {
const stories = this.state.stories;
const error = this.state.error;
return (
<div className='container'>
<Helmet>
<title>QotNews</title>
<meta name="robots" content="index" />
</Helmet>
return (
<div className='container'>
<Helmet>
<title>Feed - QotNews</title>
</Helmet>
{error && <p>Connection error?</p>}
{stories ?
<div>
{stories.map(x =>
<div className='item' key={x.id}>
<div className='title'>
<Link className='link' to={'/' + x.id}>
<img className='source-logo' src={logos[x.source] || logos[x.source.split(' ')[0]]} alt='source logo' /> {x.title}
</Link>
<div style={{marginBottom: '1rem'}}>
<input type="checkbox" id="filter-smallweb" className="checkbox" checked={filterSmallweb} onChange={handleFilterChange} />
<label htmlFor="filter-smallweb">Only Smallweb</label>
</div>
<span className='source'>
({sourceLink(x)})
</span>
</div>
{error &&
<details style={{marginBottom: '1rem'}}>
<summary>Connection error? Click to expand.</summary>
<p>{error}</p>
{stories && <p>Loaded feed from cache.</p>}
</details>
}
{stories ?
<div>
{stories.map(x =>
<div className='item' key={x.id}>
<div className='title'>
<Link className='link' to={'/' + x.id}>
<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
</Link>
<span className='source'>
({sourceLink(x)})
</span>
{infoLine(x)}
</div>
{infoLine(x)}
</div>
)}
</div>
:
<p>Loading...</p>
}
{loadingStatus && <p>Preloading stories {loadingStatus.current} / {loadingStatus.total}...</p>}
</div>
);
)}
</div>
:
<p>loading...</p>
}
</div>
);
}
}
export default Feed;

View File

@@ -1,73 +1,95 @@
import React, { useState, useEffect } from 'react';
import { Link, useLocation } from 'react-router-dom';
import React from 'react';
import { Link } from 'react-router-dom';
import { Helmet } from 'react-helmet';
import { sourceLink, infoLine, logos } from './utils.js';
import AbortController from 'abort-controller';
function Results() {
const [stories, setStories] = useState(false);
const [error, setError] = useState(false);
const location = useLocation();
class Results extends React.Component {
constructor(props) {
super(props);
useEffect(() => {
const controller = new AbortController();
const signal = controller.signal;
this.state = {
stories: false,
error: false,
};
const search = location.search;
this.controller = null;
}
performSearch = () => {
if (this.controller) {
this.controller.abort();
}
this.controller = new AbortController();
const signal = this.controller.signal;
const search = this.props.location.search;
fetch('/api/search' + search, { method: 'get', signal: signal })
.then(res => res.json())
.then(
(result) => {
setStories(result.hits);
this.setState({ stories: result.results });
},
(error) => {
if (error.message !== 'The operation was aborted. ') {
setError(true);
this.setState({ error: true });
}
}
);
}
return () => {
controller.abort();
};
}, [location.search]);
componentDidMount() {
this.performSearch();
}
return (
<div className='container'>
<Helmet>
<title>Search Results | QotNews</title>
</Helmet>
{error && <p>Connection error?</p>}
{stories ?
<>
<p>Search results:</p>
<div className='comment lined'>
{stories.length ?
stories.map(x =>
<div className='item' key={x.id}>
<div className='title'>
<Link className='link' to={'/' + x.id}>
<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
</Link>
componentDidUpdate(prevProps) {
if (this.props.location.search !== prevProps.location.search) {
this.performSearch();
}
}
<span className='source'>
({sourceLink(x)})
</span>
render() {
const stories = this.state.stories;
const error = this.state.error;
return (
<div className='container'>
<Helmet>
<title>Feed - QotNews</title>
</Helmet>
{error && <p>Connection error?</p>}
{stories ?
<>
<p>Search results:</p>
<div className='comment lined'>
{stories.length ?
stories.map(x =>
<div className='item' key={x.id}>
<div className='title'>
<Link className='link' to={'/' + x.id}>
<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
</Link>
<span className='source'>
({sourceLink(x)})
</span>
</div>
{infoLine(x)}
</div>
{infoLine(x)}
</div>
)
:
<p>none</p>
}
</div>
</>
:
<p>loading...</p>
}
</div>
);
)
:
<p>none</p>
}
</div>
</>
:
<p>loading...</p>
}
</div>
);
}
}
export default Results;

View File

@@ -15,7 +15,6 @@ class ScrollToTop extends React.Component {
}
window.scrollTo(0, 0);
document.body.scrollTop = 0;
}
render() {

View File

@@ -1,46 +1,51 @@
import React, { useState, useRef } from 'react';
import { useHistory, useLocation } from 'react-router-dom';
import React, { Component } from 'react';
import { withRouter } from 'react-router-dom';
import queryString from 'query-string';
const getSearch = location => queryString.parse(location.search).q || '';
const getSearch = props => queryString.parse(props.location.search).q;
function Search() {
const history = useHistory();
const location = useLocation();
class Search extends Component {
constructor(props) {
super(props);
const [search, setSearch] = useState(getSearch(location));
const inputRef = useRef(null);
this.state = {search: getSearch(this.props)};
this.inputRef = React.createRef();
}
const searchArticles = (event) => {
const newSearch = event.target.value;
setSearch(newSearch);
if (newSearch.length >= 3) {
const searchQuery = queryString.stringify({ 'q': newSearch });
history.replace('/search?' + searchQuery);
searchArticles = (event) => {
const search = event.target.value;
this.setState({search: search});
if (search.length >= 3) {
const searchQuery = queryString.stringify({ 'q': search });
this.props.history.replace('/search?' + searchQuery);
} else {
history.replace('/');
this.props.history.replace('/');
}
}
const searchAgain = (event) => {
searchAgain = (event) => {
event.preventDefault();
const searchString = queryString.stringify({ 'q': event.target[0].value });
history.push('/search?' + searchString);
inputRef.current.blur();
this.props.history.push('/search?' + searchString);
this.inputRef.current.blur();
}
return (
<span className='search'>
<form onSubmit={searchAgain}>
<input
placeholder='Search...'
value={search}
onChange={searchArticles}
ref={inputRef}
/>
</form>
</span>
);
render() {
const search = this.state.search;
return (
<span className='search'>
<form onSubmit={this.searchAgain}>
<input
placeholder='Search... (fixed)'
value={search}
onChange={this.searchArticles}
ref={this.inputRef}
/>
</form>
</span>
);
}
}
export default Search;
export default withRouter(Search);

View File

@@ -1,77 +0,0 @@
.black {
color: #ddd;
}
.black a {
color: #ddd;
}
.black input {
color: #ddd;
border: 1px solid #828282;
}
.black .menu button,
.black .story-text button {
background-color: #444444;
border-color: #bbb;
color: #ddd;
}
.black .item {
color: #828282;
}
.black .item .source-logo {
filter: grayscale(1);
}
.black .item a {
color: #828282;
}
.black .item a.link {
color: #ddd;
}
.black .item a.link:visited {
color: #828282;
}
.black .item .info a.hot {
color: #cccccc;
}
.black .article a {
border-bottom: 1px solid #aaaaaa;
}
.black .article u {
border-bottom: 1px solid #aaaaaa;
text-decoration: none;
}
.black .story-text video,
.black .story-text img {
filter: brightness(50%);
}
.black .article .info {
color: #828282;
}
.black .article .info a {
border-bottom: none;
color: #828282;
}
.black .comment.lined {
border-left: 1px solid #444444;
}
.black .checkbox:checked + label::after {
border-color: #ddd;
}
.black .copy-button {
color: #828282;
}

View File

@@ -11,17 +11,14 @@
border: 1px solid #828282;
}
.dark .menu button,
.dark .story-text button {
background-color: #444444;
border-color: #bbb;
color: #ddd;
}
.dark .item {
color: #828282;
}
.dark .item .source-logo {
filter: grayscale(1);
}
.dark .item a {
color: #828282;
}
@@ -46,7 +43,6 @@
text-decoration: none;
}
.dark .story-text video,
.dark .story-text img {
filter: brightness(50%);
}
@@ -63,11 +59,3 @@
.dark .comment.lined {
border-left: 1px solid #444444;
}
.dark .checkbox:checked + label::after {
border-color: #ddd;
}
.dark .copy-button {
color: #828282;
}

View File

@@ -2,30 +2,9 @@ body {
text-rendering: optimizeLegibility;
font: 1rem/1.3 sans-serif;
color: #000000;
margin-bottom: 100vh;
word-break: break-word;
font-kerning: normal;
margin: 0;
}
::backdrop {
background-color: rgba(0,0,0,0);
}
body:fullscreen {
overflow-y: scroll !important;
}
body:-ms-fullscreen {
overflow-y: scroll !important;
}
body:-webkit-full-screen {
overflow-y: scroll !important;
}
body:-moz-full-screen {
overflow-y: scroll !important;
}
#root {
margin: 8px 8px 100vh 8px !important;
}
a {
@@ -43,21 +22,10 @@ input {
border-radius: 4px;
}
.fullscreen {
margin: 0.25rem;
padding: 0.25rem;
}
pre {
overflow: auto;
}
.comments pre {
overflow: auto;
white-space: pre-wrap;
overflow-wrap: break-word;
}
.container {
margin: 1rem auto;
max-width: 64rem;
@@ -126,13 +94,6 @@ span.source {
border-bottom: 1px solid #222222;
}
.article-title {
display: flex;
align-items: center;
margin-top: 0.67em;
margin-bottom: 0.67em;
}
.article h1 {
font-size: 1.6rem;
}
@@ -189,13 +150,6 @@ span.source {
.comments {
margin-left: -1.25rem;
margin-top: 0;
margin-bottom: 0;
padding: 0;
}
.comments dl, .comments dd {
margin: 0;
}
.comment {
@@ -208,11 +162,6 @@ span.source {
.comment .text {
margin-top: -0.5rem;
margin-bottom: 1rem;
}
.comment .text > * {
margin-bottom: 0;
}
.comment .text.hidden > p {
@@ -232,49 +181,20 @@ span.source {
padding-right: 1.5rem;
}
button.collapser {
background: transparent;
border: none;
margin: 0;
padding-top: 0;
padding-bottom: 0;
font: inherit;
color: inherit;
}
button.comment {
background: transparent;
border-top: none;
border-right: none;
border-bottom: none;
margin: 0;
padding-top: 0;
padding-right: 0;
padding-bottom: 0;
font: inherit;
color: inherit;
text-align: left;
width: 100%;
}
.comment .pointer {
cursor: pointer;
}
.dot {
cursor: pointer;
.toggleDot {
position: fixed;
bottom: 1rem;
left: 1rem;
height: 3rem;
width: 3rem;
background-color: #828282;
border-radius: 50%;
}
.toggleDot {
bottom: 1rem;
left: 1rem;
}
.toggleDot .button {
font: 2rem/1 'icomoon';
position: relative;
@@ -283,79 +203,23 @@ button.comment {
}
.forwardDot {
cursor: pointer;
position: fixed;
bottom: 1rem;
right: 1rem;
height: 3rem;
width: 3rem;
background-color: #828282;
border-radius: 50%;
}
.forwardDot .button {
font: 2rem/1 'icomoon';
font: 2.5rem/1 'icomoon';
position: relative;
top: 0.5rem;
left: 0.5rem;
}
.backwardDot {
bottom: 1rem;
right: 5rem;
}
.backwardDot .button {
font: 2rem/1 'icomoon';
position: relative;
top: 0.5rem;
left: 0.5rem;
top: 0.25rem;
left: 0.3rem;
}
.search form {
display: inline;
}
.copy-button {
font: 1.5rem/1 'icomoon2';
color: #828282;
background: transparent;
border: none;
cursor: pointer;
vertical-align: middle;
}
.checkbox {
-webkit-appearance: none;
appearance: none;
position: absolute;
opacity: 0;
cursor: pointer;
height: 0;
width: 0;
}
.checkbox + label {
position: relative;
cursor: pointer;
padding-left: 1.75rem;
user-select: none;
}
.checkbox + label::before {
content: '';
position: absolute;
left: 0;
top: 0.1em;
width: 1rem;
height: 1rem;
border: 1px solid #828282;
background-color: transparent;
border-radius: 3px;
}
.checkbox:checked + label::after {
content: "";
position: absolute;
left: 0.35rem;
top: 0.2em;
width: 0.3rem;
height: 0.6rem;
border: solid #000;
border-width: 0 2px 2px 0;
transform: rotate(45deg);
}

View File

@@ -1,95 +0,0 @@
.red {
color: #b00;
scrollbar-color: #b00 #440000;
}
.red a {
color: #b00;
}
.red input {
color: #b00;
border: 1px solid #690000;
}
.red input::placeholder {
color: #690000;
}
.red hr {
background-color: #690000;
}
.red .menu button,
.red .story-text button {
background-color: #440000;
border-color: #b00;
color: #b00;
}
.red .item,
.red .slogan {
color: #690000;
}
.red .item .source-logo {
display: none;
}
.red .item a {
color: #690000;
}
.red .item a.link {
color: #b00;
}
.red .item a.link:visited {
color: #690000;
}
.red .item .info a.hot {
color: #cc0000;
}
.red .article a {
border-bottom: 1px solid #aa0000;
}
.red .article u {
border-bottom: 1px solid #aa0000;
text-decoration: none;
}
.red .story-text video,
.red .story-text img {
filter: grayscale(100%) brightness(20%) sepia(100%) hue-rotate(-50deg) saturate(600%) contrast(0.8);
}
.red .article .info {
color: #690000;
}
.red .article .info a {
border-bottom: none;
color: #690000;
}
.red .comment.lined {
border-left: 1px solid #440000;
}
.red .dot {
background-color: #440000;
}
.red .checkbox + label::before {
border: 1px solid #690000;
}
.red .checkbox:checked + label::after {
border-color: #aa0000;
}
.red .copy-button {
color: #690000;
}

View File

@@ -1,53 +1,54 @@
import React, { useState, useRef } from 'react';
import { useHistory } from 'react-router-dom';
import React, { Component } from 'react';
import { withRouter } from 'react-router-dom';
function Submit() {
const [progress, setProgress] = useState(null);
const inputRef = useRef(null);
const history = useHistory();
class Submit extends Component {
constructor(props) {
super(props);
const submitArticle = async (event) => {
this.state = {
progress: null,
};
this.inputRef = React.createRef();
}
submitArticle = (event) => {
event.preventDefault();
const url = event.target[0].value;
inputRef.current.blur();
this.inputRef.current.blur();
setProgress('Submitting...');
this.setState({ progress: 'Submitting...' });
let data = new FormData();
data.append('url', url);
try {
const res = await fetch('/api/submit', { method: 'POST', body: data });
if (res.ok) {
const result = await res.json();
history.replace('/' + result.nid);
} else {
let errorData;
try {
errorData = await res.json();
} catch (jsonError) {
// Not a JSON error from our API, so it's a server issue
throw new Error(`Server responded with ${res.status} ${res.statusText}`);
fetch('/api/submit', { method: 'POST', body: data })
.then(res => res.json())
.then(
(result) => {
this.props.history.replace('/' + result.nid);
},
(error) => {
this.setState({ progress: 'Error' });
}
setProgress(errorData.error || 'An unknown error occurred.');
}
} catch (error) {
setProgress(`Error: ${error.toString()}`);
}
);
}
return (
<span className='search'>
<form onSubmit={submitArticle}>
<input
placeholder='Submit URL'
ref={inputRef}
/>
</form>
{progress && <p>{progress}</p>}
</span>
);
render() {
const progress = this.state.progress;
return (
<span className='search'>
<form onSubmit={this.submitArticle}>
<input
placeholder='Submit Article'
ref={this.inputRef}
/>
</form>
{progress ? progress : ''}
</span>
);
}
}
export default Submit;
export default withRouter(Submit);

View File

@@ -26,8 +26,3 @@
font-family: 'Icomoon';
src: url('icomoon.ttf') format('truetype');
}
@font-face {
font-family: 'Icomoon2';
src: url('icomoon2.ttf') format('truetype');
}

Binary file not shown.

Binary file not shown.

View File

@@ -8,4 +8,4 @@ ReactDOM.render(<App />, document.getElementById('root'));
// If you want your app to work offline and load faster, you can change
// // unregister() to register() below. Note this comes with some pitfalls.
// // Learn more about service workers: https://bit.ly/CRA-PWA
serviceWorker.unregister();
serviceWorker.register();

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff