Compare commits

..

18 Commits

Author SHA1 Message Date
Jason Schwarzenberger
9f4ff4acf0 remove unnecessary sitemap.xml request. 2020-11-04 11:22:15 +13:00
Jason Schwarzenberger
db6aad84ec fix mistake. 2020-11-04 11:12:01 +13:00
Jason Schwarzenberger
29f8a8b8cc add news site categories feed. 2020-11-04 11:08:50 +13:00
Jason
abf8589e02 fix sitemap 2020-11-03 10:53:40 +00:00
Jason
b759f46582 use extruct for opengraph/json-ld/microdata of articles 2020-11-03 10:31:36 +00:00
Jason Schwarzenberger
736cdc8576 fix mistake. 2020-11-03 17:04:46 +13:00
Jason Schwarzenberger
244d416f6e settings config of sitemap/substack publications. 2020-11-03 17:01:29 +13:00
Jason Schwarzenberger
5f98a2e76a Merge remote-tracking branch 'tanner/master' into master
And adding relevant settings.py.example/etc.
2020-11-03 16:44:02 +13:00
Jason Schwarzenberger
0567cdfd9b move sort to render. 2020-11-03 16:30:22 +13:00
Jason Schwarzenberger
4f90671cec order feed by reverse chronological 2020-11-03 16:21:23 +13:00
Jason Schwarzenberger
e63a1456a5 add logos. 2020-11-03 16:07:07 +13:00
Jason Schwarzenberger
76f1d57702 sitemap based feed. 2020-11-03 16:00:03 +13:00
Jason Schwarzenberger
de80389ed0 add logos. 2020-11-03 12:48:19 +13:00
Jason Schwarzenberger
4e64cf682a add the bulletin. 2020-11-03 12:41:16 +13:00
Jason Schwarzenberger
c5fe5d25a0 add substack.py top sites, replacing webworm.py 2020-11-03 12:28:39 +13:00
Jason
283a2b1545 fix webworm comments 2020-11-02 22:06:43 +00:00
Jason Schwarzenberger
0d6a86ace2 fix webworm dates. 2020-11-03 10:31:14 +13:00
Jason Schwarzenberger
f23bf628e0 add webworm/substack as a feed. 2020-11-02 17:09:59 +13:00
49 changed files with 4706 additions and 6948 deletions

1
.gitignore vendored
View File

@@ -1 +0,0 @@
.aider*

View File

@@ -109,5 +109,4 @@ settings.py
data.db
data.db.bak
data/archive/*
data/backup/*
qotnews.sqlite

View File

@@ -5,7 +5,7 @@ from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import IntegrityError
engine = create_engine('sqlite:///data/qotnews.sqlite', connect_args={'timeout': 360})
engine = create_engine('sqlite:///data/qotnews.sqlite')
Session = sessionmaker(bind=engine)
Base = declarative_base()
@@ -68,13 +68,12 @@ def get_reflist(amount):
q = session.query(Reflist).order_by(Reflist.rid.desc()).limit(amount)
return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]
def get_stories(amount, skip=0):
def get_stories(amount):
session = Session()
q = session.query(Reflist, Story.meta_json).\
order_by(Reflist.rid.desc()).\
join(Story).\
filter(Story.title != None).\
offset(skip).\
limit(amount)
return [x[1] for x in q]
@@ -101,22 +100,7 @@ def del_ref(ref):
finally:
session.close()
def count_stories():
try:
session = Session()
return session.query(Story).count()
finally:
session.close()
def get_story_list():
try:
session = Session()
return session.query(Story.sid).all()
finally:
session.close()
if __name__ == '__main__':
init()
#print(get_story_by_ref('hgi3sy'))
print(len(get_reflist(99999)))
print(get_story_by_ref('hgi3sy'))

View File

@@ -1,8 +1,6 @@
import database
import search
import sys
import settings
import logging
import json
import requests
@@ -23,7 +21,7 @@ def database_del_story(sid):
def search_del_story(sid):
try:
r = requests.delete(settings.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
r = requests.delete(search.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()

View File

@@ -8,39 +8,77 @@ import time
from bs4 import BeautifulSoup
import settings
from feeds import hackernews, reddit, tildes, manual, lobsters
import utils
from feeds import hackernews, reddit, tildes, substack, manual, news
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com', 'sec.gov']
OUTLINE_API = 'https://api.outline.com/v3/parse_article'
READ_API = 'http://127.0.0.1:33843'
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
TWO_DAYS = 60*60*24*2
substacks = {}
for key, value in settings.SUBSTACK.items():
substacks[key] = substack.Publication(value['url'])
categories = {}
for key, value in settings.CATEGORY.items():
categories[key] = news.Category(value['url'])
sitemaps = {}
for key, value in settings.SITEMAP.items():
sitemaps[key] = news.Sitemap(value['url'])
def list():
feed = []
if settings.NUM_HACKERNEWS:
feed += [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
if settings.NUM_LOBSTERS:
feed += [(x, 'lobsters') for x in lobsters.feed()[:settings.NUM_LOBSTERS]]
if settings.NUM_REDDIT:
feed += [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
if settings.NUM_TILDES:
feed += [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
if settings.NUM_SUBSTACK:
feed += [(x, 'substack') for x in substack.top.feed()[:settings.NUM_SUBSTACK]]
for key, publication in substacks.items():
count = settings.SUBSTACK[key]['count']
feed += [(x, key) for x in publication.feed()[:count]]
for key, sites in categories.items():
count = settings.CATEGORY[key]['count']
feed += [(x, key) for x in sites.feed()[:count]]
for key, sites in sitemaps.items():
count = settings.SITEMAP[key]['count']
feed += [(x, key) for x in sites.feed()[:count]]
return feed
def get_article(url):
if not settings.READER_URL:
logging.info('Readerserver not configured, aborting.')
return ''
try:
params = {'source_url': url}
headers = {'Referer': 'https://outline.com/'}
r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=20)
if r.status_code == 429:
logging.info('Rate limited by outline, sleeping 30s and skipping...')
time.sleep(30)
return ''
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
html = r.json()['data']['html']
if 'URL is not supported by Outline' in html:
raise Exception('URL not supported by Outline')
return html
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem outlining article: {}'.format(str(e)))
if url.startswith('https://twitter.com'):
logging.info('Replacing twitter.com url with nitter.net')
url = url.replace('twitter.com', 'nitter.net')
logging.info('Trying our server instead...')
try:
r = requests.post(settings.READER_URL, data=dict(url=url), timeout=20)
r = requests.post(READ_API, data=dict(url=url), timeout=20)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.text
@@ -52,38 +90,36 @@ def get_article(url):
def get_content_type(url):
try:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
return requests.get(url, headers=headers, timeout=5).headers['content-type']
except:
return ''
try:
headers = {
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
'X-Forwarded-For': '66.249.66.1',
}
return requests.get(url, headers=headers, timeout=10).headers['content-type']
headers = {'User-Agent': 'Twitterbot/1.0'}
return requests.get(url, headers=headers, timeout=2).headers['content-type']
except:
pass
try:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
return requests.get(url, headers=headers, timeout=10).headers['content-type']
except:
return ''
def update_story(story, is_manual=False):
res = {}
try:
if story['source'] == 'hackernews':
res = hackernews.story(story['ref'])
elif story['source'] == 'lobsters':
res = lobsters.story(story['ref'])
elif story['source'] == 'reddit':
res = reddit.story(story['ref'])
elif story['source'] == 'tildes':
res = tildes.story(story['ref'])
elif story['source'] == 'manual':
res = manual.story(story['ref'])
except BaseException as e:
utils.alert_tanner('Problem updating {} story, ref {}: {}'.format(story['source'], story['ref'], str(e)))
logging.exception(e)
return False
if story['source'] == 'hackernews':
res = hackernews.story(story['ref'])
elif story['source'] == 'reddit':
res = reddit.story(story['ref'])
elif story['source'] == 'tildes':
res = tildes.story(story['ref'])
elif story['source'] == 'substack':
res = substack.top.story(story['ref'])
elif story['source'] in categories.keys():
res = categories[story['source']].story(story['ref'])
elif story['source'] in sitemaps.keys():
res = sitemaps[story['source']].story(story['ref'])
elif story['source'] in substacks.keys():
res = substacks[story['source']].story(story['ref'])
elif story['source'] == 'manual':
res = manual.story(story['ref'])
if res:
story.update(res) # join dicts
@@ -92,7 +128,7 @@ def update_story(story, is_manual=False):
return False
if story['date'] and not is_manual and story['date'] + TWO_DAYS < time.time():
logging.info('Story too old, removing. Date: {}'.format(story['date']))
logging.info('Story too old, removing')
return False
if story.get('url', '') and not story.get('text', ''):
@@ -106,12 +142,6 @@ def update_story(story, is_manual=False):
logging.info(story['url'])
return False
if 'trump' in story['title'].lower() or 'musk' in story['title'].lower() or 'Removed by moderator' in story['title']:
logging.info('Trump / Musk / removed story, skipping')
logging.info(story['url'])
return False
logging.info('Getting article ' + story['url'])
story['text'] = get_article(story['url'])
if not story['text']: return False
@@ -129,7 +159,7 @@ if __name__ == '__main__':
#print(get_article('https://www.bloomberg.com/news/articles/2019-09-23/xi-s-communists-under-pressure-as-high-prices-hit-china-workers'))
a = get_content_type('https://tefkos.comminfo.rutgers.edu/Courses/e530/Readings/Beal%202008%20full%20text%20searching.pdf')
a = get_article('https://blog.joinmastodon.org/2019/10/mastodon-3.0/')
print(a)
print('done')

View File

@@ -12,8 +12,7 @@ import requests
from utils import clean
API_TOPSTORIES = lambda x: 'https://hacker-news.firebaseio.com/v0/topstories.json'
ALG_API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
BHN_API_ITEM = lambda x : 'https://api.hnpwa.com/v0/item/{}.json'.format(x)
API_ITEM = lambda x : 'https://hn.algolia.com/api/v1/items/{}'.format(x)
SITE_LINK = lambda x : 'https://news.ycombinator.com/item?id={}'.format(x)
SITE_AUTHOR_LINK = lambda x : 'https://news.ycombinator.com/user?id={}'.format(x)
@@ -43,7 +42,7 @@ def api(route, ref=None):
def feed():
return [str(x) for x in api(API_TOPSTORIES) or []]
def alg_comment(i):
def comment(i):
if 'author' not in i:
return False
@@ -52,25 +51,21 @@ def alg_comment(i):
c['score'] = i.get('points', 0)
c['date'] = i.get('created_at_i', 0)
c['text'] = clean(i.get('text', '') or '')
c['comments'] = [alg_comment(j) for j in i['children']]
c['comments'] = [comment(j) for j in i['children']]
c['comments'] = list(filter(bool, c['comments']))
return c
def alg_comment_count(i):
def comment_count(i):
alive = 1 if i['author'] else 0
return sum([alg_comment_count(c) for c in i['comments']]) + alive
return sum([comment_count(c) for c in i['comments']]) + alive
def alg_story(ref):
r = api(ALG_API_ITEM, ref)
if not r:
logging.info('Bad Algolia Hackernews API response.')
return None
def story(ref):
r = api(API_ITEM, ref)
if not r: return False
if 'deleted' in r:
logging.info('Story was deleted.')
return False
elif r.get('type', '') != 'story':
logging.info('Type "{}" is not "story".'.format(r.get('type', '')))
return False
s = {}
@@ -81,88 +76,17 @@ def alg_story(ref):
s['title'] = r.get('title', '')
s['link'] = SITE_LINK(ref)
s['url'] = r.get('url', '')
s['comments'] = [alg_comment(i) for i in r['children']]
s['comments'] = [comment(i) for i in r['children']]
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = alg_comment_count(s) - 1
s['num_comments'] = comment_count(s) - 1
if 'text' in r and r['text']:
s['text'] = clean(r['text'] or '')
return s
def bhn_comment(i):
if 'user' not in i:
return False
c = {}
c['author'] = i.get('user', '')
c['score'] = 0 # Not present?
c['date'] = i.get('time', 0)
c['text'] = clean(i.get('content', '') or '')
c['comments'] = [bhn_comment(j) for j in i['comments']]
c['comments'] = list(filter(bool, c['comments']))
return c
def bhn_story(ref):
r = api(BHN_API_ITEM, ref)
if not r:
logging.info('Bad BetterHN Hackernews API response.')
return None
if 'deleted' in r: # TODO: verify
logging.info('Story was deleted.')
return False
elif r.get('dead', False):
logging.info('Story was deleted.')
return False
elif r.get('type', '') != 'link':
logging.info('Type "{}" is not "link".'.format(r.get('type', '')))
return False
s = {}
s['author'] = r.get('user', '')
s['author_link'] = SITE_AUTHOR_LINK(r.get('user', ''))
s['score'] = r.get('points', 0)
s['date'] = r.get('time', 0)
s['title'] = r.get('title', '')
s['link'] = SITE_LINK(ref)
s['url'] = r.get('url', '')
if s['url'].startswith('item'):
s['url'] = SITE_LINK(ref)
s['comments'] = [bhn_comment(i) for i in r['comments']]
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = r.get('comments_count', 0)
if 'content' in r and r['content']:
s['text'] = clean(r['content'] or '')
return s
def story(ref):
s = alg_story(ref)
if s is None:
s = bhn_story(ref)
if not s:
return False
if not s['title']:
return False
if s['score'] < 25 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False
return s
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
print(feed())
#print(story(20763961))
#print(story(20802050))
#print(story(42899834)) # type "job"
#print(story(42900076)) # Ask HN
#print(story(42898201)) # Show HN
#print(story(42899703)) # normal
print(story(42902678)) # bad title?

View File

@@ -1,120 +0,0 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
if __name__ == '__main__':
import sys
sys.path.insert(0,'.')
import requests
from datetime import datetime
from utils import clean
API_HOTTEST = lambda x: 'https://lobste.rs/hottest.json'
API_ITEM = lambda x : 'https://lobste.rs/s/{}.json'.format(x)
SITE_LINK = lambda x : 'https://lobste.rs/s/{}'.format(x)
SITE_AUTHOR_LINK = lambda x : 'https://lobste.rs/u/{}'.format(x)
def api(route, ref=None):
    """Fetch a Lobsters JSON route, retrying once with a longer timeout.

    `route` is a callable that turns `ref` into a URL. The first attempt
    uses a 5 second timeout, the second 15 seconds. Returns the decoded
    JSON on success, or False when both attempts fail. KeyboardInterrupt
    is always propagated.
    """
    attempts = (
        (5, 'Problem hitting lobsters API: {}, trying again'),
        (15, 'Problem hitting lobsters API: {}'),
    )
    for timeout, message in attempts:
        try:
            response = requests.get(route(ref), timeout=timeout)
            if response.status_code != 200:
                raise Exception('Bad response code ' + str(response.status_code))
            return response.json()
        except KeyboardInterrupt:
            raise
        except BaseException as err:
            logging.error(message.format(str(err)))
    return False
def feed():
    """Return the `short_id` of every entry in the hottest-stories response.

    Yields an empty list when the API call fails (falsy result).
    """
    hottest = api(API_HOTTEST) or []
    return [entry['short_id'] for entry in hottest]
def unix(date_str):
    """Convert a timestamp like 2020-11-03T10:31:36.000-06:00 to epoch seconds.

    All colons are removed first so the UTC offset matches `%z`; the time
    part is therefore parsed as `%H%M%S`.
    """
    compact = ''.join(date_str.split(':'))
    parsed = datetime.strptime(compact, '%Y-%m-%dT%H%M%S.%f%z')
    return int(parsed.timestamp())
def make_comment(i):
    """Build an internal comment dict from one raw Lobsters comment.

    Missing `commenting_user` becomes an empty author, missing `created_at`
    becomes date 0. Replies start out empty; nesting is done by the caller.
    """
    created = unix(i['created_at']) if 'created_at' in i else 0
    return {
        'author': i['commenting_user'] if 'commenting_user' in i else '',
        'score': i.get('score', 0),
        'date': created,
        'text': clean(i.get('comment', '') or ''),
        'comments': [],
    }
def iter_comments(flat_comments):
    """Rebuild a comment tree from a flat list ordered depth-first.

    Each raw comment carries a `depth`; depth 0 starts a new top-level
    thread, deeper comments are attached to the most recent comment one
    level above them.
    """
    roots = []
    ancestors = []  # ancestors[d] is the latest comment seen at depth d

    for raw in flat_comments:
        node = make_comment(raw)
        depth = raw['depth']

        if depth == 0:
            roots.append(node)
            ancestors = [node]
            continue

        # Drop anything at this depth or deeper, then hang off the parent.
        ancestors = ancestors[:depth]
        ancestors[-1]['comments'].append(node)
        ancestors.append(node)

    return roots
def story(ref):
    """Fetch one Lobsters story and convert it to the internal story dict.

    Returns False when the API call fails or when the story has both a
    score under 15 and fewer than 10 comments.
    """
    r = api(API_ITEM, ref)
    if not r:
        logging.info('Bad Lobsters API response.')
        return False

    has_submitter = 'submitter_user' in r
    s = {}
    s['author'] = r['submitter_user'] if has_submitter else ''
    s['author_link'] = SITE_AUTHOR_LINK(s['author']) if has_submitter else ''
    s['score'] = r.get('score', 0)
    s['date'] = unix(r['created_at']) if 'created_at' in r else 0
    s['title'] = r.get('title', '')
    s['link'] = SITE_LINK(ref)
    s['url'] = r.get('url', '')
    s['comments'] = iter_comments(r['comments'])
    s['num_comments'] = r['comment_count']

    popular_enough = s['score'] >= 15 or s['num_comments'] >= 10
    if not popular_enough:
        logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
        return False

    description = r.get('description')
    if description:
        s['text'] = clean(description or '')

    return s
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
    import json

    # Alternate probes, kept commented for quick switching:
    #print(feed())
    #print(json.dumps(story('ixyv5u'), indent=4))
    sample = story('fzvd1v')
    print(json.dumps(sample, indent=4))

View File

@@ -27,9 +27,7 @@ def api(route):
def story(ref):
html = api(ref)
if not html:
logging.info('Bad http GET response.')
return False
if not html: return False
soup = BeautifulSoup(html, features='html.parser')

180
apiserver/feeds/news.py Normal file
View File

@@ -0,0 +1,180 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
if __name__ == '__main__':
import sys
sys.path.insert(0,'.')
import requests
from datetime import datetime
from bs4 import BeautifulSoup
import extruct
from utils import clean
OUTLINE_API = 'https://api.outline.com/v3/parse_article'
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'
def unix(date_str):
    """Convert an ISO-8601 style timestamp string to Unix epoch seconds.

    Accepted shapes are `YYYY-MM-DDTHH:MM:SS[.ffffff]` followed by either a
    literal `Z` or a numeric UTC offset (`+1300` or `+13:00`).

    Returns 0 when the value is empty, not a string, or matches no format.
    """
    # Local import: the file only imports `datetime` from the module.
    from datetime import timezone

    if not date_str or not isinstance(date_str, str):
        return 0

    candidates = [date_str]
    # Also try the offset without its colon ("+13:00" -> "+1300") for
    # strptime implementations whose %z rejects the colon form.
    if len(date_str) >= 3 and date_str[-3] == ':':
        candidates.append(date_str[:-3] + date_str[-2:])

    formats = [
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S%z',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f%z',
    ]
    for fmt in formats:
        for candidate in candidates:
            try:
                parsed = datetime.strptime(candidate, fmt)
            except ValueError:
                continue
            if parsed.tzinfo is None:
                # Only the literal-"Z" formats produce a naive datetime, and
                # "Z" means UTC; without this, timestamp() would interpret
                # the value in the server's local timezone.
                parsed = parsed.replace(tzinfo=timezone.utc)
            return int(parsed.timestamp())
    return 0
def xml(route, ref=None):
    """GET the URL produced by `route(ref)` and return the response text.

    The request carries the module's browser User-Agent and a fixed
    X-Forwarded-For header, with a 5 second timeout. Returns False on any
    failure (including a non-200 status); KeyboardInterrupt is propagated.
    """
    try:
        request_headers = {'User-Agent': USER_AGENT, 'X-Forwarded-For': '66.249.66.1'}
        response = requests.get(route(ref), headers=request_headers, timeout=5)
        if response.status_code == 200:
            return response.text
        raise Exception('Bad response code ' + str(response.status_code))
    except KeyboardInterrupt:
        raise
    except BaseException as err:
        logging.error('Problem hitting URL: {}'.format(str(err)))
        return False
def parse_extruct(s, data):
    """Copy title, date and author from extruct metadata into story dict `s`.

    `data` is the mapping produced by `extruct.extract()`; the 'rdfa',
    'opengraph', 'microdata' and 'json-ld' sections are read in that order,
    so later sections override earlier ones. Within a section the published
    time is preferred over the modified time. Missing sections, keys or
    values are skipped instead of raising.

    Returns the same dict `s`, mutated in place.
    """
    def set_date(published, modified):
        # Published time wins over modified time, as in every section below.
        stamp = published or modified
        if stamp:
            s['date'] = unix(stamp)

    def author_name(author):
        # Author may be a dict, a list of dicts, or a plain string.
        if isinstance(author, list):
            author = author[0] if author else None
        if isinstance(author, str):
            return author
        if isinstance(author, dict):
            # microdata nests fields under 'properties'; JSON-LD keeps them flat
            fields = author.get('properties', author)
            if isinstance(fields, dict):
                return fields.get('name')
        return None

    def rdfa_values(node, prop):
        # Look for `prop` on the node itself and on any dict-valued entry,
        # which also covers the nested shape the earlier code expected.
        # NOTE(review): confirm against real extruct RDFa output.
        containers = [node] + [v for v in node.values() if isinstance(v, dict)]
        found = []
        for container in containers:
            entries = container.get(prop) or []
            if not isinstance(entries, list):
                continue
            for entry in entries:
                if isinstance(entry, dict) and '@value' in entry:
                    found.append(entry['@value'])
        return found

    for node in data.get('rdfa') or []:
        if not isinstance(node, dict):
            continue
        titles = rdfa_values(node, 'http://ogp.me/ns#title')
        if titles:
            s['title'] = titles[-1]
        modified = rdfa_values(node, 'http://ogp.me/ns/article#modified_time')
        published = rdfa_values(node, 'http://ogp.me/ns/article#published_time')
        set_date(published[-1] if published else None,
                 modified[-1] if modified else None)

    for og in data.get('opengraph') or []:
        pairs = og.get('properties') or []
        titles = [value for key, value in pairs if 'og:title' in key and value]
        modified = [value for key, value in pairs if 'article:modified_time' in key and value]
        published = [value for key, value in pairs if 'article:published_time' in key and value]
        # Previously published[0] was indexed unconditionally, raising
        # IndexError for pages that expose no published time.
        set_date(published[0] if published else None,
                 modified[0] if modified else None)
        if titles:
            s['title'] = titles[0]

    for md in data.get('microdata') or []:
        if md.get('type') != 'https://schema.org/NewsArticle':
            continue
        props = md.get('properties') or {}
        if props.get('headline'):
            s['title'] = props['headline']
        set_date(props.get('datePublished'), props.get('dateModified'))
        name = author_name(props.get('author'))
        if name:
            s['author'] = name

    for ld in data.get('json-ld') or []:
        if not isinstance(ld, dict) or ld.get('@type') != 'Article':
            continue
        if ld.get('headline'):
            s['title'] = ld['headline']
        set_date(ld.get('datePublished'), ld.get('dateModified'))
        name = author_name(ld.get('author'))
        if name:
            s['author'] = name

    return s
class Sitemap:
    """Feed source that lists article URLs from an XML sitemap."""

    def __init__(self, url):
        # URL of the sitemap document itself.
        self.sitemap_url = url

    def feed(self):
        """Return the `<loc>` URL of every `<url>` entry that has a `<lastmod>`.

        Returns an empty list when the sitemap cannot be fetched or contains
        no `<urlset>` element.
        """
        markup = xml(lambda x: self.sitemap_url)
        if not markup:
            return []

        soup = BeautifulSoup(markup, features='lxml')
        urlset = soup.find('urlset')
        if urlset is None:
            # Guard against documents without <urlset>, which previously
            # raised AttributeError here.
            logging.info('No <urlset> found in sitemap.')
            return []

        refs = []
        for entry in urlset.find_all('url'):
            loc = entry.find('loc')
            if loc is None or entry.find('lastmod') is None:
                continue
            refs.append(loc.text)
        return refs

    def story(self, ref):
        """Fetch the article at `ref` and build a story dict from its metadata.

        Returns False when the page cannot be fetched. Score and comment
        fields are fixed at zero/empty; the remaining fields are filled in
        by `parse_extruct` from the page's embedded metadata.
        """
        markup = xml(lambda x: ref)
        if not markup:
            return False

        s = {}
        s['author_link'] = ''
        s['score'] = 0
        s['comments'] = []
        s['num_comments'] = 0
        s['link'] = ref
        s['url'] = ref
        s['date'] = 0

        data = extruct.extract(markup)
        s = parse_extruct(s, data)
        return s
class Category:
    """Feed source that scrapes article links from a category listing page."""

    def __init__(self, url):
        self.category_url = url
        # "scheme://host": the first three '/'-separated pieces of the URL.
        pieces = url.split('/')
        self.base_url = '/'.join(pieces[0:3])

    def feed(self):
        """Return links on the category page that live under the category URL.

        Site-relative hrefs (starting with '/') are made absolute with the
        site's base URL before the prefix check.
        """
        markup = xml(lambda x: self.category_url)
        if not markup:
            return []

        soup = BeautifulSoup(markup, features='html.parser')
        matches = []
        for anchor in soup.find_all('a', href=True):
            href = anchor.get('href')
            if href.startswith('/'):
                href = f"{self.base_url}{href}"
            if href and href.startswith(self.category_url):
                matches.append(href)
        return matches

    def story(self, ref):
        """Fetch the article at `ref` and build a story dict from its metadata.

        Returns False when the page cannot be fetched.
        """
        markup = xml(lambda x: ref)
        if not markup:
            return False

        s = {
            'author_link': '',
            'score': 0,
            'comments': [],
            'num_comments': 0,
            'link': ref,
            'url': ref,
            'date': 0,
        }
        metadata = extruct.extract(markup)
        return parse_extruct(s, metadata)
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
    demos = [
        ("Sitemap: Stuff", Sitemap, "https://www.stuff.co.nz/sitemap.xml"),
        ("Sitemap: NZ Herald", Sitemap, "https://www.nzherald.co.nz/arcio/news-sitemap/"),
        ("Category: RadioNZ Te Ao Māori", Category, "https://www.rnz.co.nz/news/te-manu-korihi/"),
    ]
    # For each source: list the feed, show the first ref, then parse it.
    for label, source_cls, url in demos:
        print(label)
        site = source_cls(url)
        posts = site.feed()
        print(posts[:1])
        print(site.story(posts[0]))

View File

@@ -32,8 +32,11 @@ def feed():
return [x.id for x in reddit.subreddit(subs).hot()]
except KeyboardInterrupt:
raise
except BaseException as e:
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
except PRAWException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
return []
except PrawcoreException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
return []
def comment(i):
@@ -56,9 +59,7 @@ def comment(i):
def story(ref):
try:
r = reddit.submission(ref)
if not r:
logging.info('Bad Reddit API response.')
return False
if not r: return False
s = {}
s['author'] = r.author.name if r.author else '[Deleted]'
@@ -73,7 +74,6 @@ def story(ref):
s['num_comments'] = r.num_comments
if s['score'] < 25 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False
if r.selftext:
@@ -84,10 +84,10 @@ def story(ref):
except KeyboardInterrupt:
raise
except PRAWException as e:
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
logging.error('Problem hitting reddit API: {}'.format(str(e)))
return False
except PrawcoreException as e:
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
logging.error('Problem hitting reddit API: {}'.format(str(e)))
return False
# scratchpad so I can quickly develop the parser

160
apiserver/feeds/substack.py Normal file
View File

@@ -0,0 +1,160 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
if __name__ == '__main__':
import sys
sys.path.insert(0,'.')
import requests
from datetime import datetime
from utils import clean
SUBSTACK_API_TOP_POSTS = lambda x: "https://substack.com/api/v1/reader/top-posts"
def author_link(author_id, base_url):
    """Return the profile URL for `author_id` on the publication at `base_url`.

    A trailing slash on `base_url` is dropped so the result never contains
    a double slash before `people`.
    """
    root = str(base_url).rstrip('/')
    return f"{root}/people/{author_id}"
def api_comments(post_id, base_url):
    """Return the comments API URL for `post_id` on the given publication.

    A trailing slash on `base_url` is dropped so the path does not start
    with a double slash.
    """
    root = str(base_url).rstrip('/')
    return f"{root}/api/v1/post/{post_id}/comments?all_comments=true&sort=best_first"
def api_stories(x, base_url):
    """Return the archive API URL (newest first, up to 100 posts).

    `x` is unused; it is kept so the function fits the `route(ref)` calling
    convention used by `api`. A trailing slash on `base_url` is dropped so
    the path does not start with a double slash.
    """
    root = str(base_url).rstrip('/')
    return f"{root}/api/v1/archive?sort=new&search=&offset=0&limit=100"
def unix(date_str):
    """Convert a `...Z` (UTC) timestamp string to Unix epoch seconds.

    Accepts `YYYY-MM-DDTHH:MM:SS.ffffffZ` and the same without fractional
    seconds. Returns 0 for a missing/empty value.

    Raises:
        ValueError: if a non-empty value matches neither format.
    """
    # Local import: the file only imports `datetime` from the module.
    from datetime import timezone

    if not date_str:
        return 0

    for fmt in ('%Y-%m-%dT%H:%M:%S.%fZ', '%Y-%m-%dT%H:%M:%SZ'):
        try:
            parsed = datetime.strptime(date_str, fmt)
        except ValueError:
            continue
        # The trailing "Z" means UTC. strptime returns a naive datetime, and
        # timestamp() would otherwise read it in the server's local zone.
        return int(parsed.replace(tzinfo=timezone.utc).timestamp())

    raise ValueError('Unrecognised Substack date: {!r}'.format(date_str))
def api(route, ref=None):
    """Fetch a Substack JSON route, retrying once with a longer timeout.

    `route` is a callable that turns `ref` into a URL. The first attempt
    uses a 5 second timeout, the second 15 seconds. Returns the decoded
    JSON on success, or False when both attempts fail. KeyboardInterrupt
    is always propagated.
    """
    attempts = (
        (5, 'Problem hitting Substack API: {}, trying again'),
        (15, 'Problem hitting Substack API: {}'),
    )
    for timeout, message in attempts:
        try:
            response = requests.get(route(ref), timeout=timeout)
            if response.status_code != 200:
                raise Exception('Bad response code ' + str(response.status_code))
            return response.json()
        except KeyboardInterrupt:
            raise
        except BaseException as err:
            logging.error(message.format(str(err)))
    return False
def comment(i):
    """Convert one raw Substack comment (and its replies) to the internal shape.

    Returns False for entries without a `body`, so callers can filter them
    out. Missing `date`, `reactions` or `children` fields are tolerated.
    """
    if 'body' not in i:
        return False

    # The earlier lookup used an empty-string key, which looks like a
    # non-ASCII reaction key lost in transit; total every reaction count
    # instead. NOTE(review): confirm the reactions schema against the API.
    reactions = i.get('reactions') or {}
    score = sum(v for v in reactions.values() if isinstance(v, int))

    c = {}
    c['date'] = unix(i['date']) if i.get('date') else 0
    c['author'] = i.get('name', '')
    c['score'] = score
    c['text'] = clean(i.get('body', '') or '')
    replies = [comment(j) for j in i.get('children') or []]
    c['comments'] = [reply for reply in replies if reply]
    return c
class Publication:
    """Feed source for a single Substack publication."""

    def __init__(self, domain):
        # Base URL of the publication, e.g. "https://www.webworm.co".
        self.BASE_DOMAIN = domain

    def _public_posts(self):
        """Return archive posts whose audience is "everyone" ([] on API failure)."""
        posts = api(lambda x: api_stories(x, self.BASE_DOMAIN))
        # api() returns False on failure; guard before iterating.
        return [p for p in posts or [] if p and p.get("audience") == "everyone"]

    def feed(self):
        """Return the ids (as strings) of the publication's public posts."""
        return [str(p.get("id")) for p in self._public_posts()]

    def story(self, ref):
        """Build a story dict for the public post whose id equals `ref`.

        Returns False when the post is not in the archive listing (or the
        listing could not be fetched). A failed comments request yields an
        empty comment list rather than an exception.
        """
        matches = [p for p in self._public_posts() if str(p.get('id')) == str(ref)]
        if not matches:
            return False
        r = matches[0]

        # The earlier lookup used an empty-string key, which looks like a
        # lost non-ASCII reaction key; total every reaction count instead.
        # NOTE(review): confirm the reactions schema against the API.
        reactions = r.get('reactions') or {}

        s = {}
        s['author'] = ''
        s['author_link'] = ''
        s['date'] = unix(r['post_date']) if r.get('post_date') else 0
        s['score'] = sum(v for v in reactions.values() if isinstance(v, int))
        s['title'] = r.get('title', '')
        s['link'] = r.get('canonical_url', '')
        s['url'] = r.get('canonical_url', '')

        comments = api(lambda x: api_comments(x, self.BASE_DOMAIN), r.get('id'))
        raw_comments = (comments or {}).get('comments') or []
        s['comments'] = [c for c in (comment(i) for i in raw_comments) if c]
        s['num_comments'] = r.get('comment_count', 0)

        bylines = r.get('publishedBylines') or []
        authors = [a for a in (self._bylines(b) for b in bylines) if a]
        if authors:
            s['author'] = authors[0].get('name')
            s['author_link'] = authors[0].get('link')

        return s

    def _bylines(self, b):
        """Return {'name', 'link'} for a byline dict, or None if it has no id."""
        if 'id' not in b:
            return None
        a = {}
        a['name'] = b.get('name')
        a['link'] = author_link(b.get('id'), self.BASE_DOMAIN)
        return a
class Top:
    """Feed source for Substack's cross-publication top-posts listing."""

    def _public_posts(self):
        """Return top posts whose audience is "everyone" ([] on API failure)."""
        posts = api(SUBSTACK_API_TOP_POSTS)
        # api() returns False on failure; guard before iterating.
        return [p for p in posts or [] if p and p.get("audience") == "everyone"]

    def feed(self):
        """Return the ids (as strings) of the public top posts."""
        return [str(p.get("id")) for p in self._public_posts()]

    def story(self, ref):
        """Build a story dict for the top post whose id equals `ref`.

        Returns False when the post is no longer in the top-posts listing
        (or the listing could not be fetched). Author details come from the
        post's `pub` entry; comments are fetched from that publication's
        own base URL when one is present.
        """
        matches = [p for p in self._public_posts() if str(p.get('id')) == str(ref)]
        if not matches:
            return False
        r = matches[0]

        pub = r.get('pub') or {}
        base_url = pub.get('base_url')

        s = {}
        s['author'] = pub.get('author_name')
        s['author_link'] = author_link(pub.get('author_id'), base_url) if base_url else ''
        s['date'] = unix(r['post_date']) if r.get('post_date') else 0
        s['score'] = r.get('score')
        s['title'] = r.get('title', '')
        s['link'] = r.get('canonical_url', '')
        s['url'] = r.get('canonical_url', '')

        # Without a publication base URL there is no comments endpoint to call.
        comments = False
        if base_url:
            comments = api(lambda x: api_comments(x, base_url), r.get('id'))
        raw_comments = (comments or {}).get('comments') or []
        s['comments'] = [c for c in (comment(i) for i in raw_comments) if c]
        s['num_comments'] = r.get('comment_count', 0)

        return s
top = Top()
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
    # Top posts across Substack: parse the first one.
    top_posts = top.feed()
    first_top = top.story(top_posts[0])
    print(first_top)

    # A single publication: show its newest id, then parse that post.
    webworm = Publication("https://www.webworm.co/")
    posts = webworm.feed()
    print(posts[:1])
    first_post = webworm.story(posts[0])
    print(first_post)

View File

@@ -34,7 +34,7 @@ def api(route):
except KeyboardInterrupt:
raise
except BaseException as e:
logging.critical('Problem hitting tildes website: {}'.format(str(e)))
logging.error('Problem hitting tildes website: {}'.format(str(e)))
return False
def feed():
@@ -71,15 +71,11 @@ def story(ref):
html = api(SITE_LINK(group_lookup[ref], ref))
else:
html = api(API_ITEM(ref))
if not html:
logging.info('Bad Tildes API response.')
return False
if not html: return False
soup = BeautifulSoup(html, features='html.parser')
a = soup.find('article', class_='topic-full')
if a is None:
logging.info('Tildes <article> element not found.')
return False
if a is None: return False
h = a.find('header')
lu = h.find('a', class_='link-user')
@@ -87,7 +83,6 @@ def story(ref):
error = a.find('div', class_='text-error')
if error:
if 'deleted' in error.string or 'removed' in error.string:
logging.info('Article was deleted or removed.')
return False
s = {}
@@ -107,21 +102,7 @@ def story(ref):
ch = a.find('header', class_='topic-comments-header')
s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
if s['group'].split('.')[0] not in [
'~arts',
'~comp',
'~creative',
'~design',
'~engineering',
'~finance',
'~science',
'~tech',
]:
logging.info('Group ({}) not in whitelist.'.format(s['group']))
return False
if s['score'] < 15 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
if s['score'] < 8 and s['num_comments'] < 6:
return False
td = a.find('div', class_='topic-full-text')
@@ -132,7 +113,7 @@ def story(ref):
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
print(feed())
#print(feed())
#normal = story('gxt')
#print(normal)
#no_comments = story('gxr')
@@ -141,8 +122,8 @@ if __name__ == '__main__':
#print(self_post)
#li_comment = story('gqx')
#print(li_comment)
#broken = story('q4y')
#print(broken)
broken = story('q4y')
print(broken)
# make sure there's no self-reference
#import copy

View File

@@ -4,14 +4,15 @@ certifi==2020.6.20
chardet==3.0.4
click==7.1.2
commonmark==0.9.1
extruct==0.10.0
Flask==1.1.2
Flask-Cors==3.0.8
gevent==20.6.2
greenlet==0.4.16
humanize==4.10.0
idna==2.10
itsdangerous==1.1.0
Jinja2==2.11.2
lxml==4.6.1
MarkupSafe==1.1.1
packaging==20.4
praw==6.4.0

View File

@@ -1,58 +0,0 @@
import time
import json
import logging
import feed
import database
import search
database.init()
def fix_gzip_bug(story_list):
    """Re-fetch stories whose text is dominated by U+FFFD replacement chars.

    `story_list` holds rows whose first element is the story id. For each
    story the replacement characters in its `text` are counted; when they
    exceed FIX_THRESHOLD per 1000 characters the story is re-fetched via
    `feed.update_story` and, if still valid, written back to the database
    and the search index. Errors on a single story are logged and drop
    into the debugger instead of aborting the run.
    """
    FIX_THRESHOLD = 150
    # The three bytes EF BF BD are the UTF-8 encoding of U+FFFD; the escaped
    # "<EFBFBD>" spelling shown for this literal cannot match real text.
    REPLACEMENT_CHAR = '\ufffd'

    for row in story_list:
        try:
            sid = row[0]
            story = database.get_story(sid)
            full_json = json.loads(story.full_json)
            meta_json = json.loads(story.meta_json)

            text = full_json.get('text') or ''
            bad_chars = text.count(REPLACEMENT_CHAR)
            if not bad_chars:
                continue

            # bad_chars > 0 implies len(text) > 0, so the division is safe.
            ratio = bad_chars / len(text) * 1000
            print('Bad story:', sid, 'Num ?:', bad_chars, 'Ratio:', ratio)
            if ratio < FIX_THRESHOLD:
                continue

            print('Attempting to fix...')
            valid = feed.update_story(meta_json, is_manual=True)
            if valid:
                database.put_story(meta_json)
                search.put_story(meta_json)
                print('Success')
            else:
                print('Story was not valid')

            # Pause between re-fetches.
            time.sleep(3)
        except KeyboardInterrupt:
            raise
        except BaseException as e:
            logging.exception(e)
            # Interactive maintenance script: stop here so the failure can
            # be inspected before moving on.
            breakpoint()
if __name__ == '__main__':
    # Show how many stories are involved and wait for confirmation.
    total = database.count_stories()
    print('Fix {} stories?'.format(total))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()

    fix_gzip_bug(database.get_story_list())

View File

@@ -1,62 +0,0 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.INFO)
import database
from sqlalchemy import select
import search
import sys
import time
import json
import requests
database.init()
search.init()
BATCH_SIZE = 5000
def put_stories(stories):
    """POST a batch of story documents to the qotnews search index."""
    route = 'indexes/qotnews/documents'
    return search.meili_api(requests.post, route, stories)
def get_update(update_id):
    """GET the search server's task record for `update_id`."""
    route = 'tasks/{}'.format(update_id)
    return search.meili_api(requests.get, route)
if __name__ == '__main__':
    # Reindex every stored story into the search index in BATCH_SIZE
    # batches, waiting for each batch's indexing task before the next.
    num_stories = database.count_stories()
    print('Reindex {} stories?'.format(num_stories))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()

    story_list = database.get_story_list()
    count = 1

    while len(story_list):
        stories = []

        for _ in range(BATCH_SIZE):
            try:
                sid = story_list.pop()
            except IndexError:
                break

            story = database.get_story(sid)
            print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
            story_obj = json.loads(story.meta_json)
            stories.append(story_obj)
            count += 1

        res = put_stories(stories)
        if not res:
            # A falsy result has no task id to poll, so stop here.
            sys.exit('Submitting batch to search index failed, aborting.')
        update_id = res['uid']

        print('Waiting for processing', end='')
        while True:
            update = get_update(update_id)
            # A falsy result is treated as "not finished yet" and polled again.
            status = update['status'] if update else None
            if status == 'succeeded':
                break
            if status == 'failed':
                # A failed task never reaches 'succeeded'; polling on would
                # spin forever.
                print()
                sys.exit('Indexing task {} failed: {}'.format(update_id, update))
            time.sleep(0.5)
            print('.', end='', flush=True)

        print()

    print('Done.')

View File

@@ -1,23 +0,0 @@
import time
import requests
def test_search_api():
    """Time repeated search requests against the local API and print the mean.

    Issues 100 sequential GET requests for the query 'iphone' and prints the
    average duration in seconds. Any HTTP error status raises via
    raise_for_status(), aborting the run.
    """
    num_tests = 100
    total_time = 0

    for _ in range(num_tests):
        # perf_counter() is monotonic, so the measurement cannot be skewed
        # by system clock adjustments the way time.time() can.
        start = time.perf_counter()
        # requests has no default timeout; without one a stalled server
        # would hang the benchmark indefinitely.
        res = requests.get('http://127.0.0.1:33842/api/search?q=iphone', timeout=10)
        res.raise_for_status()
        total_time += time.perf_counter() - start

    avg_time = total_time / num_tests

    print('Average search time:', avg_time)


if __name__ == '__main__':
    test_search_api()

View File

@@ -4,62 +4,86 @@ logging.basicConfig(
level=logging.DEBUG)
import requests
import settings
SEARCH_ENABLED = bool(settings.MEILI_URL)
MEILI_URL = 'http://127.0.0.1:7700/'
def meili_api(method, route, json=None, params=None, parse_json=True):
def create_index():
try:
r = method(settings.MEILI_URL + route, json=json, params=params, timeout=4)
if r.status_code > 299:
json = dict(name='qotnews', uid='qotnews')
r = requests.post(MEILI_URL + 'indexes', json=json, timeout=2)
if r.status_code != 201:
raise Exception('Bad response code ' + str(r.status_code))
if parse_json:
return r.json()
else:
r.encoding = 'utf-8'
return r.text
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e))
logging.error('Problem creating MeiliSearch index: {}'.format(str(e)))
return False
def create_index():
json = dict(uid='qotnews', primaryKey='id')
return meili_api(requests.post, 'indexes', json=json)
def update_rankings():
json = ['typo', 'words', 'proximity', 'date:desc', 'exactness']
return meili_api(requests.post, 'indexes/qotnews/settings/ranking-rules', json=json)
try:
json = ['typo', 'words', 'proximity', 'attribute', 'desc(date)', 'wordsPosition', 'exactness']
r = requests.post(MEILI_URL + 'indexes/qotnews/settings/ranking-rules', json=json, timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem setting MeiliSearch ranking rules: {}'.format(str(e)))
return False
def update_attributes():
json = ['title', 'url', 'author']
r = meili_api(requests.post, 'indexes/qotnews/settings/searchable-attributes', json=json)
json = ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments']
r = meili_api(requests.post, 'indexes/qotnews/settings/displayed-attributes', json=json)
return r
try:
json = ['title', 'url', 'author', 'link', 'id']
r = requests.post(MEILI_URL + 'indexes/qotnews/settings/searchable-attributes', json=json, timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
r = requests.delete(MEILI_URL + 'indexes/qotnews/settings/displayed-attributes', timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem setting MeiliSearch searchable attributes: {}'.format(str(e)))
return False
def init():
if not SEARCH_ENABLED:
logging.info('Search is not enabled, skipping init.')
return
print(create_index())
create_index()
update_rankings()
update_attributes()
def put_story(story):
if not SEARCH_ENABLED: return
return meili_api(requests.post, 'indexes/qotnews/documents', [story])
story = story.copy()
story.pop('text', None)
story.pop('comments', None)
try:
r = requests.post(MEILI_URL + 'indexes/qotnews/documents', json=[story], timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem putting MeiliSearch story: {}'.format(str(e)))
return False
def search(q):
if not SEARCH_ENABLED: return []
params = dict(q=q, limit=settings.FEED_LENGTH)
r = meili_api(requests.get, 'indexes/qotnews/search', params=params, parse_json=False)
return r
try:
params = dict(q=q, limit=250)
r = requests.get(MEILI_URL + 'indexes/qotnews/search', params=params, timeout=2)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()['hits']
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem searching MeiliSearch: {}'.format(str(e)))
return False
if __name__ == '__main__':
init()
create_index()
print(update_rankings())
print(search('facebook'))
print(search('the'))

View File

@@ -1,8 +1,7 @@
import os, logging
DEBUG = os.environ.get('DEBUG')
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG if DEBUG else logging.INFO)
level=logging.INFO)
import gevent
from gevent import monkey
@@ -14,46 +13,22 @@ import json
import threading
import traceback
import time
import datetime
import humanize
import urllib.request
from urllib.parse import urlparse, parse_qs
import settings
import database
import search
import feed
from utils import gen_rand_id, NUM_ID_CHARS
from utils import gen_rand_id
from flask import abort, Flask, request, render_template, stream_with_context, Response
from werkzeug.exceptions import NotFound
from flask_cors import CORS
smallweb_set = set()
def load_smallweb_list():
EXCLUDED = [
'github.com',
]
global smallweb_set
try:
url = 'https://raw.githubusercontent.com/kagisearch/smallweb/refs/heads/main/smallweb.txt'
with urllib.request.urlopen(url, timeout=10) as response:
urls = response.read().decode('utf-8').splitlines()
hosts = {urlparse(u).hostname for u in urls if u and urlparse(u).hostname}
smallweb_set = {h.replace('www.', '') for h in hosts if h not in EXCLUDED}
logging.info('Loaded {} smallweb domains.'.format(len(smallweb_set)))
except Exception as e:
logging.error('Failed to load smallweb list: {}'.format(e))
load_smallweb_list()
database.init()
search.init()
FEED_LENGTH = 75
news_index = 0
ref_list = []
current_item = {}
def new_id():
nid = gen_rand_id()
@@ -61,99 +36,33 @@ def new_id():
nid = gen_rand_id()
return nid
def fromnow(ts):
return humanize.naturaltime(datetime.datetime.fromtimestamp(ts))
build_folder = './build'
build_folder = '../webclient/build'
flask_app = Flask(__name__, template_folder=build_folder, static_folder=build_folder, static_url_path='')
flask_app.jinja_env.filters['fromnow'] = fromnow
cors = CORS(flask_app)
@flask_app.route('/api')
def api():
skip = request.args.get('skip', 0)
limit = request.args.get('limit', settings.FEED_LENGTH)
if request.args.get('smallweb') == 'true' and smallweb_set:
limit = int(limit)
skip = int(skip)
filtered_stories = []
current_skip = skip
while len(filtered_stories) < limit:
stories_batch = database.get_stories(limit, current_skip)
if not stories_batch:
break
for story_str in stories_batch:
story = json.loads(story_str)
story_url = story.get('url') or story.get('link') or ''
if not story_url:
continue
hostname = urlparse(story_url).hostname
if hostname:
hostname = hostname.replace('www.', '')
if hostname in smallweb_set:
filtered_stories.append(story_str)
if len(filtered_stories) == limit:
break
if len(filtered_stories) == limit:
break
current_skip += limit
stories = filtered_stories
else:
stories = database.get_stories(limit, skip)
stories = database.get_stories(FEED_LENGTH)
# hacky nested json
res = Response('{"stories":[' + ','.join(stories) + ']}')
res.headers['content-type'] = 'application/json'
return res
@flask_app.route('/api/stats', strict_slashes=False)
def apistats():
stats = {
'news_index': news_index,
'ref_list': ref_list,
'len_ref_list': len(ref_list),
'current_item': current_item,
'total_stories': database.count_stories(),
'id_space': 26**NUM_ID_CHARS,
}
return stats
@flask_app.route('/api/search', strict_slashes=False)
def apisearch():
q = request.args.get('q', '')
if len(q) >= 3:
results = search.search(q)
else:
results = '[]'
res = Response(results)
res.headers['content-type'] = 'application/json'
return res
results = []
return dict(results=results)
@flask_app.route('/api/submit', methods=['POST'], strict_slashes=False)
def submit():
try:
url = request.form['url']
for prefix in ['http://', 'https://']:
if url.lower().startswith(prefix):
break
else: # for
url = 'http://' + url
nid = new_id()
logging.info('Manual submission: ' + url)
parse = urlparse(url)
if 'news.ycombinator.com' in parse.hostname:
source = 'hackernews'
@@ -161,9 +70,6 @@ def submit():
elif 'tildes.net' in parse.hostname and '~' in url:
source = 'tildes'
ref = parse.path.split('/')[2]
elif 'lobste.rs' in parse.hostname and '/s/' in url:
source = 'lobsters'
ref = parse.path.split('/')[2]
elif 'reddit.com' in parse.hostname and 'comments' in url:
source = 'reddit'
ref = parse.path.split('/')[4]
@@ -174,11 +80,6 @@ def submit():
ref = url
existing = database.get_story_by_ref(ref)
if existing and DEBUG:
ref = ref + '#' + str(time.time())
existing = False
if existing:
return {'nid': existing.sid}
else:
@@ -187,20 +88,14 @@ def submit():
if valid:
database.put_story(story)
search.put_story(story)
if DEBUG:
logging.info('Adding manual ref: {}, id: {}, source: {}'.format(ref, nid, source))
database.put_ref(ref, nid, source)
return {'nid': nid}
else:
raise Exception('Invalid article')
except Exception as e:
msg = 'Problem with article submission: {} - {}'.format(e.__class__.__name__, str(e))
logging.error(msg)
except BaseException as e:
logging.error('Problem with article submission: {} - {}'.format(e.__class__.__name__, str(e)))
print(traceback.format_exc())
return {'error': msg.split('\n')[0]}, 400
abort(400)
@flask_app.route('/api/<sid>')
@@ -217,19 +112,10 @@ def story(sid):
@flask_app.route('/')
@flask_app.route('/search')
def index():
stories_json = database.get_stories(settings.FEED_LENGTH, 0)
stories = [json.loads(s) for s in stories_json]
for s in stories:
url = urlparse(s.get('url') or s.get('link') or '').hostname or ''
s['hostname'] = url.replace('www.', '')
return render_template('index.html',
title='QotNews',
url='news.t0.vc',
description='Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode',
robots='index',
stories=stories,
)
title='Feed',
url='news.t0.vc',
description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')
@flask_app.route('/<sid>', strict_slashes=False)
@flask_app.route('/<sid>/c', strict_slashes=False)
@@ -239,9 +125,9 @@ def static_story(sid):
except NotFound:
pass
story_obj = database.get_story(sid)
if not story_obj: return abort(404)
story = json.loads(story_obj.full_json)
story = database.get_story(sid)
if not story: return abort(404)
story = json.loads(story.full_json)
score = story['score']
num_comments = story['num_comments']
@@ -250,22 +136,18 @@ def static_story(sid):
score, 's' if score != 1 else '',
num_comments, 's' if num_comments != 1 else '',
source)
url = urlparse(story.get('url') or story.get('link') or '').hostname or ''
url = urlparse(story['url']).hostname or urlparse(story['link']).hostname or ''
url = url.replace('www.', '')
return render_template('index.html',
title=story['title'] + ' | QotNews',
url=url,
description=description,
robots='noindex',
story=story,
show_comments=request.path.endswith('/c'),
)
title=story['title'],
url=url,
description=description)
http_server = WSGIServer(('', 33842), flask_app)
def feed_thread():
global news_index, ref_list, current_item
global news_index
try:
while True:
@@ -276,51 +158,50 @@ def feed_thread():
continue
try:
nid = new_id()
logging.info('Adding ref: {}, id: {}, source: {}'.format(ref, nid, source))
database.put_ref(ref, nid, source)
logging.info('Added ref ' + ref)
except database.IntegrityError:
logging.info('Already have ID / ref, skipping.')
continue
ref_list = database.get_reflist(settings.FEED_LENGTH)
ref_list = database.get_reflist(FEED_LENGTH)
# update current stories
if news_index < len(ref_list):
current_item = ref_list[news_index]
item = ref_list[news_index]
try:
story_json = database.get_story(current_item['sid']).full_json
story_json = database.get_story(item['sid']).full_json
story = json.loads(story_json)
except AttributeError:
story = dict(id=current_item['sid'], ref=current_item['ref'], source=current_item['source'])
story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
logging.info('Updating {} story: {}, index: {}'.format(story['source'], story['ref'], news_index))
logging.info('Updating story: ' + str(story['ref']) + ', index: ' + str(news_index))
valid = feed.update_story(story)
if valid:
database.put_story(story)
search.put_story(story)
else:
database.del_ref(current_item['ref'])
logging.info('Removed ref {}'.format(current_item['ref']))
database.del_ref(item['ref'])
logging.info('Removed ref {}'.format(item['ref']))
else:
logging.info('Skipping index: ' + str(news_index))
gevent.sleep(6)
news_index += 1
if news_index == settings.FEED_LENGTH: news_index = 0
if news_index == FEED_LENGTH: news_index = 0
except KeyboardInterrupt:
logging.info('Ending feed thread...')
except ValueError as e:
logging.critical('feed_thread error: {} {}'.format(e.__class__.__name__, e))
logging.error('feed_thread error: {} {}'.format(e.__class__.__name__, e))
http_server.stop()
logging.info('Starting Feed thread...')
print('Starting Feed thread...')
gevent.spawn(feed_thread)
logging.info('Starting HTTP thread...')
print('Starting HTTP thread...')
try:
http_server.serve_forever()
except KeyboardInterrupt:

View File

@@ -4,20 +4,24 @@
# Feed Lengths
# Number of top items from each site to pull
# set to 0 to disable that site
FEED_LENGTH = 75
NUM_HACKERNEWS = 15
NUM_LOBSTERS = 10
NUM_REDDIT = 15
NUM_REDDIT = 10
NUM_TILDES = 5
NUM_SUBSTACK = 10
# Meilisearch server URL
# Leave blank if not using search
#MEILI_URL = 'http://127.0.0.1:7700/'
MEILI_URL = ''
# SITEMAP = {
# 'nzherald': { 'url': "https://www.nzherald.co.nz/arcio/news-sitemap/", 'count': 10},
# 'stuff': { 'url': "https://www.stuff.co.nz/sitemap.xml", 'count': 10},
# }
# Readerserver URL
# Leave blank if not using, but that defeats the whole point
READER_URL = 'http://127.0.0.1:33843/'
# SUBSTACK = {
# 'webworm': { 'url': "https://www.webworm.co", 'count': 10},
# 'the bulletin': { 'url': "https://thespinoff.substack.com", 'count': 10},
# }
# CATEGORY = {
# 'rnz national': { 'url': "https://www.rnz.co.nz/news/national", 'count': 10},
# }
# Reddit account info
# leave blank if not using Reddit
@@ -33,9 +37,13 @@ SUBREDDITS = [
'HistoryofIdeas',
'LaymanJournals',
'PhilosophyofScience',
'PoliticsPDFs',
'Scholar',
'StateOfTheUnion',
'TheAgora',
'TrueFilm',
'TrueReddit',
'UniversityofReddit',
'culturalstudies',
'hardscience',
'indepthsports',
@@ -44,7 +52,4 @@ SUBREDDITS = [
'neurophilosophy',
'resilientcommunities',
'worldevents',
'StallmanWasRight',
'EverythingScience',
'longevity',
]

View File

@@ -8,17 +8,8 @@ import string
from bleach.sanitizer import Cleaner
def alert_tanner(message):
try:
logging.info('Alerting Tanner: ' + message)
params = dict(qotnews=message)
requests.get('https://tbot.tannercollin.com/message', params=params, timeout=4)
except BaseException as e:
logging.error('Problem alerting Tanner: ' + str(e))
NUM_ID_CHARS = 4
def gen_rand_id():
return ''.join(random.choice(string.ascii_uppercase) for _ in range(NUM_ID_CHARS))
return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))
def render_md(md):
if md:

View File

@@ -35,7 +35,6 @@ app.post('/', (req, res) => {
const url = req.body.url;
const requestOptions = {
url: url,
gzip: true,
//headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'},
//headers: {'User-Agent': 'Twitterbot/1.0'},
headers: {

File diff suppressed because it is too large Load Diff

View File

@@ -3,7 +3,7 @@
Download MeiliSearch with:
```
wget https://github.com/meilisearch/meilisearch/releases/download/v0.27.0/meilisearch-linux-amd64
wget https://github.com/meilisearch/MeiliSearch/releases/download/v0.11.1/meilisearch-linux-amd64
chmod +x meilisearch-linux-amd64
```

View File

Before

Width:  |  Height:  |  Size: 538 B

After

Width:  |  Height:  |  Size: 538 B

View File

Before

Width:  |  Height:  |  Size: 6.5 KiB

After

Width:  |  Height:  |  Size: 6.5 KiB

View File

Before

Width:  |  Height:  |  Size: 5.4 KiB

After

Width:  |  Height:  |  Size: 5.4 KiB

View File

Before

Width:  |  Height:  |  Size: 500 B

After

Width:  |  Height:  |  Size: 500 B

View File

@@ -4,14 +4,12 @@
"private": true,
"dependencies": {
"abort-controller": "^3.0.0",
"katex": "^0.16.25",
"localforage": "^1.7.3",
"moment": "^2.24.0",
"query-string": "^6.8.3",
"react": "^16.9.0",
"react-dom": "^16.9.0",
"react-helmet": "^5.2.1",
"react-latex-next": "^3.0.0",
"react-router-dom": "^5.0.1",
"react-router-hash-link": "^1.2.2",
"react-scripts": "3.1.1"

View File

@@ -8,8 +8,6 @@
content="{{ description }}"
/>
<meta content="{{ url }}" name="og:site_name">
<meta name="robots" content="{{ robots }}">
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
@@ -28,112 +26,26 @@
work correctly both with client-side routing and a non-root public URL.
Learn how to configure a non-root public URL by running `npm run build`.
-->
<title>{{ title }}</title>
<title>{{ title }} - QotNews</title>
<style>
html {
overflow-y: scroll;
}
body {
background: #eeeeee;
background: #000;
}
.nojs {
color: white;
}
</style>
</head>
<body>
<div id="root">
<div class="container menu">
<p>
<a href="/">QotNews</a>
<br />
<span class="slogan">Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.</span>
</p>
</div>
{% if story %}
<div class="{% if show_comments %}container{% else %}article-container{% endif %}">
<div class="article">
<h1>{{ story.title }}</h1>
{% if show_comments %}
<div class="info">
<a href="/{{ story.id }}">View article</a>
</div>
{% else %}
<div class="info">
Source: <a class="source" href="{{ story.url or story.link }}">{{ url }}</a>
</div>
{% endif %}
<div class="info">
{{ story.score }} points
by <a href="{{ story.author_link }}">{{ story.author }}</a>
{{ story.date | fromnow }}
on <a href="{{ story.link }}">{{ story.source }}</a> |
<a href="/{{ story.id }}/c">
{{ story.num_comments }} comment{{ 's' if story.num_comments != 1 }}
</a>
</div>
{% if not show_comments and story.text %}
<div class="story-text">{{ story.text | safe }}</div>
{% elif show_comments %}
{% macro render_comment(comment, level) %}
<dt></dt>
<dd class="comment{% if level > 0 %} lined{% endif %}">
<div class="info">
<p>
{% if comment.author == story.author %}[OP] {% endif %}{{ comment.author or '[Deleted]' }} | <a href="#{{ comment.author }}{{ comment.date }}" id="{{ comment.author }}{{ comment.date }}">{{ comment.date | fromnow }}</a>
</p>
</div>
<div class="text">{{ (comment.text | safe) if comment.text else '<p>[Empty / deleted comment]</p>' }}</div>
{% if comment.comments %}
<dl>
{% for reply in comment.comments %}
{{ render_comment(reply, level + 1) }}
{% endfor %}
</dl>
{% endif %}
</dd>
{% endmacro %}
<dl class="comments">
{% for comment in story.comments %}{{ render_comment(comment, 0) }}{% endfor %}
</dl>
{% endif %}
</div>
<div class='dot toggleDot'>
<div class='button'>
<a href="/{{ story.id }}{{ '/c' if not show_comments else '' }}">
{{ '' if not show_comments else '' }}
</a>
</div>
</div>
</div>
{% elif stories %}
<div class="container">
{% for story in stories %}
<div class='item'>
<div class='title'>
<a class='link' href='/{{ story.id }}'>
<img class='source-logo' src='/logos/{{ story.source }}.png' alt='{{ story.source }}:' /> {{ story.title }}
</a>
<span class='source'>
(<a class='source' href='{{ story.url or story.link }}'>{{ story.hostname }}</a>)
</span>
</div>
<div class='info'>
{{ story.score }} points
by <a href="{{ story.author_link }}">{{ story.author }}</a>
{{ story.date | fromnow }}
on <a href="{{ story.link }}">{{ story.source }}</a> |
<a class="{{ 'hot' if story.num_comments > 99 else '' }}" href="/{{ story.id }}/c">
{{ story.num_comments }} comment{{ 's' if story.num_comments != 1 }}
</a>
</div>
</div>
{% endfor %}
</div>
{% endif %}
<div class="nojs">
<noscript>You need to enable JavaScript to run this app.</noscript>
</div>
<div id="root"></div>
<!--
This HTML file is a template.
If you open it directly in the browser, you will see an empty page.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 981 B

View File

@@ -1,12 +1,10 @@
import React, { useState, useEffect, useRef, useCallback } from 'react';
import React from 'react';
import { BrowserRouter as Router, Route, Link, Switch } from 'react-router-dom';
import localForage from 'localforage';
import './Style-light.css';
import './Style-dark.css';
import './Style-black.css';
import './Style-red.css';
import './fonts/Fonts.css';
import { BackwardDot, ForwardDot } from './utils.js';
import { ForwardDot } from './utils.js';
import Feed from './Feed.js';
import Article from './Article.js';
import Comments from './Comments.js';
@@ -15,115 +13,72 @@ import Submit from './Submit.js';
import Results from './Results.js';
import ScrollToTop from './ScrollToTop.js';
function App() {
const [theme, setTheme] = useState(localStorage.getItem('theme') || '');
const cache = useRef({});
const [isFullScreen, setIsFullScreen] = useState(!!document.fullscreenElement);
class App extends React.Component {
constructor(props) {
super(props);
const updateCache = useCallback((key, value) => {
cache.current[key] = value;
}, []);
this.state = {
theme: localStorage.getItem('theme') || '',
};
const light = () => {
setTheme('');
this.cache = {};
}
updateCache = (key, value) => {
this.cache[key] = value;
}
light() {
this.setState({ theme: '' });
localStorage.setItem('theme', '');
};
}
const dark = () => {
setTheme('dark');
dark() {
this.setState({ theme: 'dark' });
localStorage.setItem('theme', 'dark');
};
}
const black = () => {
setTheme('black');
localStorage.setItem('theme', 'black');
};
const red = () => {
setTheme('red');
localStorage.setItem('theme', 'red');
};
useEffect(() => {
if (Object.keys(cache.current).length === 0) {
componentDidMount() {
if (!this.cache.length) {
localForage.iterate((value, key) => {
updateCache(key, value);
}).then(() => {
console.log('loaded cache from localforage');
this.updateCache(key, value);
});
console.log('loaded cache from localforage');
}
}, [updateCache]);
}
const goFullScreen = () => {
if ('wakeLock' in navigator) {
navigator.wakeLock.request('screen');
}
document.body.requestFullscreen({ navigationUI: 'hide' });
};
render() {
const theme = this.state.theme;
document.body.style.backgroundColor = theme === 'dark' ? '#000' : '#eeeeee';
const exitFullScreen = () => {
document.exitFullscreen();
};
return (
<div className={theme}>
<Router>
<div className='container menu'>
<p>
<Link to='/'>QotNews - Feed</Link>
<span className='theme'>Theme: <a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a></span>
<br />
<span className='slogan'>Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode.</span>
</p>
<Route path='/(|search)' component={Search} />
<Route path='/(|search)' component={Submit} />
</div>
useEffect(() => {
const onFullScreenChange = () => setIsFullScreen(!!document.fullscreenElement);
document.addEventListener('fullscreenchange', onFullScreenChange);
return () => document.removeEventListener('fullscreenchange', onFullScreenChange);
}, []);
<Route path='/' exact render={(props) => <Feed {...props} updateCache={this.updateCache} />} />
<Switch>
<Route path='/search' component={Results} />
<Route path='/:id' exact render={(props) => <Article {...props} cache={this.cache} />} />
</Switch>
<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} />} />
useEffect(() => {
if (theme === 'dark') {
document.body.style.backgroundColor = '#1a1a1a';
} else if (theme === 'black') {
document.body.style.backgroundColor = '#000';
} else if (theme === 'red') {
document.body.style.backgroundColor = '#000';
} else {
document.body.style.backgroundColor = '#eeeeee';
}
}, [theme]);
<ForwardDot />
const fullScreenAvailable = document.fullscreenEnabled ||
document.mozFullscreenEnabled ||
document.webkitFullscreenEnabled ||
document.msFullscreenEnabled;
return (
<div className={theme}>
<Router>
<div className='container menu'>
<p>
<Link to='/'>QotNews</Link>
<span className='theme'><a href='#' onClick={() => light()}>Light</a> - <a href='#' onClick={() => dark()}>Dark</a> - <a href='#' onClick={() => black()}>Black</a> - <a href='#' onClick={() => red()}>Red</a></span>
<br />
<span className='slogan'>Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.</span>
</p>
{fullScreenAvailable &&
<Route path='/(|search)' render={() => !isFullScreen ?
<button className='fullscreen' onClick={() => goFullScreen()}>Enter Fullscreen</button>
:
<button className='fullscreen' onClick={() => exitFullScreen()}>Exit Fullscreen</button>
} />
}
<Route path='/(|search)' component={Search} />
<Route path='/(|search)' component={Submit} />
</div>
<Route path='/' exact render={(props) => <Feed {...props} updateCache={updateCache} />} />
<Switch>
<Route path='/search' component={Results} />
<Route path='/:id' exact render={(props) => <Article {...props} cache={cache.current} />} />
</Switch>
<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={cache.current} />} />
<BackwardDot />
<ForwardDot />
<ScrollToTop />
</Router>
</div>
);
<ScrollToTop />
</Router>
</div>
);
}
}
export default App;

View File

@@ -1,228 +1,112 @@
import React, { useState, useEffect } from 'react';
import { useParams } from 'react-router-dom';
import React from 'react';
import { Helmet } from 'react-helmet';
import localForage from 'localforage';
import { sourceLink, infoLine, ToggleDot } from './utils.js';
import Latex from 'react-latex-next';
import 'katex/dist/katex.min.css';
const VOID_ELEMENTS = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'];
const DANGEROUS_TAGS = ['svg', 'math'];
class Article extends React.Component {
constructor(props) {
super(props);
const latexDelimiters = [
{ left: '$$', right: '$$', display: true },
{ left: '\\[', right: '\\]', display: true },
{ left: '\\(', right: '\\)', display: false }
];
const id = this.props.match ? this.props.match.params.id : 'CLOL';
const cache = this.props.cache;
function Article({ cache }) {
const { id } = useParams();
if (id in cache) console.log('cache hit');
if (id in cache) console.log('cache hit');
this.state = {
story: cache[id] || false,
error: false,
pConv: [],
};
}
componentDidMount() {
const id = this.props.match ? this.props.match.params.id : 'CLOL';
const [story, setStory] = useState(cache[id] || false);
const [error, setError] = useState('');
const [pConv, setPConv] = useState([]);
const [copyButtonText, setCopyButtonText] = useState('\ue92c');
useEffect(() => {
localForage.getItem(id)
.then(
(value) => {
if (value) {
setStory(value);
this.setState({ story: value });
}
}
);
fetch('/api/' + id)
.then(res => {
if (!res.ok) {
throw new Error(`Server responded with ${res.status} ${res.statusText}`);
}
return res.json();
})
.then(res => res.json())
.then(
(result) => {
setStory(result.story);
this.setState({ story: result.story });
localForage.setItem(id, result.story);
},
(error) => {
const errorMessage = `Failed to fetch new article content (ID: ${id}). Your connection may be down or the server might be experiencing issues. ${error.toString()}.`;
setError(errorMessage);
this.setState({ error: true });
}
);
}, [id]);
}
const copyLink = () => {
navigator.clipboard.writeText(`${story.title}:\n${window.location.href}`).then(() => {
setCopyButtonText('\uea10');
setTimeout(() => setCopyButtonText('\ue92c'), 2000);
}, () => {
setCopyButtonText('\uea0f');
setTimeout(() => setCopyButtonText('\ue92c'), 2000);
});
};
pConvert = (n) => {
this.setState({ pConv: [...this.state.pConv, n]});
}
const pConvert = (n) => {
setPConv(prevPConv => [...prevPConv, n]);
};
render() {
const id = this.props.match ? this.props.match.params.id : 'CLOL';
const story = this.state.story;
const error = this.state.error;
const pConv = this.state.pConv;
let nodes = null;
const isCodeBlock = (v) => {
if (v.localName === 'pre') {
return true;
}
if (v.localName === 'code') {
if (v.closest('p')) {
return false;
}
const parent = v.parentElement;
if (parent) {
const nonWhitespaceChildren = Array.from(parent.childNodes).filter(n => {
return n.nodeType !== Node.TEXT_NODE || n.textContent.trim() !== '';
});
if (nonWhitespaceChildren.length === 1 && nonWhitespaceChildren[0] === v) {
return true;
}
}
}
return false;
};
const renderNodes = (nodes, keyPrefix = '') => {
return Array.from(nodes).map((v, k) => {
const key = `${keyPrefix}${k}`;
if (pConv.includes(key)) {
return (
<React.Fragment key={key}>
{v.textContent.split('\n\n').map((x, i) =>
<p key={i}>{x}</p>
)}
</React.Fragment>
);
}
if (v.nodeName === '#text') {
const text = v.data;
if (text.includes('\\[') || text.includes('\\(') || text.includes('$$')) {
return <Latex key={key} delimiters={latexDelimiters}>{text}</Latex>;
}
// Only wrap top-level text nodes in <p>
if (keyPrefix === '' && v.data.trim() !== '') {
return <p key={key}>{v.data}</p>;
}
return v.data;
}
if (v.nodeType !== Node.ELEMENT_NODE) {
return null;
}
if (DANGEROUS_TAGS.includes(v.localName)) {
return <span key={key} dangerouslySetInnerHTML={{ __html: v.outerHTML }} />;
}
const Tag = v.localName;
if (isCodeBlock(v)) {
return (
<React.Fragment key={key}>
<Tag dangerouslySetInnerHTML={{ __html: v.innerHTML }} />
<button onClick={() => pConvert(key)}>Convert Code to Paragraph</button>
</React.Fragment>
);
}
const textContent = v.textContent.trim();
const isMath = (textContent.startsWith('\\(') && textContent.endsWith('\\)')) ||
(textContent.startsWith('\\[') && textContent.endsWith('\\]')) ||
(textContent.startsWith('$$') && textContent.endsWith('$$'));
const props = { key: key };
if (v.hasAttributes()) {
for (const attr of v.attributes) {
const name = attr.name === 'class' ? 'className' : attr.name;
props[name] = attr.value;
}
}
if (isMath) {
let mathContent = v.textContent;
// align environment requires display math mode
if (mathContent.includes('\\begin{align')) {
const trimmed = mathContent.trim();
if (trimmed.startsWith('\\(')) {
// Replace \( and \) with \[ and \] to switch to display mode
const firstParen = mathContent.indexOf('\\(');
const lastParen = mathContent.lastIndexOf('\\)');
mathContent = mathContent.substring(0, firstParen) + '\\[' + mathContent.substring(firstParen + 2, lastParen) + '\\]' + mathContent.substring(lastParen + 2);
}
}
return <Tag {...props}><Latex delimiters={latexDelimiters}>{mathContent}</Latex></Tag>;
}
if (VOID_ELEMENTS.includes(Tag)) {
return <Tag {...props} />;
}
return (
<Tag {...props}>
{renderNodes(v.childNodes, `${key}-`)}
</Tag>
);
});
};
const nodes = (s) => {
if (s && s.text) {
if (story.text) {
let div = document.createElement('div');
div.innerHTML = s.text;
return div.childNodes;
div.innerHTML = story.text;
nodes = div.childNodes;
}
return null;
};
const storyNodes = nodes(story);
return (
<div className='article-container'>
{error && <p>Connection error?</p>}
{story ?
<div className='article'>
<Helmet>
<title>{story.title} - QotNews</title>
</Helmet>
return (
<div className='article-container'>
{error &&
<details style={{marginBottom: '1rem'}}>
<summary>Connection error? Click to expand.</summary>
<p>{error}</p>
{story && <p>Loaded article from cache.</p>}
</details>
}
{story ?
<div className='article'>
<Helmet>
<title>{story.title} | QotNews</title>
<meta name="robots" content="noindex" />
</Helmet>
<h1>{story.title}</h1>
<h1>{story.title} <button className='copy-button' onClick={copyLink}>{copyButtonText}</button></h1>
<div className='info'>
Source: {sourceLink(story)}
</div>
{infoLine(story)}
{storyNodes ?
<div className='story-text'>
{renderNodes(storyNodes)}
<div className='info'>
Source: {sourceLink(story)}
</div>
:
<p>Problem getting article :(</p>
}
</div>
:
<p>Loading...</p>
}
<ToggleDot id={id} article={false} />
</div>
);
{infoLine(story)}
{nodes ?
<div className='story-text'>
{Object.entries(nodes).map(([k, v]) =>
pConv.includes(k) ?
v.innerHTML.split('\n\n').map(x =>
<p dangerouslySetInnerHTML={{ __html: x }} />
)
:
(v.nodeName === '#text' ?
<p>{v.data}</p>
:
<>
<v.localName dangerouslySetInnerHTML={v.innerHTML ? { __html: v.innerHTML } : null} />
{v.localName == 'pre' && <button onClick={() => this.pConvert(k)}>Convert Code to Paragraph</button>}
</>
)
)}
</div>
:
<p>Problem getting article :(</p>
}
</div>
:
<p>loading...</p>
}
<ToggleDot id={id} article={false} />
</div>
);
}
}
export default Article;

View File

@@ -1,80 +1,83 @@
import React, { useState, useEffect } from 'react';
import { Link, useParams } from 'react-router-dom';
import React from 'react';
import { Link } from 'react-router-dom';
import { HashLink } from 'react-router-hash-link';
import { Helmet } from 'react-helmet';
import moment from 'moment';
import localForage from 'localforage';
import { infoLine, ToggleDot } from './utils.js';
function countComments(c) {
return c.comments.reduce((sum, x) => sum + countComments(x), 1);
}
class Article extends React.Component {
constructor(props) {
super(props);
function Comments({ cache }) {
const { id } = useParams();
const id = this.props.match.params.id;
const cache = this.props.cache;
if (id in cache) console.log('cache hit');
if (id in cache) console.log('cache hit');
const [story, setStory] = useState(cache[id] || false);
const [error, setError] = useState('');
const [collapsed, setCollapsed] = useState([]);
const [expanded, setExpanded] = useState([]);
this.state = {
story: cache[id] || false,
error: false,
collapsed: [],
expanded: [],
};
}
componentDidMount() {
const id = this.props.match.params.id;
useEffect(() => {
localForage.getItem(id)
.then(
(value) => {
if (value) {
setStory(value);
}
this.setState({ story: value });
}
);
fetch('/api/' + id)
.then(res => {
if (!res.ok) {
throw new Error(`Server responded with ${res.status} ${res.statusText}`);
}
return res.json();
})
.then(res => res.json())
.then(
(result) => {
setStory(result.story);
this.setState({ story: result.story }, () => {
const hash = window.location.hash.substring(1);
if (hash) {
document.getElementById(hash).scrollIntoView();
}
});
localForage.setItem(id, result.story);
const hash = window.location.hash.substring(1);
if (hash) {
setTimeout(() => {
const element = document.getElementById(hash);
if (element) {
element.scrollIntoView();
}
}, 0);
}
},
(error) => {
const errorMessage = `Failed to fetch comments (ID: ${id}). Your connection may be down or the server might be experiencing issues. ${error.toString()}.`;
setError(errorMessage);
this.setState({ error: true });
}
);
}, [id]);
}
const collapseComment = (cid) => {
setCollapsed(prev => [...prev, cid]);
setExpanded(prev => prev.filter(x => x !== cid));
};
collapseComment(cid) {
this.setState(prevState => ({
...prevState,
collapsed: [...prevState.collapsed, cid],
expanded: prevState.expanded.filter(x => x !== cid),
}));
}
const expandComment = (cid) => {
setCollapsed(prev => prev.filter(x => x !== cid));
setExpanded(prev => [...prev, cid]);
};
expandComment(cid) {
this.setState(prevState => ({
...prevState,
collapsed: prevState.collapsed.filter(x => x !== cid),
expanded: [...prevState.expanded, cid],
}));
}
const displayComment = (story, c, level) => {
countComments(c) {
return c.comments.reduce((sum, x) => sum + this.countComments(x), 1);
}
displayComment(story, c, level) {
const cid = c.author+c.date;
const isCollapsed = collapsed.includes(cid);
const isExpanded = expanded.includes(cid);
const collapsed = this.state.collapsed.includes(cid);
const expanded = this.state.expanded.includes(cid);
const hidden = isCollapsed || (level == 4 && !isExpanded);
const hidden = collapsed || (level == 4 && !expanded);
const hasChildren = c.comments.length !== 0;
return (
@@ -85,56 +88,55 @@ function Comments({ cache }) {
{' '} | <HashLink to={'#'+cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
{hidden || hasChildren &&
<button className='collapser pointer' onClick={() => collapseComment(cid)}></button>
<span className='collapser pointer' onClick={() => this.collapseComment(cid)}></span>
}
</p>
</div>
<div className={isCollapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text || '<p>[Empty / deleted comment]</p>'}} />
<div className={collapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text }} />
{hidden && hasChildren ?
<button className='comment lined info pointer' onClick={() => expandComment(cid)}>[show {countComments(c)-1} more]</button>
<div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c)-1} more]</div>
:
c.comments.map(i => displayComment(story, i, level + 1))
c.comments.map(i => this.displayComment(story, i, level + 1))
}
</div>
);
};
}
return (
<div className='container'>
{error &&
<details style={{marginBottom: '1rem'}}>
<summary>Connection error? Click to expand.</summary>
<p>{error}</p>
{story && <p>Loaded comments from cache.</p>}
</details>
}
{story ?
<div className='article'>
<Helmet>
<title>{story.title} | QotNews</title>
<meta name="robots" content="noindex" />
</Helmet>
render() {
const id = this.props.match.params.id;
const story = this.state.story;
const error = this.state.error;
<h1>{story.title}</h1>
return (
<div className='container'>
{error && <p>Connection error?</p>}
{story ?
<div className='article'>
<Helmet>
<title>{story.title} - QotNews Comments</title>
</Helmet>
<div className='info'>
<Link to={'/' + story.id}>View article</Link>
<h1>{story.title}</h1>
<div className='info'>
<Link to={'/' + story.id}>View article</Link>
</div>
{infoLine(story)}
<div className='comments'>
{story.comments.map(c => this.displayComment(story, c, 0))}
</div>
</div>
{infoLine(story)}
<div className='comments'>
{story.comments.map(c => displayComment(story, c, 0))}
</div>
</div>
:
<p>loading...</p>
}
<ToggleDot id={id} article={true} />
</div>
);
:
<p>loading...</p>
}
<ToggleDot id={id} article={true} />
</div>
);
}
}
export default Comments;
export default Article;

View File

@@ -1,159 +1,89 @@
import React, { useState, useEffect } from 'react';
import React from 'react';
import { Link } from 'react-router-dom';
import { Helmet } from 'react-helmet';
import localForage from 'localforage';
import { sourceLink, infoLine, logos } from './utils.js';
function Feed({ updateCache }) {
const [stories, setStories] = useState(() => JSON.parse(localStorage.getItem('stories')) || false);
const [error, setError] = useState('');
const [loadingStatus, setLoadingStatus] = useState(null);
const [filterSmallweb, setFilterSmallweb] = useState(() => localStorage.getItem('filterSmallweb') === 'true');
class Feed extends React.Component {
constructor(props) {
super(props);
const handleFilterChange = e => {
const isChecked = e.target.checked;
setStories(false);
setFilterSmallweb(isChecked);
localStorage.setItem('filterSmallweb', isChecked);
};
this.state = {
stories: JSON.parse(localStorage.getItem('stories')) || false,
error: false,
};
}
useEffect(() => {
const controller = new AbortController();
fetch(filterSmallweb ? '/api?smallweb=true' : '/api', { signal: controller.signal })
.then(res => {
if (!res.ok) {
throw new Error(`Server responded with ${res.status} ${res.statusText}`);
}
return res.json();
})
componentDidMount() {
fetch('/api')
.then(res => res.json())
.then(
async (result) => {
const newApiStories = result.stories;
(result) => {
const updated = !this.state.stories || this.state.stories[0].id !== result.stories[0].id;
console.log('updated:', updated);
const updated = !stories || !stories.length || stories[0].id !== newApiStories[0].id;
console.log('New stories available:', updated);
const { stories } = result;
this.setState({ stories });
localStorage.setItem('stories', JSON.stringify(stories));
if (!updated) return;
setLoadingStatus({ current: 0, total: newApiStories.length });
let currentStories = Array.isArray(stories) ? [...stories] : [];
let preloadedCount = 0;
for (const [index, newStory] of newApiStories.entries()) {
if (controller.signal.aborted) {
break;
}
try {
const storyFetchController = new AbortController();
const timeoutId = setTimeout(() => storyFetchController.abort(), 10000); // 10-second timeout
const storyRes = await fetch('/api/' + newStory.id, { signal: storyFetchController.signal });
clearTimeout(timeoutId);
if (!storyRes.ok) {
throw new Error(`Server responded with ${storyRes.status} ${storyRes.statusText}`);
}
const storyResult = await storyRes.json();
const fullStory = storyResult.story;
await localForage.setItem(fullStory.id, fullStory);
console.log('Preloaded story:', fullStory.id, fullStory.title);
updateCache(fullStory.id, fullStory);
preloadedCount++;
setLoadingStatus({ current: preloadedCount, total: newApiStories.length });
const existingStoryIndex = currentStories.findIndex(s => s.id === newStory.id);
if (existingStoryIndex > -1) {
currentStories.splice(existingStoryIndex, 1);
}
currentStories.splice(index, 0, newStory);
localStorage.setItem('stories', JSON.stringify(currentStories));
setStories(currentStories);
} catch (error) {
let errorMessage;
if (error.name === 'AbortError') {
errorMessage = `The request to fetch story '${newStory.title}' (${newStory.id}) timed out after 10 seconds. Your connection may be unstable. (${preloadedCount} / ${newApiStories.length} stories preloaded)`;
console.log('Fetch timed out for story:', newStory.id);
} else {
errorMessage = `An error occurred while fetching story '${newStory.title}' (ID: ${newStory.id}): ${error.toString()}. (${preloadedCount} / ${newApiStories.length} stories preloaded)`;
console.log('Fetch failed for story:', newStory.id, error);
}
setError(errorMessage);
break;
}
if (updated) {
localForage.clear();
stories.forEach((x, i) => {
fetch('/api/' + x.id)
.then(res => res.json())
.then(({ story }) => {
localForage.setItem(x.id, story)
.then(console.log('preloaded', x.id, x.title));
this.props.updateCache(x.id, story);
}, error => { }
);
});
}
const finalStories = currentStories.slice(0, newApiStories.length);
const removedStories = currentStories.slice(newApiStories.length);
for (const story of removedStories) {
console.log('Removed story:', story.id, story.title);
localForage.removeItem(story.id);
}
localStorage.setItem('stories', JSON.stringify(finalStories));
setStories(finalStories);
setLoadingStatus(null);
},
(error) => {
if (error.name === 'AbortError') {
console.log('Feed fetch aborted.');
return;
}
const errorMessage = `Failed to fetch the main story list from the API. Your connection may be down or the server might be experiencing issues. ${error.toString()}.`;
setError(errorMessage);
this.setState({ error: true });
}
);
}
return () => controller.abort();
}, [updateCache, filterSmallweb]);
render() {
const stories = this.state.stories;
const error = this.state.error;
return (
<div className='container'>
<Helmet>
<title>QotNews</title>
<meta name="robots" content="index" />
</Helmet>
if (stories) {
stories.sort((a, b) => b.date - a.date);
}
<div style={{marginBottom: '1rem'}}>
<input type="checkbox" id="filter-smallweb" className="checkbox" checked={filterSmallweb} onChange={handleFilterChange} />
<label htmlFor="filter-smallweb">Only Smallweb</label>
</div>
return (
<div className='container'>
<Helmet>
<title>Feed - QotNews</title>
</Helmet>
{error && <p>Connection error?</p>}
{stories ?
<div>
{stories.map((x, i) =>
<div className='item' key={i}>
<div className='title'>
<Link className='link' to={'/' + x.id}>
<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
</Link>
{error &&
<details style={{marginBottom: '1rem'}}>
<summary>Connection error? Click to expand.</summary>
<p>{error}</p>
{stories && <p>Loaded feed from cache.</p>}
</details>
}
<span className='source'>
&#8203;({sourceLink(x)})
</span>
</div>
{stories ?
<div>
{stories.map(x =>
<div className='item' key={x.id}>
<div className='title'>
<Link className='link' to={'/' + x.id}>
<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
</Link>
<span className='source'>
({sourceLink(x)})
</span>
{infoLine(x)}
</div>
{infoLine(x)}
</div>
)}
</div>
:
<p>Loading...</p>
}
{loadingStatus && <p>Preloading stories {loadingStatus.current} / {loadingStatus.total}...</p>}
</div>
);
)}
</div>
:
<p>loading...</p>
}
</div>
);
}
}
export default Feed;

View File

@@ -1,73 +1,95 @@
import React, { useState, useEffect } from 'react';
import { Link, useLocation } from 'react-router-dom';
import React from 'react';
import { Link } from 'react-router-dom';
import { Helmet } from 'react-helmet';
import { sourceLink, infoLine, logos } from './utils.js';
import AbortController from 'abort-controller';
function Results() {
const [stories, setStories] = useState(false);
const [error, setError] = useState(false);
const location = useLocation();
class Results extends React.Component {
constructor(props) {
super(props);
useEffect(() => {
const controller = new AbortController();
const signal = controller.signal;
this.state = {
stories: false,
error: false,
};
const search = location.search;
this.controller = null;
}
performSearch = () => {
if (this.controller) {
this.controller.abort();
}
this.controller = new AbortController();
const signal = this.controller.signal;
const search = this.props.location.search;
fetch('/api/search' + search, { method: 'get', signal: signal })
.then(res => res.json())
.then(
(result) => {
setStories(result.hits);
this.setState({ stories: result.results });
},
(error) => {
if (error.message !== 'The operation was aborted. ') {
setError(true);
this.setState({ error: true });
}
}
);
}
return () => {
controller.abort();
};
}, [location.search]);
componentDidMount() {
this.performSearch();
}
return (
<div className='container'>
<Helmet>
<title>Search Results | QotNews</title>
</Helmet>
{error && <p>Connection error?</p>}
{stories ?
<>
<p>Search results:</p>
<div className='comment lined'>
{stories.length ?
stories.map(x =>
<div className='item' key={x.id}>
<div className='title'>
<Link className='link' to={'/' + x.id}>
<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
</Link>
componentDidUpdate(prevProps) {
if (this.props.location.search !== prevProps.location.search) {
this.performSearch();
}
}
<span className='source'>
({sourceLink(x)})
</span>
render() {
const stories = this.state.stories;
const error = this.state.error;
return (
<div className='container'>
<Helmet>
<title>Feed - QotNews</title>
</Helmet>
{error && <p>Connection error?</p>}
{stories ?
<>
<p>Search results:</p>
<div className='comment lined'>
{stories.length ?
stories.map((x, i) =>
<div className='item' key={i}>
<div className='title'>
<Link className='link' to={'/' + x.id}>
<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
</Link>
<span className='source'>
&#8203;({sourceLink(x)})
</span>
</div>
{infoLine(x)}
</div>
{infoLine(x)}
</div>
)
:
<p>none</p>
}
</div>
</>
:
<p>loading...</p>
}
</div>
);
)
:
<p>none</p>
}
</div>
</>
:
<p>loading...</p>
}
</div>
);
}
}
export default Results;

View File

@@ -15,7 +15,6 @@ class ScrollToTop extends React.Component {
}
window.scrollTo(0, 0);
document.body.scrollTop = 0;
}
render() {

View File

@@ -1,46 +1,51 @@
import React, { useState, useRef } from 'react';
import { useHistory, useLocation } from 'react-router-dom';
import React, { Component } from 'react';
import { withRouter } from 'react-router-dom';
import queryString from 'query-string';
const getSearch = location => queryString.parse(location.search).q || '';
const getSearch = props => queryString.parse(props.location.search).q;
function Search() {
const history = useHistory();
const location = useLocation();
class Search extends Component {
constructor(props) {
super(props);
const [search, setSearch] = useState(getSearch(location));
const inputRef = useRef(null);
this.state = {search: getSearch(this.props)};
this.inputRef = React.createRef();
}
const searchArticles = (event) => {
const newSearch = event.target.value;
setSearch(newSearch);
if (newSearch.length >= 3) {
const searchQuery = queryString.stringify({ 'q': newSearch });
history.replace('/search?' + searchQuery);
searchArticles = (event) => {
const search = event.target.value;
this.setState({search: search});
if (search.length >= 3) {
const searchQuery = queryString.stringify({ 'q': search });
this.props.history.replace('/search?' + searchQuery);
} else {
history.replace('/');
this.props.history.replace('/');
}
}
const searchAgain = (event) => {
searchAgain = (event) => {
event.preventDefault();
const searchString = queryString.stringify({ 'q': event.target[0].value });
history.push('/search?' + searchString);
inputRef.current.blur();
this.props.history.push('/search?' + searchString);
this.inputRef.current.blur();
}
return (
<span className='search'>
<form onSubmit={searchAgain}>
<input
placeholder='Search...'
value={search}
onChange={searchArticles}
ref={inputRef}
/>
</form>
</span>
);
render() {
const search = this.state.search;
return (
<span className='search'>
<form onSubmit={this.searchAgain}>
<input
placeholder='Search... (fixed)'
value={search}
onChange={this.searchArticles}
ref={this.inputRef}
/>
</form>
</span>
);
}
}
export default Search;
export default withRouter(Search);

View File

@@ -1,77 +0,0 @@
.black {
color: #ddd;
}
.black a {
color: #ddd;
}
.black input {
color: #ddd;
border: 1px solid #828282;
}
.black .menu button,
.black .story-text button {
background-color: #444444;
border-color: #bbb;
color: #ddd;
}
.black .item {
color: #828282;
}
.black .item .source-logo {
filter: grayscale(1);
}
.black .item a {
color: #828282;
}
.black .item a.link {
color: #ddd;
}
.black .item a.link:visited {
color: #828282;
}
.black .item .info a.hot {
color: #cccccc;
}
.black .article a {
border-bottom: 1px solid #aaaaaa;
}
.black .article u {
border-bottom: 1px solid #aaaaaa;
text-decoration: none;
}
.black .story-text video,
.black .story-text img {
filter: brightness(50%);
}
.black .article .info {
color: #828282;
}
.black .article .info a {
border-bottom: none;
color: #828282;
}
.black .comment.lined {
border-left: 1px solid #444444;
}
.black .checkbox:checked + label::after {
border-color: #ddd;
}
.black .copy-button {
color: #828282;
}

View File

@@ -11,17 +11,14 @@
border: 1px solid #828282;
}
.dark .menu button,
.dark .story-text button {
background-color: #444444;
border-color: #bbb;
color: #ddd;
}
.dark .item {
color: #828282;
}
.dark .item .source-logo {
filter: grayscale(1);
}
.dark .item a {
color: #828282;
}
@@ -46,7 +43,6 @@
text-decoration: none;
}
.dark .story-text video,
.dark .story-text img {
filter: brightness(50%);
}
@@ -63,11 +59,3 @@
.dark .comment.lined {
border-left: 1px solid #444444;
}
.dark .checkbox:checked + label::after {
border-color: #ddd;
}
.dark .copy-button {
color: #828282;
}

View File

@@ -2,30 +2,9 @@ body {
text-rendering: optimizeLegibility;
font: 1rem/1.3 sans-serif;
color: #000000;
margin-bottom: 100vh;
word-break: break-word;
font-kerning: normal;
margin: 0;
}
::backdrop {
background-color: rgba(0,0,0,0);
}
body:fullscreen {
overflow-y: scroll !important;
}
body:-ms-fullscreen {
overflow-y: scroll !important;
}
body:-webkit-full-screen {
overflow-y: scroll !important;
}
body:-moz-full-screen {
overflow-y: scroll !important;
}
#root {
margin: 8px 8px 100vh 8px !important;
}
a {
@@ -43,21 +22,10 @@ input {
border-radius: 4px;
}
.fullscreen {
margin: 0.25rem;
padding: 0.25rem;
}
pre {
overflow: auto;
}
.comments pre {
overflow: auto;
white-space: pre-wrap;
overflow-wrap: break-word;
}
.container {
margin: 1rem auto;
max-width: 64rem;
@@ -126,13 +94,6 @@ span.source {
border-bottom: 1px solid #222222;
}
.article-title {
display: flex;
align-items: center;
margin-top: 0.67em;
margin-bottom: 0.67em;
}
.article h1 {
font-size: 1.6rem;
}
@@ -189,13 +150,6 @@ span.source {
.comments {
margin-left: -1.25rem;
margin-top: 0;
margin-bottom: 0;
padding: 0;
}
.comments dl, .comments dd {
margin: 0;
}
.comment {
@@ -208,11 +162,6 @@ span.source {
.comment .text {
margin-top: -0.5rem;
margin-bottom: 1rem;
}
.comment .text > * {
margin-bottom: 0;
}
.comment .text.hidden > p {
@@ -232,49 +181,20 @@ span.source {
padding-right: 1.5rem;
}
button.collapser {
background: transparent;
border: none;
margin: 0;
padding-top: 0;
padding-bottom: 0;
font: inherit;
color: inherit;
}
button.comment {
background: transparent;
border-top: none;
border-right: none;
border-bottom: none;
margin: 0;
padding-top: 0;
padding-right: 0;
padding-bottom: 0;
font: inherit;
color: inherit;
text-align: left;
width: 100%;
}
.comment .pointer {
cursor: pointer;
}
.dot {
cursor: pointer;
.toggleDot {
position: fixed;
bottom: 1rem;
left: 1rem;
height: 3rem;
width: 3rem;
background-color: #828282;
border-radius: 50%;
}
.toggleDot {
bottom: 1rem;
left: 1rem;
}
.toggleDot .button {
font: 2rem/1 'icomoon';
position: relative;
@@ -283,79 +203,23 @@ button.comment {
}
.forwardDot {
cursor: pointer;
position: fixed;
bottom: 1rem;
right: 1rem;
height: 3rem;
width: 3rem;
background-color: #828282;
border-radius: 50%;
}
.forwardDot .button {
font: 2rem/1 'icomoon';
font: 2.5rem/1 'icomoon';
position: relative;
top: 0.5rem;
left: 0.5rem;
}
.backwardDot {
bottom: 1rem;
right: 5rem;
}
.backwardDot .button {
font: 2rem/1 'icomoon';
position: relative;
top: 0.5rem;
left: 0.5rem;
top: 0.25rem;
left: 0.3rem;
}
.search form {
display: inline;
}
.copy-button {
font: 1.5rem/1 'icomoon2';
color: #828282;
background: transparent;
border: none;
cursor: pointer;
vertical-align: middle;
}
.checkbox {
-webkit-appearance: none;
appearance: none;
position: absolute;
opacity: 0;
cursor: pointer;
height: 0;
width: 0;
}
.checkbox + label {
position: relative;
cursor: pointer;
padding-left: 1.75rem;
user-select: none;
}
.checkbox + label::before {
content: '';
position: absolute;
left: 0;
top: 0.1em;
width: 1rem;
height: 1rem;
border: 1px solid #828282;
background-color: transparent;
border-radius: 3px;
}
.checkbox:checked + label::after {
content: "";
position: absolute;
left: 0.35rem;
top: 0.2em;
width: 0.3rem;
height: 0.6rem;
border: solid #000;
border-width: 0 2px 2px 0;
transform: rotate(45deg);
}

View File

@@ -1,95 +0,0 @@
.red {
color: #b00;
scrollbar-color: #b00 #440000;
}
.red a {
color: #b00;
}
.red input {
color: #b00;
border: 1px solid #690000;
}
.red input::placeholder {
color: #690000;
}
.red hr {
background-color: #690000;
}
.red .menu button,
.red .story-text button {
background-color: #440000;
border-color: #b00;
color: #b00;
}
.red .item,
.red .slogan {
color: #690000;
}
.red .item .source-logo {
display: none;
}
.red .item a {
color: #690000;
}
.red .item a.link {
color: #b00;
}
.red .item a.link:visited {
color: #690000;
}
.red .item .info a.hot {
color: #cc0000;
}
.red .article a {
border-bottom: 1px solid #aa0000;
}
.red .article u {
border-bottom: 1px solid #aa0000;
text-decoration: none;
}
.red .story-text video,
.red .story-text img {
filter: grayscale(100%) brightness(20%) sepia(100%) hue-rotate(-50deg) saturate(600%) contrast(0.8);
}
.red .article .info {
color: #690000;
}
.red .article .info a {
border-bottom: none;
color: #690000;
}
.red .comment.lined {
border-left: 1px solid #440000;
}
.red .dot {
background-color: #440000;
}
.red .checkbox + label::before {
border: 1px solid #690000;
}
.red .checkbox:checked + label::after {
border-color: #aa0000;
}
.red .copy-button {
color: #690000;
}

View File

@@ -1,53 +1,54 @@
import React, { useState, useRef } from 'react';
import { useHistory } from 'react-router-dom';
import React, { Component } from 'react';
import { withRouter } from 'react-router-dom';
function Submit() {
const [progress, setProgress] = useState(null);
const inputRef = useRef(null);
const history = useHistory();
class Submit extends Component {
constructor(props) {
super(props);
const submitArticle = async (event) => {
this.state = {
progress: null,
};
this.inputRef = React.createRef();
}
submitArticle = (event) => {
event.preventDefault();
const url = event.target[0].value;
inputRef.current.blur();
this.inputRef.current.blur();
setProgress('Submitting...');
this.setState({ progress: 'Submitting...' });
let data = new FormData();
data.append('url', url);
try {
const res = await fetch('/api/submit', { method: 'POST', body: data });
if (res.ok) {
const result = await res.json();
history.replace('/' + result.nid);
} else {
let errorData;
try {
errorData = await res.json();
} catch (jsonError) {
// Not a JSON error from our API, so it's a server issue
throw new Error(`Server responded with ${res.status} ${res.statusText}`);
fetch('/api/submit', { method: 'POST', body: data })
.then(res => res.json())
.then(
(result) => {
this.props.history.replace('/' + result.nid);
},
(error) => {
this.setState({ progress: 'Error' });
}
setProgress(errorData.error || 'An unknown error occurred.');
}
} catch (error) {
setProgress(`Error: ${error.toString()}`);
}
);
}
return (
<span className='search'>
<form onSubmit={submitArticle}>
<input
placeholder='Submit URL'
ref={inputRef}
/>
</form>
{progress && <p>{progress}</p>}
</span>
);
render() {
const progress = this.state.progress;
return (
<span className='search'>
<form onSubmit={this.submitArticle}>
<input
placeholder='Submit Article'
ref={this.inputRef}
/>
</form>
{progress ? progress : ''}
</span>
);
}
}
export default Submit;
export default withRouter(Submit);

View File

@@ -26,8 +26,3 @@
font-family: 'Icomoon';
src: url('icomoon.ttf') format('truetype');
}
@font-face {
font-family: 'Icomoon2';
src: url('icomoon2.ttf') format('truetype');
}

Binary file not shown.

Binary file not shown.

View File

@@ -8,4 +8,4 @@ ReactDOM.render(<App />, document.getElementById('root'));
// If you want your app to work offline and load faster, you can change
// // unregister() to register() below. Note this comes with some pitfalls.
// // Learn more about service workers: https://bit.ly/CRA-PWA
serviceWorker.unregister();
serviceWorker.register();

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff