Compare commits


47 Commits

SHA1 Message Date
249a616531 Alert on story update error 2024-03-16 20:41:24 +00:00
ab92bd5441 Adjust score and comment thresholds 2024-03-08 03:08:18 +00:00
6b16a768a7 Fix deletion script 2024-03-08 03:08:03 +00:00
57de076fec Increase database timeout 2024-02-27 18:48:56 +00:00
074b898508 Fix lobsters comment parsing 2024-02-27 18:47:00 +00:00
f049d194ab Move scripts into own folder 2024-02-27 18:32:29 +00:00
c2b9a1cb7a Update readability 2024-02-27 18:32:19 +00:00
4435f49e17 Make "dark" theme grey, add "black" theme 2023-09-13 01:19:47 +00:00
494d89ac30 Disable lobsters 2023-09-13 01:02:15 +00:00
e79fca6ecc Replace "indent_level" with "depth" in lobsters API
See:
fe09e5aa31
2023-08-31 07:35:44 +00:00
c65fb69092 Handle Lobsters comment parsing TypeErrors
Too lazy to debug this:

2023-08-29 12:56:35,111 - root - INFO - Updating lobsters story: yktkwr, index: 55
Traceback (most recent call last):
  File "src/gevent/greenlet.py", line 854, in gevent._gevent_cgreenlet.Greenlet.run
  File "/home/tanner/qotnews/apiserver/server.py", line 194, in feed_thread
    valid = feed.update_story(story)
  File "/home/tanner/qotnews/apiserver/feed.py", line 74, in update_story
    res = lobsters.story(story['ref'])
  File "/home/tanner/qotnews/apiserver/feeds/lobsters.py", line 103, in story
    s['comments'] = iter_comments(r['comments'])
  File "/home/tanner/qotnews/apiserver/feeds/lobsters.py", line 76, in iter_comments
    parent_stack = parent_stack[:indent-1]
TypeError: unsupported operand type(s) for -: 'NoneType' and 'int'
2023-08-29T12:56:35Z <Greenlet at 0x7f92ad840ae0: feed_thread> failed with TypeError
2023-08-31 07:30:39 +00:00
632d028e4c Add Tildes group whitelist 2023-07-13 22:54:36 +00:00
ea8e9e5a23 Increase again 2023-06-13 17:11:50 +00:00
2838ea9b41 Increase Tildes story score requirement 2023-06-11 01:01:31 +00:00
f15d108971 Catch all possible Reddit API exceptions 2023-03-15 21:16:37 +00:00
f777348af8 Fix darkmode fullscreen button color 2022-08-11 19:36:36 +00:00
486404a413 Fix fix-stories bug 2022-08-10 04:06:39 +00:00
7c9c07a4cf Hide fullscreen button if it's not available 2022-08-10 04:05:25 +00:00
08d02f6013 Add fullscreen mode 2022-08-08 23:21:49 +00:00
1b54342702 Add red theme 2022-08-08 20:14:57 +00:00
9e9571a3c0 Write fixed stories to database 2022-07-05 00:57:56 +00:00
dc83a70887 Begin script to fix bad gzip text 2022-07-04 20:32:01 +00:00
2e2c9ae837 Move FEED_LENGTH to settings.py, use for search results 2022-07-04 19:08:24 +00:00
61021d8f91 Small UI changes 2022-07-04 19:08:24 +00:00
e65047fead Add accept gzip header to readability server 2022-07-04 19:07:31 +00:00
8e775c189f Add test file 2022-07-04 05:56:06 +00:00
3d9274309a Fix requests text encoding slowness 2022-07-04 05:55:52 +00:00
7bdbbf10b2 Return search results directly from the server 2022-07-04 04:33:01 +00:00
6aa0f78536 Remove Article / Comments, etc thing after name 2022-07-04 04:33:01 +00:00
bf3663bbec Remove hard-coded title 2022-06-30 00:12:22 +00:00
e6589dc61c Adjust title 2022-06-30 00:05:15 +00:00
307e8349f3 Change header based on page 2022-06-30 00:00:30 +00:00
04cd56daa8 Add index / noindex to client 2022-06-29 23:30:39 +00:00
c80769def6 Add noindex meta tag to stories 2022-06-29 23:20:53 +00:00
ebd1ad2140 Increase database timeout 2022-06-24 20:50:27 +00:00
2cc7dd0d6d Update software 2022-05-31 04:24:12 +00:00
6e7cb86d2e Explain no javascript 2022-05-31 04:23:52 +00:00
a25457254f Improve logging, sends tweets to nitter.net 2022-03-05 23:48:46 +00:00
a693ea5342 Remove outline API 2022-03-05 22:05:29 +00:00
7386e1d8b0 Include option to disable readerserver 2022-03-05 22:04:25 +00:00
f8e8597e3a Include option to disable search 2022-03-05 21:58:35 +00:00
55c282ee69 Fix search to work with low-RAM server 2022-03-05 21:33:07 +00:00
3f774a9e38 Improve logging 2021-09-06 00:21:05 +00:00
dcedd4caa1 Add script to reindex search, abstract search API 2021-09-06 00:20:21 +00:00
7a131ebd03 Change the order by which content-type is grabbed 2021-01-30 06:36:02 +00:00
6f64401785 Add optional skip and limit to API route 2021-01-18 03:59:33 +00:00
3ff917e806 Remove colons from date string so Python 3.5 can parse 2020-12-15 23:19:50 +00:00
33 changed files with 953 additions and 502 deletions

View File

@ -5,7 +5,7 @@ from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import IntegrityError
engine = create_engine('sqlite:///data/qotnews.sqlite', connect_args={'timeout': 120})
engine = create_engine('sqlite:///data/qotnews.sqlite', connect_args={'timeout': 360})
Session = sessionmaker(bind=engine)
Base = declarative_base()
@ -68,12 +68,13 @@ def get_reflist(amount):
q = session.query(Reflist).order_by(Reflist.rid.desc()).limit(amount)
return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]
def get_stories(amount):
def get_stories(amount, skip=0):
session = Session()
q = session.query(Reflist, Story.meta_json).\
order_by(Reflist.rid.desc()).\
join(Story).\
filter(Story.title != None).\
offset(skip).\
limit(amount)
return [x[1] for x in q]
@ -100,7 +101,22 @@ def del_ref(ref):
finally:
session.close()
def count_stories():
try:
session = Session()
return session.query(Story).count()
finally:
session.close()
def get_story_list():
try:
session = Session()
return session.query(Story.sid).all()
finally:
session.close()
if __name__ == '__main__':
init()
print(get_story_by_ref('hgi3sy'))
#print(get_story_by_ref('hgi3sy'))
print(len(get_reflist(99999)))
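
For illustration (not part of the changeset), a minimal sketch of how the new pagination helpers in database.py can be used; the page size below is made up:

```
import json
import database

database.init()

print(database.count_stories())                  # total Story rows in the database

first_page = database.get_stories(20)            # newest 20 meta_json strings
second_page = database.get_stories(20, skip=20)  # next 20, via the new offset

for meta_json in first_page:
    story = json.loads(meta_json)
    print(story.get('title'))
```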

View File

@ -10,9 +10,6 @@ from bs4 import BeautifulSoup
import settings
from feeds import hackernews, reddit, tildes, manual, lobsters
OUTLINE_API = 'https://api.outline.com/v3/parse_article'
READ_API = 'http://127.0.0.1:33843'
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com', 'sec.gov']
TWO_DAYS = 60*60*24*2
@ -33,29 +30,16 @@ def list():
return feed
def get_article(url):
try:
params = {'source_url': url}
headers = {'Referer': 'https://outline.com/'}
r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=20)
if r.status_code == 429:
logging.info('Rate limited by outline, sleeping 30s and skipping...')
time.sleep(30)
if not settings.READER_URL:
logging.info('Readerserver not configured, aborting.')
return ''
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
html = r.json()['data']['html']
if 'URL is not supported by Outline' in html:
raise Exception('URL not supported by Outline')
return html
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem outlining article: {}'.format(str(e)))
logging.info('Trying our server instead...')
if url.startswith('https://twitter.com'):
logging.info('Replacing twitter.com url with nitter.net')
url = url.replace('twitter.com', 'nitter.net')
try:
r = requests.post(READ_API, data=dict(url=url), timeout=20)
r = requests.post(settings.READER_URL, data=dict(url=url), timeout=20)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.text
@ -66,24 +50,25 @@ def get_article(url):
return ''
def get_content_type(url):
try:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
return requests.get(url, headers=headers, timeout=5).headers['content-type']
except:
return ''
try:
headers = {
'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
'X-Forwarded-For': '66.249.66.1',
}
return requests.get(url, headers=headers, timeout=5).headers['content-type']
except:
pass
try:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'}
return requests.get(url, headers=headers, timeout=10).headers['content-type']
except:
return ''
pass
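
For illustration (not part of the changeset), a hedged sketch of calling the reordered get_content_type(): the Googlebot User-Agent (with a forwarded-for header) is tried first, then a desktop Firefox User-Agent with a longer timeout. The URL and the text check below are made up:

```
import feed

# Returns the Content-Type header string, or nothing if both attempts fail.
ctype = feed.get_content_type('https://example.com/whitepaper.pdf')
if ctype and not ctype.startswith('text/'):
    print('Not a text page, content-type:', ctype)
```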
def update_story(story, is_manual=False):
res = {}
try:
if story['source'] == 'hackernews':
res = hackernews.story(story['ref'])
elif story['source'] == 'lobsters':
@ -94,6 +79,10 @@ def update_story(story, is_manual=False):
res = tildes.story(story['ref'])
elif story['source'] == 'manual':
res = manual.story(story['ref'])
except BaseException as e:
utils.alert_tanner('Problem updating {} story, ref {}: {}'.format(story['source'], story['ref'], str(e)))
logging.exception(e)
return False
if res:
story.update(res) # join dicts
@ -102,7 +91,7 @@ def update_story(story, is_manual=False):
return False
if story['date'] and not is_manual and story['date'] + TWO_DAYS < time.time():
logging.info('Story too old, removing')
logging.info('Story too old, removing. Date: {}'.format(story['date']))
return False
if story.get('url', '') and not story.get('text', ''):
@ -133,7 +122,7 @@ if __name__ == '__main__':
#print(get_article('https://www.bloomberg.com/news/articles/2019-09-23/xi-s-communists-under-pressure-as-high-prices-hit-china-workers'))
a = get_article('https://blog.joinmastodon.org/2019/10/mastodon-3.0/')
a = get_content_type('https://tefkos.comminfo.rutgers.edu/Courses/e530/Readings/Beal%202008%20full%20text%20searching.pdf')
print(a)
print('done')
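
For illustration (not part of the changeset), a hedged sketch of get_article() after the Outline API removal: article text now comes solely from the readerserver, the call aborts early when settings.READER_URL is blank, and twitter.com links are rewritten to nitter.net before the POST. The URL is made up:

```
import feed

html = feed.get_article('https://twitter.com/someone/status/1')
if not html:
    # '' means the readerserver is disabled (blank READER_URL) or the request failed.
    print('No article text available')
```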

View File

@ -61,11 +61,15 @@ def comment_count(i):
def story(ref):
r = api(API_ITEM, ref)
if not r: return False
if not r:
logging.info('Bad Hackernews API response.')
return False
if 'deleted' in r:
logging.info('Story was deleted.')
return False
elif r.get('type', '') != 'story':
logging.info('Type "{}" is not "story".'.format(r.get('type', '')))
return False
s = {}
@ -80,6 +84,10 @@ def story(ref):
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = comment_count(s) - 1
if s['score'] < 25 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False
if 'text' in r and r['text']:
s['text'] = clean(r['text'] or '')

View File

@ -44,7 +44,8 @@ def feed():
return [x['short_id'] for x in api(API_HOTTEST) or []]
def unix(date_str):
return int(datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%f%z').timestamp())
date_str = date_str.replace(':', '')
return int(datetime.strptime(date_str, '%Y-%m-%dT%H%M%S.%f%z').timestamp())
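
A worked example (not from the changeset) of why the colons are stripped: %z on Python 3.5/3.6 cannot parse an offset written as "+00:00", so the whole string is flattened before parsing (the time's colons go too, hence %H%M%S):

```
from datetime import datetime

date_str = '2023-08-29T12:56:35.000+00:00'
date_str = date_str.replace(':', '')      # '2023-08-29T125635.000+0000'
ts = int(datetime.strptime(date_str, '%Y-%m-%dT%H%M%S.%f%z').timestamp())
print(ts)                                 # 1693313795
```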
def make_comment(i):
c = {}
@ -66,13 +67,13 @@ def iter_comments(flat_comments):
parent_stack = []
for comment in flat_comments:
c = make_comment(comment)
indent = comment['indent_level']
indent = comment['depth']
if indent == 1:
if indent == 0:
nested_comments.append(c)
parent_stack = [c]
else:
parent_stack = parent_stack[:indent-1]
parent_stack = parent_stack[:indent]
p = parent_stack[-1]
p['comments'].append(c)
parent_stack.append(c)
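
For illustration (not from the changeset), a simplified, self-contained version of the new traversal: Lobsters now reports "depth" starting at 0 for top-level comments, so parent_stack[:depth] keeps exactly depth ancestors and each comment attaches one level up:

```
def nest(flat_comments):
    nested, parent_stack = [], []
    for c in flat_comments:
        node = {'text': c['text'], 'comments': []}
        if c['depth'] == 0:
            nested.append(node)
            parent_stack = [node]
        else:
            parent_stack = parent_stack[:c['depth']]
            parent_stack[-1]['comments'].append(node)
            parent_stack.append(node)
    return nested

flat = [
    {'depth': 0, 'text': 'A'},
    {'depth': 1, 'text': 'A.1'},
    {'depth': 2, 'text': 'A.1.a'},
    {'depth': 1, 'text': 'A.2'},
    {'depth': 0, 'text': 'B'},
]
print(nest(flat))
# A nests A.1 and A.2; A.1 nests A.1.a; B is a second top-level comment.
```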
@ -80,11 +81,13 @@ def iter_comments(flat_comments):
def story(ref):
r = api(API_ITEM, ref)
if not r: return False
if not r:
logging.info('Bad Lobsters API response.')
return False
s = {}
try:
s['author'] = r['submitter_user']['username']
s['author'] = r['submitter_user']
s['author_link'] = SITE_AUTHOR_LINK(s['author'])
except KeyError:
s['author'] = ''
@ -100,6 +103,10 @@ def story(ref):
s['comments'] = iter_comments(r['comments'])
s['num_comments'] = r['comment_count']
if s['score'] < 15 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False
if 'description' in r and r['description']:
s['text'] = clean(r['description'] or '')
@ -109,5 +116,5 @@ def story(ref):
if __name__ == '__main__':
#print(feed())
import json
print(json.dumps(story('fzvd1v')))
#print(story(20802050))
print(json.dumps(story('fzvd1v'), indent=4))
#print(json.dumps(story('ixyv5u'), indent=4))

View File

@ -27,7 +27,9 @@ def api(route):
def story(ref):
html = api(ref)
if not html: return False
if not html:
logging.info('Bad http GET response.')
return False
soup = BeautifulSoup(html, features='html.parser')

View File

@ -32,11 +32,8 @@ def feed():
return [x.id for x in reddit.subreddit(subs).hot()]
except KeyboardInterrupt:
raise
except PRAWException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
return []
except PrawcoreException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
except BaseException as e:
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
return []
def comment(i):
@ -59,7 +56,9 @@ def comment(i):
def story(ref):
try:
r = reddit.submission(ref)
if not r: return False
if not r:
logging.info('Bad Reddit API response.')
return False
s = {}
s['author'] = r.author.name if r.author else '[Deleted]'
@ -74,6 +73,7 @@ def story(ref):
s['num_comments'] = r.num_comments
if s['score'] < 25 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False
if r.selftext:
@ -84,10 +84,10 @@ def story(ref):
except KeyboardInterrupt:
raise
except PRAWException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
return False
except PrawcoreException as e:
logging.error('Problem hitting reddit API: {}'.format(str(e)))
logging.critical('Problem hitting reddit API: {}'.format(str(e)))
return False
# scratchpad so I can quickly develop the parser

View File

@ -34,7 +34,7 @@ def api(route):
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem hitting tildes website: {}'.format(str(e)))
logging.critical('Problem hitting tildes website: {}'.format(str(e)))
return False
def feed():
@ -71,11 +71,15 @@ def story(ref):
html = api(SITE_LINK(group_lookup[ref], ref))
else:
html = api(API_ITEM(ref))
if not html: return False
if not html:
logging.info('Bad Tildes API response.')
return False
soup = BeautifulSoup(html, features='html.parser')
a = soup.find('article', class_='topic-full')
if a is None: return False
if a is None:
logging.info('Tildes <article> element not found.')
return False
h = a.find('header')
lu = h.find('a', class_='link-user')
@ -83,6 +87,7 @@ def story(ref):
error = a.find('div', class_='text-error')
if error:
if 'deleted' in error.string or 'removed' in error.string:
logging.info('Article was deleted or removed.')
return False
s = {}
@ -102,7 +107,21 @@ def story(ref):
ch = a.find('header', class_='topic-comments-header')
s['num_comments'] = int(ch.h2.string.split(' ')[0]) if ch else 0
if s['score'] < 8 and s['num_comments'] < 6:
if s['group'].split('.')[0] not in [
'~arts',
'~comp',
'~creative',
'~design',
'~engineering',
'~finance',
'~science',
'~tech',
]:
logging.info('Group ({}) not in whitelist.'.format(s['group']))
return False
if s['score'] < 15 and s['num_comments'] < 10:
logging.info('Score ({}) or num comments ({}) below threshold.'.format(s['score'], s['num_comments']))
return False
td = a.find('div', class_='topic-full-text')
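
A worked illustration (not from the changeset) of the new Tildes group whitelist: only the top-level group is compared, so sub-groups inherit their parent's status. The group names checked below are made up:

```
whitelist = ['~arts', '~comp', '~creative', '~design',
             '~engineering', '~finance', '~science', '~tech']

print('~comp.lang'.split('.')[0] in whitelist)   # True  -> story kept (score check still applies)
print('~tv.movies'.split('.')[0] in whitelist)   # False -> story skipped
```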
@ -113,7 +132,7 @@ def story(ref):
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
#print(feed())
print(feed())
#normal = story('gxt')
#print(normal)
#no_comments = story('gxr')
@ -122,8 +141,8 @@ if __name__ == '__main__':
#print(self_post)
#li_comment = story('gqx')
#print(li_comment)
broken = story('q4y')
print(broken)
#broken = story('q4y')
#print(broken)
# make sure there's no self-reference
#import copy

View File

@ -1,6 +1,8 @@
import database
import search
import sys
import settings
import logging
import json
import requests
@ -21,7 +23,7 @@ def database_del_story(sid):
def search_del_story(sid):
try:
r = requests.delete(search.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
r = requests.delete(settings.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()

View File

@ -0,0 +1,58 @@
import time
import json
import logging
import feed
import database
import search
database.init()
def fix_gzip_bug(story_list):
FIX_THRESHOLD = 150
count = 1
for sid in story_list:
try:
sid = sid[0]
story = database.get_story(sid)
full_json = json.loads(story.full_json)
meta_json = json.loads(story.meta_json)
text = full_json.get('text', '')
count = text.count('�')
if not count: continue
ratio = count / len(text) * 1000
print('Bad story:', sid, 'Num ?:', count, 'Ratio:', ratio)
if ratio < FIX_THRESHOLD: continue
print('Attempting to fix...')
valid = feed.update_story(meta_json, is_manual=True)
if valid:
database.put_story(meta_json)
search.put_story(meta_json)
print('Success')
else:
print('Story was not valid')
time.sleep(3)
except KeyboardInterrupt:
raise
except BaseException as e:
logging.exception(e)
breakpoint()
if __name__ == '__main__':
num_stories = database.count_stories()
print('Fix {} stories?'.format(num_stories))
print('Press ENTER to continue, ctrl-c to cancel')
input()
story_list = database.get_story_list()
fix_gzip_bug(story_list)
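
For illustration (not from the changeset), the detection heuristic in numbers: the script counts U+FFFD replacement characters per 1000 characters of article text and only re-fetches stories whose ratio exceeds FIX_THRESHOLD (150):

```
text = 'Some article text \ufffd\ufffd with mangled \ufffd bytes ' * 10
bad = text.count('\ufffd')
ratio = bad / len(text) * 1000
print(bad, round(ratio, 1))   # 30 replacement chars in 420 -> ratio ~71.4 per 1000
```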

View File

@ -0,0 +1,62 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.INFO)
import database
from sqlalchemy import select
import search
import sys
import time
import json
import requests
database.init()
search.init()
BATCH_SIZE = 5000
def put_stories(stories):
return search.meili_api(requests.post, 'indexes/qotnews/documents', stories)
def get_update(update_id):
return search.meili_api(requests.get, 'tasks/{}'.format(update_id))
if __name__ == '__main__':
num_stories = database.count_stories()
print('Reindex {} stories?'.format(num_stories))
print('Press ENTER to continue, ctrl-c to cancel')
input()
story_list = database.get_story_list()
count = 1
while len(story_list):
stories = []
for _ in range(BATCH_SIZE):
try:
sid = story_list.pop()
except IndexError:
break
story = database.get_story(sid)
print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
story_obj = json.loads(story.meta_json)
stories.append(story_obj)
count += 1
res = put_stories(stories)
update_id = res['uid']
print('Waiting for processing', end='')
while get_update(update_id)['status'] != 'succeeded':
time.sleep(0.5)
print('.', end='', flush=True)
print()
print('Done.')

View File

@ -0,0 +1,23 @@
import time
import requests
def test_search_api():
num_tests = 100
total_time = 0
for i in range(num_tests):
start = time.time()
res = requests.get('http://127.0.0.1:33842/api/search?q=iphone')
res.raise_for_status()
duration = time.time() - start
total_time += duration
avg_time = total_time / num_tests
print('Average search time:', avg_time)
if __name__ == '__main__':
test_search_api()

View File

@ -4,86 +4,62 @@ logging.basicConfig(
level=logging.DEBUG)
import requests
import settings
MEILI_URL = 'http://127.0.0.1:7700/'
SEARCH_ENABLED = bool(settings.MEILI_URL)
def meili_api(method, route, json=None, params=None, parse_json=True):
try:
r = method(settings.MEILI_URL + route, json=json, params=params, timeout=4)
if r.status_code > 299:
raise Exception('Bad response code ' + str(r.status_code))
if parse_json:
return r.json()
else:
r.encoding = 'utf-8'
return r.text
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e))
return False
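
For illustration (not part of the changeset), a hedged sketch of the new meili_api() wrapper, which prefixes settings.MEILI_URL, checks the status code, and can skip JSON parsing. The document and query below are made up:

```
import requests
import search

# Index a single document.
search.meili_api(requests.post, 'indexes/qotnews/documents',
                 [{'id': 'abcd', 'title': 'Example story'}])

# Query, returning the raw JSON text instead of a parsed dict.
raw = search.meili_api(requests.get, 'indexes/qotnews/search',
                       params={'q': 'example', 'limit': 20}, parse_json=False)
print(raw)
```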
def create_index():
try:
json = dict(name='qotnews', uid='qotnews')
r = requests.post(MEILI_URL + 'indexes', json=json, timeout=2)
if r.status_code != 201:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem creating MeiliSearch index: {}'.format(str(e)))
return False
json = dict(uid='qotnews', primaryKey='id')
return meili_api(requests.post, 'indexes', json=json)
def update_rankings():
try:
json = ['typo', 'words', 'proximity', 'attribute', 'desc(date)', 'wordsPosition', 'exactness']
r = requests.post(MEILI_URL + 'indexes/qotnews/settings/ranking-rules', json=json, timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem setting MeiliSearch ranking rules: {}'.format(str(e)))
return False
json = ['typo', 'words', 'proximity', 'date:desc', 'exactness']
return meili_api(requests.post, 'indexes/qotnews/settings/ranking-rules', json=json)
def update_attributes():
try:
json = ['title', 'url', 'author', 'link', 'id']
r = requests.post(MEILI_URL + 'indexes/qotnews/settings/searchable-attributes', json=json, timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
r = requests.delete(MEILI_URL + 'indexes/qotnews/settings/displayed-attributes', timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem setting MeiliSearch searchable attributes: {}'.format(str(e)))
return False
json = ['title', 'url', 'author']
r = meili_api(requests.post, 'indexes/qotnews/settings/searchable-attributes', json=json)
json = ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments']
r = meili_api(requests.post, 'indexes/qotnews/settings/displayed-attributes', json=json)
return r
def init():
create_index()
if not SEARCH_ENABLED:
logging.info('Search is not enabled, skipping init.')
return
print(create_index())
update_rankings()
update_attributes()
def put_story(story):
story = story.copy()
story.pop('text', None)
story.pop('comments', None)
try:
r = requests.post(MEILI_URL + 'indexes/qotnews/documents', json=[story], timeout=2)
if r.status_code != 202:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem putting MeiliSearch story: {}'.format(str(e)))
return False
if not SEARCH_ENABLED: return
return meili_api(requests.post, 'indexes/qotnews/documents', [story])
def search(q):
try:
params = dict(q=q, limit=250)
r = requests.get(MEILI_URL + 'indexes/qotnews/search', params=params, timeout=2)
if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code))
return r.json()['hits']
except KeyboardInterrupt:
raise
except BaseException as e:
logging.error('Problem searching MeiliSearch: {}'.format(str(e)))
return False
if not SEARCH_ENABLED: return []
params = dict(q=q, limit=settings.FEED_LENGTH)
r = meili_api(requests.get, 'indexes/qotnews/search', params=params, parse_json=False)
return r
if __name__ == '__main__':
create_index()
init()
print(search('the'))
print(update_rankings())
print(search('facebook'))

View File

@ -15,6 +15,7 @@ import traceback
import time
from urllib.parse import urlparse, parse_qs
import settings
import database
import search
import feed
@ -27,7 +28,6 @@ from flask_cors import CORS
database.init()
search.init()
FEED_LENGTH = 75
news_index = 0
def new_id():
@ -42,7 +42,9 @@ cors = CORS(flask_app)
@flask_app.route('/api')
def api():
stories = database.get_stories(FEED_LENGTH)
skip = request.args.get('skip', 0)
limit = request.args.get('limit', settings.FEED_LENGTH)
stories = database.get_stories(limit, skip)
# hacky nested json
res = Response('{"stories":[' + ','.join(stories) + ']}')
res.headers['content-type'] = 'application/json'
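
For illustration (not part of the changeset), the /api route now takes optional skip and limit query parameters; a hedged client-side sketch (values made up, port taken from the WSGIServer line below):

```
import requests

r = requests.get('http://127.0.0.1:33842/api', params={'skip': 20, 'limit': 20})
stories = r.json()['stories']
print(len(stories))   # up to 20 stories, starting after the newest 20
```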
@ -54,8 +56,10 @@ def apisearch():
if len(q) >= 3:
results = search.search(q)
else:
results = []
return dict(results=results)
results = '[]'
res = Response(results)
res.headers['content-type'] = 'application/json'
return res
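
For illustration (not part of the changeset), /api/search now passes Meilisearch's raw JSON straight through, so clients read the top-level "hits" array (see the Results.js change further down). A hedged sketch:

```
import requests

r = requests.get('http://127.0.0.1:33842/api/search', params={'q': 'iphone'})
hits = r.json()['hits']
print(len(hits))
```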
@flask_app.route('/api/submit', methods=['POST'], strict_slashes=False)
def submit():
@ -63,6 +67,8 @@ def submit():
url = request.form['url']
nid = new_id()
logging.info('Manual submission: ' + url)
parse = urlparse(url)
if 'news.ycombinator.com' in parse.hostname:
source = 'hackernews'
@ -116,9 +122,11 @@ def story(sid):
@flask_app.route('/search')
def index():
return render_template('index.html',
title='Feed',
title='QotNews',
url='news.t0.vc',
description='Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode')
description='Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode',
robots='index',
)
@flask_app.route('/<sid>', strict_slashes=False)
@flask_app.route('/<sid>/c', strict_slashes=False)
@ -143,9 +151,11 @@ def static_story(sid):
url = url.replace('www.', '')
return render_template('index.html',
title=story['title'],
title=story['title'] + ' | QotNews',
url=url,
description=description)
description=description,
robots='noindex',
)
http_server = WSGIServer(('', 33842), flask_app)
@ -161,12 +171,13 @@ def feed_thread():
continue
try:
nid = new_id()
logging.info('Adding ref: {}, id: {}, source: {}'.format(ref, nid, source))
database.put_ref(ref, nid, source)
logging.info('Added ref ' + ref)
except database.IntegrityError:
logging.info('Already have ID / ref, skipping.')
continue
ref_list = database.get_reflist(FEED_LENGTH)
ref_list = database.get_reflist(settings.FEED_LENGTH)
# update current stories
if news_index < len(ref_list):
@ -178,7 +189,7 @@ def feed_thread():
except AttributeError:
story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
logging.info('Updating story: ' + str(story['ref']) + ', index: ' + str(news_index))
logging.info('Updating {} story: {}, index: {}'.format(story['source'], story['ref'], news_index))
valid = feed.update_story(story)
if valid:
@ -193,18 +204,18 @@ def feed_thread():
gevent.sleep(6)
news_index += 1
if news_index == FEED_LENGTH: news_index = 0
if news_index == settings.FEED_LENGTH: news_index = 0
except KeyboardInterrupt:
logging.info('Ending feed thread...')
except ValueError as e:
logging.error('feed_thread error: {} {}'.format(e.__class__.__name__, e))
logging.critical('feed_thread error: {} {}'.format(e.__class__.__name__, e))
http_server.stop()
print('Starting Feed thread...')
logging.info('Starting Feed thread...')
gevent.spawn(feed_thread)
print('Starting HTTP thread...')
logging.info('Starting HTTP thread...')
try:
http_server.serve_forever()
except KeyboardInterrupt:

View File

@ -4,11 +4,21 @@
# Feed Lengths
# Number of top items from each site to pull
# set to 0 to disable that site
FEED_LENGTH = 75
NUM_HACKERNEWS = 15
NUM_LOBSTERS = 10
NUM_REDDIT = 10
NUM_REDDIT = 15
NUM_TILDES = 5
# Meilisearch server URL
# Leave blank if not using search
#MEILI_URL = 'http://127.0.0.1:7700/'
MEILI_URL = ''
# Readerserver URL
# Leave blank if not using, but that defeats the whole point
READER_URL = 'http://127.0.0.1:33843/'
# Reddit account info
# leave blank if not using Reddit
REDDIT_CLIENT_ID = ''
@ -25,9 +35,7 @@ SUBREDDITS = [
'PhilosophyofScience',
'StateOfTheUnion',
'TheAgora',
'TrueFilm',
'TrueReddit',
'UniversityofReddit',
'culturalstudies',
'hardscience',
'indepthsports',
@ -37,6 +45,6 @@ SUBREDDITS = [
'resilientcommunities',
'worldevents',
'StallmanWasRight',
'DarkFuturology',
'EverythingScience',
'longevity',
]
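
For illustration (not part of the changeset), how the new optional settings are consumed elsewhere in this diff: a blank MEILI_URL disables search (SEARCH_ENABLED in search.py) and a blank READER_URL makes get_article() return '' immediately:

```
import settings

if not settings.MEILI_URL:
    print('search disabled')
if not settings.READER_URL:
    print('article scraping disabled')
print('feed page size:', settings.FEED_LENGTH)
```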

View File

@ -8,6 +8,14 @@ import string
from bleach.sanitizer import Cleaner
def alert_tanner(message):
try:
logger.info('Alerting Tanner: ' + message)
params = dict(qotnews=message)
requests.get('https://tbot.tannercollin.com/message', params=params, timeout=4)
except BaseException as e:
logger.error('Problem alerting Tanner: ' + str(e))
def gen_rand_id():
return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))
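
For illustration (not from the changeset), the new alert helper is what feed.update_story() calls when a source API raises; the message below is made up:

```
import utils

utils.alert_tanner('Problem updating reddit story, ref abc123: read timeout')
```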

View File

@ -35,6 +35,7 @@ app.post('/', (req, res) => {
const url = req.body.url;
const requestOptions = {
url: url,
gzip: true,
//headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'},
//headers: {'User-Agent': 'Twitterbot/1.0'},
headers: {

File diff suppressed because it is too large

View File

@ -3,7 +3,7 @@
Download MeiliSearch with:
```
wget https://github.com/meilisearch/MeiliSearch/releases/download/v0.11.1/meilisearch-linux-amd64
wget https://github.com/meilisearch/meilisearch/releases/download/v0.27.0/meilisearch-linux-amd64
chmod +x meilisearch-linux-amd64
```

View File

@ -8,6 +8,8 @@
content="{{ description }}"
/>
<meta content="{{ url }}" name="og:site_name">
<meta name="robots" content="{{ robots }}">
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
@ -26,7 +28,7 @@
work correctly both with client-side routing and a non-root public URL.
Learn how to configure a non-root public URL by running `npm run build`.
-->
<title>{{ title }} - QotNews</title>
<title>{{ title }}</title>
<style>
html {
@ -37,13 +39,23 @@
}
.nojs {
color: white;
max-width: 32rem;
}
</style>
</head>
<body>
<div class="nojs">
<noscript>You need to enable JavaScript to run this app.</noscript>
<noscript>
You need to enable JavaScript to run this app because it's written in React.
I was planning on writing a server-side version, but I've become distracted
by other projects -- sorry!
<br/>
I originally wrote this for myself, and of course I whitelist JavaScript on
all my own domains.
<br/><br/>
Alternatively, try activex.news.t0.vc for an ActiveX™ version.
</noscript>
</div>
<div id="root"></div>
<!--

View File

@ -3,8 +3,10 @@ import { BrowserRouter as Router, Route, Link, Switch } from 'react-router-dom';
import localForage from 'localforage';
import './Style-light.css';
import './Style-dark.css';
import './Style-black.css';
import './Style-red.css';
import './fonts/Fonts.css';
import { ForwardDot } from './utils.js';
import { BackwardDot, ForwardDot } from './utils.js';
import Feed from './Feed.js';
import Article from './Article.js';
import Comments from './Comments.js';
@ -38,6 +40,16 @@ class App extends React.Component {
localStorage.setItem('theme', 'dark');
}
black() {
this.setState({ theme: 'black' });
localStorage.setItem('theme', 'black');
}
red() {
this.setState({ theme: 'red' });
localStorage.setItem('theme', 'red');
}
componentDidMount() {
if (!this.cache.length) {
localForage.iterate((value, key) => {
@ -47,22 +59,61 @@ class App extends React.Component {
}
}
goFullScreen() {
if ('wakeLock' in navigator) {
navigator.wakeLock.request('screen');
}
document.body.requestFullscreen({ navigationUI: 'hide' }).then(() => {
window.addEventListener('resize', () => this.forceUpdate());
this.forceUpdate();
});
};
exitFullScreen() {
document.exitFullscreen().then(() => {
this.forceUpdate();
});
};
render() {
const theme = this.state.theme;
document.body.style.backgroundColor = theme === 'dark' ? '#000' : '#eeeeee';
if (theme === 'dark') {
document.body.style.backgroundColor = '#1a1a1a';
} else if (theme === 'black') {
document.body.style.backgroundColor = '#000';
} else if (theme === 'red') {
document.body.style.backgroundColor = '#000';
} else {
document.body.style.backgroundColor = '#eeeeee';
}
const fullScreenAvailable = document.fullscreenEnabled ||
document.mozFullscreenEnabled ||
document.webkitFullscreenEnabled ||
document.msFullscreenEnabled;
return (
<div className={theme}>
<Router>
<div className='container menu'>
<p>
<Link to='/'>QotNews - Feed</Link>
<span className='theme'>Theme: <a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a></span>
<Link to='/'>QotNews</Link>
<span className='theme'><a href='#' onClick={() => this.light()}>Light</a> - <a href='#' onClick={() => this.dark()}>Dark</a> - <a href='#' onClick={() => this.black()}>Black</a> - <a href='#' onClick={() => this.red()}>Red</a></span>
<br />
<span className='slogan'>Hacker News, Reddit, Lobsters, and Tildes articles rendered in reader mode.</span>
</p>
<Route path='/(|search)' component={Search} />
<Route path='/(|search)' component={Submit} />
{fullScreenAvailable &&
<Route path='/(|search)' render={() => !document.fullscreenElement ?
<button className='fullscreen' onClick={() => this.goFullScreen()}>Enter Fullscreen</button>
:
<button className='fullscreen' onClick={() => this.exitFullScreen()}>Exit Fullscreen</button>
} />
}
</div>
<Route path='/' exact render={(props) => <Feed {...props} updateCache={this.updateCache} />} />
@ -72,6 +123,7 @@ class App extends React.Component {
</Switch>
<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} />} />
<BackwardDot />
<ForwardDot />
<ScrollToTop />

View File

@ -67,7 +67,8 @@ class Article extends React.Component {
{story ?
<div className='article'>
<Helmet>
<title>{story.title} - QotNews</title>
<title>{story.title} | QotNews</title>
<meta name="robots" content="noindex" />
</Helmet>
<h1>{story.title}</h1>

View File

@ -115,7 +115,8 @@ class Article extends React.Component {
{story ?
<div className='article'>
<Helmet>
<title>{story.title} - QotNews Comments</title>
<title>{story.title} | QotNews</title>
<meta name="robots" content="noindex" />
</Helmet>
<h1>{story.title}</h1>

View File

@ -52,7 +52,8 @@ class Feed extends React.Component {
return (
<div className='container'>
<Helmet>
<title>Feed - QotNews</title>
<title>QotNews</title>
<meta name="robots" content="index" />
</Helmet>
{error && <p>Connection error?</p>}
{stories ?

View File

@ -29,7 +29,7 @@ class Results extends React.Component {
.then(res => res.json())
.then(
(result) => {
this.setState({ stories: result.results });
this.setState({ stories: result.hits });
},
(error) => {
if (error.message !== 'The operation was aborted. ') {
@ -56,7 +56,7 @@ class Results extends React.Component {
return (
<div className='container'>
<Helmet>
<title>Feed - QotNews</title>
<title>Search Results | QotNews</title>
</Helmet>
{error && <p>Connection error?</p>}
{stories ?

View File

@ -15,6 +15,7 @@ class ScrollToTop extends React.Component {
}
window.scrollTo(0, 0);
document.body.scrollTop = 0;
}
render() {

View File

@ -37,7 +37,7 @@ class Search extends Component {
<span className='search'>
<form onSubmit={this.searchAgain}>
<input
placeholder='Search... (fixed)'
placeholder='Search...'
value={search}
onChange={this.searchArticles}
ref={this.inputRef}

View File

@ -0,0 +1,68 @@
.black {
color: #ddd;
}
.black a {
color: #ddd;
}
.black input {
color: #ddd;
border: 1px solid #828282;
}
.black button {
background-color: #444444;
border-color: #bbb;
color: #ddd;
}
.black .item {
color: #828282;
}
.black .item .source-logo {
filter: grayscale(1);
}
.black .item a {
color: #828282;
}
.black .item a.link {
color: #ddd;
}
.black .item a.link:visited {
color: #828282;
}
.black .item .info a.hot {
color: #cccccc;
}
.black .article a {
border-bottom: 1px solid #aaaaaa;
}
.black .article u {
border-bottom: 1px solid #aaaaaa;
text-decoration: none;
}
.black .story-text video,
.black .story-text img {
filter: brightness(50%);
}
.black .article .info {
color: #828282;
}
.black .article .info a {
border-bottom: none;
color: #828282;
}
.black .comment.lined {
border-left: 1px solid #444444;
}

View File

@ -11,12 +11,14 @@
border: 1px solid #828282;
}
.dark .item {
color: #828282;
.dark button {
background-color: #444444;
border-color: #bbb;
color: #ddd;
}
.dark .item .source-logo {
filter: grayscale(1);
.dark .item {
color: #828282;
}
.dark .item a {
@ -43,6 +45,7 @@
text-decoration: none;
}
.dark .story-text video,
.dark .story-text img {
filter: brightness(50%);
}

View File

@ -2,9 +2,30 @@ body {
text-rendering: optimizeLegibility;
font: 1rem/1.3 sans-serif;
color: #000000;
margin-bottom: 100vh;
word-break: break-word;
font-kerning: normal;
margin: 0;
}
::backdrop {
background-color: rgba(0,0,0,0);
}
body:fullscreen {
overflow-y: scroll !important;
}
body:-ms-fullscreen {
overflow-y: scroll !important;
}
body:-webkit-full-screen {
overflow-y: scroll !important;
}
body:-moz-full-screen {
overflow-y: scroll !important;
}
#root {
margin: 8px 8px 100vh 8px !important;
}
a {
@ -22,6 +43,12 @@ input {
border-radius: 4px;
}
.fullscreen {
margin: 0.25rem;
padding: 0.25rem;
}
pre {
overflow: auto;
}
@ -185,16 +212,20 @@ span.source {
cursor: pointer;
}
.toggleDot {
.dot {
cursor: pointer;
position: fixed;
bottom: 1rem;
left: 1rem;
height: 3rem;
width: 3rem;
background-color: #828282;
border-radius: 50%;
}
.toggleDot {
bottom: 1rem;
left: 1rem;
}
.toggleDot .button {
font: 2rem/1 'icomoon';
position: relative;
@ -203,21 +234,27 @@ span.source {
}
.forwardDot {
cursor: pointer;
position: fixed;
bottom: 1rem;
right: 1rem;
height: 3rem;
width: 3rem;
background-color: #828282;
border-radius: 50%;
}
.forwardDot .button {
font: 2.5rem/1 'icomoon';
font: 2rem/1 'icomoon';
position: relative;
top: 0.25rem;
left: 0.3rem;
top: 0.5rem;
left: 0.5rem;
}
.backwardDot {
bottom: 1rem;
right: 5rem;
}
.backwardDot .button {
font: 2rem/1 'icomoon';
position: relative;
top: 0.5rem;
left: 0.5rem;
}
.search form {

View File

@ -0,0 +1,82 @@
.red {
color: #b00;
scrollbar-color: #b00 #440000;
}
.red a {
color: #b00;
}
.red input {
color: #b00;
border: 1px solid #690000;
}
.red input::placeholder {
color: #690000;
}
.red hr {
background-color: #690000;
}
.red button {
background-color: #440000;
border-color: #b00;
color: #b00;
}
.red .item,
.red .slogan {
color: #690000;
}
.red .item .source-logo {
display: none;
}
.red .item a {
color: #690000;
}
.red .item a.link {
color: #b00;
}
.red .item a.link:visited {
color: #690000;
}
.red .item .info a.hot {
color: #cc0000;
}
.red .article a {
border-bottom: 1px solid #aa0000;
}
.red .article u {
border-bottom: 1px solid #aa0000;
text-decoration: none;
}
.red .story-text video,
.red .story-text img {
filter: grayscale(100%) brightness(20%) sepia(100%) hue-rotate(-50deg) saturate(600%) contrast(0.8);
}
.red .article .info {
color: #690000;
}
.red .article .info a {
border-bottom: none;
color: #690000;
}
.red .comment.lined {
border-left: 1px solid #440000;
}
.red .dot {
background-color: #440000;
}

View File

@ -41,7 +41,7 @@ class Submit extends Component {
<span className='search'>
<form onSubmit={this.submitArticle}>
<input
placeholder='Submit Article'
placeholder='Submit URL'
ref={this.inputRef}
/>
</form>

Binary file not shown.

View File

@ -25,8 +25,9 @@ export class ToggleDot extends React.Component {
render() {
const id = this.props.id;
const article = this.props.article;
return (
<div className='toggleDot'>
<div className='dot toggleDot'>
<div className='button'>
<Link to={'/' + id + (article ? '' : '/c')}>
{article ? '' : ''}
@ -37,6 +38,27 @@ export class ToggleDot extends React.Component {
}
}
export class BackwardDot extends React.Component {
goBackward() {
localStorage.setItem('scrollLock', 'True');
window.history.back();
}
render() {
const isMobile = /iPhone|iPad|iPod|Android/i.test(navigator.userAgent);
if (!isMobile) return null;
if (!document.fullscreenElement) return null;
return (
<div className='dot backwardDot' onClick={this.goBackward}>
<div className='button'>
</div>
</div>
);
}
}
export class ForwardDot extends React.Component {
goForward() {
localStorage.setItem('scrollLock', 'True');
@ -48,9 +70,9 @@ export class ForwardDot extends React.Component {
if (!isMobile) return null;
return (
<div className='forwardDot' onClick={this.goForward}>
<div className='dot forwardDot' onClick={this.goForward}>
<div className='button'>
</div>
</div>
);