Compare commits

..

No commits in common. "2439c113b30c34fb35d303153aa6c4d41cdc2fe5" and "2f730c1f52a82118643ff02a022196ae62ed862f" have entirely different histories.

25 changed files with 2602 additions and 3379 deletions

View File

@ -73,13 +73,6 @@ def get_stories_by_url(url):
filter(Story.meta['url'].as_string() == url).\ filter(Story.meta['url'].as_string() == url).\
order_by(Story.meta['date'].desc()) order_by(Story.meta['date'].desc())
def get_ref_by_sid(sid):
    """Look up the Reflist row whose ``sid`` equals the given story id.

    Returns a plain dict with the row's ``ref``, ``sid``, ``source`` and
    ``urlref`` values, or ``None`` when no row matches, so the result can be
    truth-tested by the caller.
    """
    session = Session()
    row = session.query(Reflist).\
            filter(Reflist.sid == sid).\
            first()
    # .first() yields None for an unknown sid; reading attributes off it
    # would raise AttributeError instead of reporting "not found".
    if row is None:
        return None
    return dict(ref=row.ref, sid=row.sid, source=row.source, urlref=row.urlref)
def get_reflist(): def get_reflist():
session = Session() session = Session()
q = session.query(Reflist).order_by(Reflist.rid.desc()) q = session.query(Reflist).order_by(Reflist.rid.desc())

View File

@ -53,7 +53,7 @@ class Category(Base):
# scratchpad so I can quickly develop the parser # scratchpad so I can quickly develop the parser
if __name__ == '__main__': if __name__ == '__main__':
print("Category: RadioNZ") print("Category: RadioNZ")
site = Category({ 'url': "https://www.rnz.co.nz/news/" }) site = Category("https://www.rnz.co.nz/news/")
excludes = [ excludes = [
'rnz.co.nz/news/sport', 'rnz.co.nz/news/sport',
'rnz.co.nz/weather', 'rnz.co.nz/weather',
@ -61,12 +61,12 @@ if __name__ == '__main__':
] ]
posts = site.feed(excludes) posts = site.feed(excludes)
print(posts[:5]) print(posts[:5])
print(site.story(posts[0][0], posts[0][1])) print(site.story(posts[0]))
print("Category: Newsroom") print("Category: Newsroom")
site = Category({ 'url': "https://www.newsroom.co.nz/news/", 'tz': 'Pacific/Auckland'}) site = Category("https://www.newsroom.co.nz/news/", tz='Pacific/Auckland')
posts = site.feed() posts = site.feed()
print(posts[:5]) print(posts[:5])
print(site.story(posts[0][0], posts[0][1])) print(site.story(posts[0]))

View File

@ -40,7 +40,7 @@ def api(route, ref=None):
return False return False
def feed(): def feed():
return ['hn:'+str(x) for x in api(API_TOPSTORIES) or []] return [str(x) for x in api(API_TOPSTORIES) or []]
def comment(i): def comment(i):
if 'author' not in i: if 'author' not in i:
@ -60,7 +60,6 @@ def comment_count(i):
return sum([comment_count(c) for c in i['comments']]) + alive return sum([comment_count(c) for c in i['comments']]) + alive
def story(ref): def story(ref):
ref = ref.replace('hn:', '')
r = api(API_ITEM, ref) r = api(API_ITEM, ref)
if not r: return False if not r: return False

View File

@ -76,7 +76,7 @@ class Sitemap(Base):
# scratchpad so I can quickly develop the parser # scratchpad so I can quickly develop the parser
if __name__ == '__main__': if __name__ == '__main__':
print("Sitemap: The Spinoff") print("Sitemap: The Spinoff")
site = Sitemap({ 'url': "https://thespinoff.co.nz/sitemap.xml" }) site = Sitemap("https://thespinoff.co.nz/sitemap.xml")
excludes = [ excludes = [
'thespinoff.co.nz/sitemap-misc.xml', 'thespinoff.co.nz/sitemap-misc.xml',
'thespinoff.co.nz/sitemap-authors.xml', 'thespinoff.co.nz/sitemap-authors.xml',
@ -84,18 +84,16 @@ if __name__ == '__main__':
] ]
posts = site.feed(excludes) posts = site.feed(excludes)
print(posts[:5]) print(posts[:5])
print(site.story(posts[0][0], posts[0][1])) print(site.story(posts[0]))
print("Sitemap: Newshub") print("Sitemap: Newshub")
site = Sitemap({ site = Sitemap([
'url': [ 'https://www.newshub.co.nz/home/politics.gnewssitemap.xml',
'https://www.newshub.co.nz/home/politics.gnewssitemap.xml', 'https://www.newshub.co.nz/home/new-zealand.gnewssitemap.xml',
'https://www.newshub.co.nz/home/new-zealand.gnewssitemap.xml', 'https://www.newshub.co.nz/home/world.gnewssitemap.xml',
'https://www.newshub.co.nz/home/world.gnewssitemap.xml', 'https://www.newshub.co.nz/home/money.gnewssitemap.xml',
'https://www.newshub.co.nz/home/money.gnewssitemap.xml', ])
],
})
posts = site.feed() posts = site.feed()
print(posts[:5]) print(posts[:5])
print(site.story(posts[0][0], posts[0][1])) print(site.story(posts[0]))
print(site.story(posts[:-1]))

View File

@ -10,10 +10,6 @@ if __name__ == '__main__':
import requests import requests
from datetime import datetime from datetime import datetime
import settings
from misc.time import unix
from misc.metadata import get_icons
from misc.api import xml, json
from utils import clean from utils import clean
SUBSTACK_REFERER = 'https://substack.com' SUBSTACK_REFERER = 'https://substack.com'
@ -26,6 +22,32 @@ def api_comments(post_id, base_url):
def api_stories(x, base_url): def api_stories(x, base_url):
return f"{base_url}/api/v1/archive?sort=new&search=&offset=0&limit=100" return f"{base_url}/api/v1/archive?sort=new&search=&offset=0&limit=100"
def unix(date_str):
    """Convert a UTC timestamp string such as ``2021-01-01T00:00:00.000Z``
    to whole Unix seconds.

    The fractional-seconds part is optional. Raises ``ValueError`` when the
    string matches neither form.
    """
    from datetime import timezone

    try:
        parsed = datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%fZ')
    except ValueError:
        # Same layout without fractional seconds
        parsed = datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%SZ')
    # The trailing 'Z' means UTC. A naive datetime would be interpreted in the
    # machine's local timezone by .timestamp(), shifting the result.
    return int(parsed.replace(tzinfo=timezone.utc).timestamp())
def api(route, ref=None, referer=None):
    """Fetch ``route(ref)`` and return the decoded JSON body.

    ``route`` is a callable that builds the URL from ``ref``. When ``referer``
    is given it is sent as the ``Referer`` header. The request is attempted
    with a 10 second timeout and, if that fails for any reason (including a
    non-200 status), once more with a 20 second timeout.

    Returns ``False`` when both attempts fail.
    """
    headers = {'Referer': referer} if referer else None
    timeouts = (10, 20)

    for attempt, timeout in enumerate(timeouts, start=1):
        try:
            r = requests.get(route(ref), headers=headers, timeout=timeout)
            if r.status_code != 200:
                raise Exception('Bad response code ' + str(r.status_code))
            return r.json()
        except Exception as e:
            # Exception rather than BaseException, so KeyboardInterrupt and
            # SystemExit propagate instead of being logged and swallowed.
            if attempt < len(timeouts):
                logging.error('Problem hitting Substack API: {}, trying again'.format(str(e)))
            else:
                logging.error('Problem hitting Substack API: {}'.format(str(e)))

    return False
def comment(i): def comment(i):
if 'body' not in i: if 'body' not in i:
return False return False
@ -44,25 +66,14 @@ class Publication:
def __init__(self, domain): def __init__(self, domain):
self.BASE_DOMAIN = domain self.BASE_DOMAIN = domain
def ref_prefix(self, ref):
return f"{self.BASE_DOMAIN}/#id:{ref}"
def strip_ref_prefix(self, ref):
return ref.replace(f"{self.BASE_DOMAIN}/#id:", '')
def feed(self): def feed(self):
too_old = datetime.now().timestamp() - settings.MAX_STORY_AGE stories = api(lambda x: api_stories(x, self.BASE_DOMAIN), referer=self.BASE_DOMAIN)
stories = json(lambda x: api_stories(x, self.BASE_DOMAIN), headers={'Referer': self.BASE_DOMAIN})
if not stories: return [] if not stories: return []
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories])) stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
stories = list(filter(None, [i if unix(i.get('post_date')) > too_old else None for i in stories])) return [str(i.get("id")) for i in stories or []]
stories.sort(key=lambda a: unix(a.get('post_date')), reverse=True)
return [self.ref_prefix(str(i.get("id"))) for i in stories or []]
def story(self, ref): def story(self, ref):
ref = self.strip_ref_prefix(ref) stories = api(lambda x: api_stories(x, self.BASE_DOMAIN), referer=self.BASE_DOMAIN)
stories = json(lambda x: api_stories(x, self.BASE_DOMAIN), headers={'Referer': self.BASE_DOMAIN})
if not stories: return False if not stories: return False
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories])) stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
stories = list(filter(None, [i if str(i.get('id')) == ref else None for i in stories])) stories = list(filter(None, [i if str(i.get('id')) == ref else None for i in stories]))
@ -83,7 +94,7 @@ class Publication:
s['title'] = r.get('title', '') s['title'] = r.get('title', '')
s['link'] = r.get('canonical_url', '') s['link'] = r.get('canonical_url', '')
s['url'] = r.get('canonical_url', '') s['url'] = r.get('canonical_url', '')
comments = json(lambda x: api_comments(x, self.BASE_DOMAIN), r.get('id'), headers={'Referer': self.BASE_DOMAIN}) comments = api(lambda x: api_comments(x, self.BASE_DOMAIN), r.get('id'), referer=self.BASE_DOMAIN)
s['comments'] = [comment(i) for i in comments.get('comments')] s['comments'] = [comment(i) for i in comments.get('comments')]
s['comments'] = list(filter(bool, s['comments'])) s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = r.get('comment_count', 0) s['num_comments'] = r.get('comment_count', 0)
@ -93,12 +104,6 @@ class Publication:
s['author'] = authors[0].get('name') s['author'] = authors[0].get('name')
s['author_link'] = authors[0].get('link') s['author_link'] = authors[0].get('link')
markup = xml(lambda x: s['link'])
if markup:
icons = get_icons(markup, url=s['link'])
if icons:
s['icon'] = icons[0]
return s return s
def _bylines(self, b): def _bylines(self, b):
@ -111,28 +116,14 @@ class Publication:
class Top: class Top:
def ref_prefix(self, base_url, ref):
return f"{base_url}/#id:{ref}"
def strip_ref_prefix(self, ref):
    """Return the item id from a ``<base_url>/#id:<item>`` ref.

    A ref that does not contain ``/#id:`` is returned unchanged.
    """
    # rpartition returns ('', '', ref) when the separator is absent, so index 2
    # covers both cases; it also cannot fail on a ref containing the separator
    # more than once, which the two-name unpacking of split() did.
    return ref.rpartition('/#id:')[2]
def feed(self): def feed(self):
too_old = datetime.now().timestamp() - settings.MAX_STORY_AGE stories = api(SUBSTACK_API_TOP_POSTS, referer=SUBSTACK_REFERER)
stories = json(SUBSTACK_API_TOP_POSTS, headers={'Referer': SUBSTACK_REFERER})
if not stories: return [] if not stories: return []
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories])) stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
stories = list(filter(None, [i if unix(i.get('post_date')) > too_old else None for i in stories])) return [str(i.get("id")) for i in stories or []]
stories.sort(key=lambda a: unix(a.get('post_date')), reverse=True)
stories = [self.ref_prefix(str(i.get("pub").get("base_url")), str(i.get("id"))) for i in stories]
return stories
def story(self, ref): def story(self, ref):
ref = self.strip_ref_prefix(ref) stories = api(SUBSTACK_API_TOP_POSTS, referer=SUBSTACK_REFERER)
stories = json(SUBSTACK_API_TOP_POSTS, headers={'Referer': SUBSTACK_REFERER})
if not stories: return False if not stories: return False
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories])) stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
stories = list(filter(None, [i if str(i.get('id')) == ref else None for i in stories])) stories = list(filter(None, [i if str(i.get('id')) == ref else None for i in stories]))
@ -155,7 +146,7 @@ class Top:
s['title'] = r.get('title', '') s['title'] = r.get('title', '')
s['link'] = r.get('canonical_url', '') s['link'] = r.get('canonical_url', '')
s['url'] = r.get('canonical_url', '') s['url'] = r.get('canonical_url', '')
comments = json(lambda x: api_comments(x, base_url), r.get('id'), headers={'Referer': SUBSTACK_REFERER}) comments = api(lambda x: api_comments(x, base_url), r.get('id'), referer=SUBSTACK_REFERER)
s['comments'] = [comment(i) for i in comments.get('comments')] s['comments'] = [comment(i) for i in comments.get('comments')]
s['comments'] = list(filter(bool, s['comments'])) s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = r.get('comment_count', 0) s['num_comments'] = r.get('comment_count', 0)

View File

@ -5,16 +5,13 @@ logging.basicConfig(
import requests import requests
GOOGLEBOT_USER_AGENT = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" USER_AGENT = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
GOOGLEBOT_IP = '66.249.66.1' FORWARD_IP = '66.249.66.1'
TIMEOUT = 30
def xml(route, ref=None, headers=dict(), use_googlebot=True): def xml(route, ref=None):
try: try:
if use_googlebot: headers = {'User-Agent': USER_AGENT, 'X-Forwarded-For': FORWARD_IP}
headers['User-Agent'] = GOOGLEBOT_USER_AGENT r = requests.get(route(ref), headers=headers, timeout=5)
headers['X-Forwarded-For'] = GOOGLEBOT_IP
r = requests.get(route(ref), headers=headers, timeout=TIMEOUT)
if r.status_code != 200: if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code)) raise Exception('Bad response code ' + str(r.status_code))
return r.text return r.text
@ -24,12 +21,10 @@ def xml(route, ref=None, headers=dict(), use_googlebot=True):
logging.error('Problem hitting URL: {}'.format(str(e))) logging.error('Problem hitting URL: {}'.format(str(e)))
return False return False
def json(route, ref=None, headers=dict(), use_googlebot=True): def json(route, ref=None):
try: try:
if use_googlebot: headers = {'User-Agent': USER_AGENT, 'X-Forwarded-For': FORWARD_IP}
headers['User-Agent'] = GOOGLEBOT_USER_AGENT r = requests.get(route(ref), headers=headers, timeout=5)
headers['X-Forwarded-For'] = GOOGLEBOT_IP
r = requests.get(route(ref), headers=headers, timeout=TIMEOUT)
if r.status_code != 200: if r.status_code != 200:
raise Exception('Bad response code ' + str(r.status_code)) raise Exception('Bad response code ' + str(r.status_code))
return r.json() return r.json()

View File

@ -1,14 +0,0 @@
from bs4 import BeautifulSoup
def get_icons(markup, url=None):
    """Return candidate icon URLs found in the HTML ``markup``.

    Candidates are ordered by preference: 32x32 icons, then 16x16 icons, then
    ``shortcut icon`` links, then any other ``rel="icon"`` link. Duplicates
    are dropped while keeping that order.

    ``url`` is the address the markup came from. When given, relative hrefs
    are resolved against it; when omitted, hrefs are returned as found.
    """
    from urllib.parse import urljoin

    soup = BeautifulSoup(markup, features='html.parser')
    icon32 = soup.find_all('link', rel="icon", href=True, sizes="32x32")
    icon16 = soup.find_all('link', rel="icon", href=True, sizes="16x16")
    favicon = soup.find_all('link', rel="shortcut icon", href=True)
    others = soup.find_all('link', rel="icon", href=True)
    candidates = icon32 + icon16 + favicon + others

    # dict.fromkeys de-duplicates while keeping the preference order above;
    # a set would scramble it. Empty hrefs carry no icon and are skipped.
    hrefs = list(dict.fromkeys(i.get('href') for i in candidates if i.get('href')))

    # urljoin leaves absolute URLs alone and resolves path-relative and
    # protocol-relative hrefs; with no base URL it returns the href unchanged.
    return [urljoin(url or '', href) for href in hrefs]

View File

@ -1,19 +1,4 @@
from bs4 import BeautifulSoup
def get_icons(markup, url):
    """Return candidate icon URLs found in the HTML ``markup``.

    Candidates are ordered by preference: 32x32 icons, then 16x16 icons, then
    ``shortcut icon`` links, then any other ``rel="icon"`` link. Duplicates
    are dropped while keeping that order, so the first element is the most
    preferred icon.

    ``url`` is the address the markup came from; relative hrefs are resolved
    against it.
    """
    from urllib.parse import urljoin

    soup = BeautifulSoup(markup, features='html.parser')
    icon32 = soup.find_all('link', rel="icon", href=True, sizes="32x32")
    icon16 = soup.find_all('link', rel="icon", href=True, sizes="16x16")
    favicon = soup.find_all('link', rel="shortcut icon", href=True)
    others = soup.find_all('link', rel="icon", href=True)
    candidates = icon32 + icon16 + favicon + others

    # dict.fromkeys de-duplicates while keeping the preference order above;
    # a set would scramble it. Empty hrefs carry no icon and are skipped.
    hrefs = list(dict.fromkeys(i.get('href') for i in candidates if i.get('href')))

    # urljoin leaves absolute URLs alone and correctly resolves hrefs without
    # a leading slash as well as protocol-relative ("//host/...") hrefs.
    return [urljoin(url, href) for href in hrefs]
def parse_extruct(s, data): def parse_extruct(s, data):
rdfa_keys = { rdfa_keys = {
'title': [ 'title': [

View File

@ -11,10 +11,9 @@ import extruct
import settings import settings
from utils import clean from utils import clean
from misc.metadata import parse_extruct, get_icons from misc.metadata import parse_extruct
from misc.time import unix from misc.time import unix
from misc.api import xml from misc.api import xml
import misc.stuff as stuff
def comment(i): def comment(i):
if 'author' not in i: if 'author' not in i:
@ -69,7 +68,16 @@ class Base:
s['url'] = urlref s['url'] = urlref
s['date'] = 0 s['date'] = 0
icons = get_icons(markup, url=urlref) soup = BeautifulSoup(markup, features='html.parser')
icon32 = soup.find_all('link', rel="icon", href=True, sizes="32x32")
icon16 = soup.find_all('link', rel="icon", href=True, sizes="16x16")
favicon = soup.find_all('link', rel="shortcut icon", href=True)
others = soup.find_all('link', rel="icon", href=True)
icons = icon32 + icon16 + favicon + others
base_url = '/'.join(urlref.split('/')[:3])
icons = list(set([i.get('href') for i in icons]))
icons = [i if i.startswith('http') else base_url + i for i in icons]
if icons: if icons:
s['icon'] = icons[0] s['icon'] = icons[0]
@ -81,18 +89,13 @@ class Base:
if 'disqus' in markup: if 'disqus' in markup:
try: try:
s['comments'] = declutter.get_comments(urlref) s['comments'] = declutter.get_comments(urlref)
s['comments'] = list(filter(bool, s['comments'])) c['comments'] = list(filter(bool, c['comments']))
s['num_comments'] = comment_count(s['comments']) s['num_comments'] = comment_count(s['comments'])
except KeyboardInterrupt: except KeyboardInterrupt:
raise raise
except: except:
pass pass
if urlref.startswith('https://www.stuff.co.nz'):
s['comments'] = stuff.get_comments(urlref)
s['comments'] = list(filter(bool, s['comments']))
s['num_comments'] = len(s['comments'])
if not s['date']: if not s['date']:
return False return False
return s return s

View File

@ -1,64 +0,0 @@
import re
from bs4 import BeautifulSoup
if __name__ == '__main__':
import sys
sys.path.insert(0,'.')
from misc.time import unix
from misc.api import xml
def _soup_get_text(soup):
    """Return the text of a parsed element, or None when it has none.

    If the element's ``text`` is empty, the first CDATA node found inside it
    is used instead (stripped of surrounding whitespace).
    """
    if not soup: return None
    if soup.text: return soup.text

    # Imported by name here: the module only does
    # ``from bs4 import BeautifulSoup``, so the bare name ``bs4`` is unbound.
    from bs4 import CData

    cdata = soup.find(text=lambda node: isinstance(node, CData))
    if cdata and cdata.string: return cdata.string.strip()
    return None
def _parse_comment(soup):
    """Build a comment dict from one parsed ``<item>`` element.

    The returned dict always has the keys ``author``, ``authorLink``,
    ``score``, ``date``, ``text`` and ``comments``; fields that are missing
    from the element keep their empty defaults.
    """
    c = {
        'author': '',
        'authorLink': '',
        'score': 0,
        'date': 0,
        'text': '',
        'comments': [],
    }

    link = soup.find('link')
    if link:
        # NOTE(review): the "By: <name>" text is read from <link>, the same
        # element that supplies authorLink; this may have been meant to be
        # <title> - confirm against the feed.
        link_text = _soup_get_text(link)
        if link_text and 'By:' in link_text:
            # Take what follows the marker. str.strip('By:') removes any of
            # the characters 'B', 'y', ':' from both ends, which also eats
            # letters of the name itself (e.g. "Bobby" -> "Bobb").
            c['author'] = link_text.split('By:', 1)[1].strip()
        c['authorLink'] = link_text

    # An explicit dc:creator overrides the author derived above
    creator = soup.find('dc:creator')
    if creator:
        c['author'] = _soup_get_text(creator)

    description = soup.find('description')
    if description:
        c['text'] = _soup_get_text(description)

    # Accept either spelling of the tag name
    pubdate = soup.find('pubdate') or soup.find('pubDate')
    if pubdate:
        c['date'] = unix(pubdate.text)

    return c
def get_comments(url):
    """Fetch and parse the comments for a stuff.co.nz article URL.

    The article path (section(s) plus numeric id) is extracted from ``url``
    and used to build the comment RSS address. Returns a list of comment
    dicts, or an empty list when the URL is not in the expected article
    form, the feed cannot be fetched, or it contains no items.
    """
    regex = r"https:\/\/www\.stuff\.co\.nz\/(.*\/\d+)/[^\/]+"
    matched = re.match(regex, url)
    if not matched:
        # Not an article URL (e.g. a section page): nothing to look up.
        # Without this guard the None match raised AttributeError.
        return []
    path = matched.group(1)

    comment_url = f'https://comments.us1.gigya.com/comments/rss/6201101/Stuff/stuff/{path}'
    markup = xml(lambda x: comment_url)
    if not markup: return []

    soup = BeautifulSoup(markup, features='html.parser')
    items = soup.find_all('item')
    if not items: return []
    return [_parse_comment(c) for c in items]
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
comments = get_comments('https://www.stuff.co.nz/life-style/homed/houses/123418468/dear-jacinda-we-need-to-talk-about-housing')
print(len(comments))
print(comments[:5])

View File

@ -4,9 +4,9 @@ logging.basicConfig(
level=logging.DEBUG) level=logging.DEBUG)
import requests import requests
DECLUTTER_API = 'https://declutter.1j.nz/headless/details' DECLUTTER_API = 'https://declutter.1j.nz/details'
DECLUTTER_COMMENT_API = 'https://declutter.1j.nz/headless/comments' DECLUTTER_COMMENT_API = 'https://declutter.1j.nz/comments'
TIMEOUT = 90 TIMEOUT = 30
def get_html(url): def get_html(url):

View File

@ -3,14 +3,15 @@ logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG) level=logging.DEBUG)
import requests import requests
from settings import HEADLESS_READER_PORT from settings import READER_PORT
READ_API = 'http://127.0.0.1:{}/headless/details'.format(READER_PORT or 3000)
READ_COMMENT__API = 'http://127.0.0.1:{}/headless/comments'.format(READER_PORT or 3000)
TIMEOUT = 60
READ_API = 'http://127.0.0.1:{}/headless/details'.format(HEADLESS_READER_PORT or 33843)
READ_COMMENT__API = 'http://127.0.0.1:{}/headless/comments'.format(HEADLESS_READER_PORT or 33843)
TIMEOUT = 90
def get_html(url): def get_html(url):
logging.info(f"Headless Scraper: {url}") logging.info(f"Headless Browser Scraper: {url}")
details = get_details(url) details = get_details(url)
if not details: if not details:
return '' return ''

View File

@ -3,9 +3,9 @@ logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG) level=logging.DEBUG)
import requests import requests
from settings import SIMPLE_READER_PORT from settings import READER_PORT
READ_API = 'http://127.0.0.1:{}/simple/details'.format(SIMPLE_READER_PORT or 33843) READ_API = 'http://127.0.0.1:{}/simple/details'.format(READER_PORT or 3000)
TIMEOUT = 20 TIMEOUT = 20
def get_html(url): def get_html(url):

View File

@ -99,10 +99,8 @@ def submit():
def story(sid): def story(sid):
story = database.get_story(sid) story = database.get_story(sid)
if story: if story:
related = [] related = database.get_stories_by_url(story.meta['url'])
if story.meta['url']: related = [r.meta for r in related]
related = database.get_stories_by_url(story.meta['url'])
related = [r.meta for r in related]
res = Response(json.dumps({"story": story.data, "related": related})) res = Response(json.dumps({"story": story.data, "related": related}))
res.headers['content-type'] = 'application/json' res.headers['content-type'] = 'application/json'
return res return res
@ -155,7 +153,6 @@ def _add_new_refs():
database.put_ref(ref, nid, source, urlref) database.put_ref(ref, nid, source, urlref)
logging.info('Added ref ' + ref) logging.info('Added ref ' + ref)
except database.IntegrityError: except database.IntegrityError:
logging.info('Unable to add ref ' + ref)
continue continue
def _update_current_story(item): def _update_current_story(item):
@ -168,11 +165,8 @@ def _update_current_story(item):
valid = feed.update_story(story, urlref=item['urlref']) valid = feed.update_story(story, urlref=item['urlref'])
if valid: if valid:
try: database.put_story(story)
database.put_story(story) search.put_story(story)
search.put_story(story)
except database.IntegrityError:
logging.info('Unable to add story with ref ' + ref)
else: else:
database.del_ref(item['ref']) database.del_ref(item['ref'])
logging.info('Removed ref {}'.format(item['ref'])) logging.info('Removed ref {}'.format(item['ref']))

View File

@ -6,8 +6,7 @@ MAX_STORY_AGE = 3*24*60*60
SCRAPERS = ['headless', 'outline', 'declutter', 'simple'] SCRAPERS = ['headless', 'outline', 'declutter', 'simple']
API_PORT = 33842 API_PORT = 33842
SIMPLE_READER_PORT = 33843 READER_PORT = 3000
HEADLESS_READER_PORT = 33843
# Feed Lengths # Feed Lengths
# Number of top items from each site to pull # Number of top items from each site to pull

View File

@ -1,48 +0,0 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.INFO)
import sys
import json
import requests
import database
import feed
import search
database.init()
search.init()
def _update_current_story(story, item):
    """Refresh one story through ``feed.update_story`` and persist the outcome.

    A story that updates successfully is written to the database and the
    search index; otherwise its ref is deleted.
    """
    logging.info('Updating story: {}'.format(str(story['ref'])))

    has_url = story.get('url', '')
    if has_url:
        # Blank the stored text before updating - presumably so the article
        # body is fetched again; confirm against feed.update_story.
        story['text'] = ''

    if not feed.update_story(story, urlref=item['urlref']):
        database.del_ref(item['ref'])
        logging.info('Removed ref {}'.format(item['ref']))
        return

    database.put_story(story)
    search.put_story(story)
if __name__ == '__main__':
    # Expect exactly one argument: the id of the story to update.
    if len(sys.argv) == 2:
        sid = sys.argv[1]
    else:
        # NOTE(review): the usage text names delete-story.py although this
        # script updates a story - confirm the intended file name.
        print('Usage: python delete-story.py [story id]')
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist outside the interactive interpreter.
        sys.exit(1)

    item = database.get_ref_by_sid(sid)
    # Resolve the record first and read .data only when it exists, so a
    # missing story reaches the "not found" message instead of raising.
    record = database.get_story(item['sid']) if item else None
    story = record.data if record else None

    if story:
        print('Updating story:')
        _update_current_story(story, item)
    else:
        print('Story not found. Exiting.')

View File

@ -9,7 +9,7 @@ import string
from bleach.sanitizer import Cleaner from bleach.sanitizer import Cleaner
def gen_rand_id(): def gen_rand_id():
return ''.join(random.choice(string.ascii_uppercase) for _ in range(5)) return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))
def render_md(md): def render_md(md):
if md: if md:

@ -1 +1 @@
Subproject commit d3d5fc74acf0be8a49e2772b42ab59278d1a3e81 Subproject commit 50a94df7283e31680c5d94dd666bab58aea2e475

View File

@ -71,7 +71,7 @@ class App extends React.Component {
<Route path='/search' component={Results} /> <Route path='/search' component={Results} />
<Route path='/:id' exact render={(props) => <Article {...props} cache={this.cache} />} /> <Route path='/:id' exact render={(props) => <Article {...props} cache={this.cache} />} />
</Switch> </Switch>
<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} key={props.match.params.id} />} /> <Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} />} />
<ForwardDot /> <ForwardDot />

View File

@ -1,231 +1,225 @@
body { body {
text-rendering: optimizeLegibility; text-rendering: optimizeLegibility;
font: 1rem/1.3 sans-serif; font: 1rem/1.3 sans-serif;
color: #000000; color: #000000;
margin-bottom: 100vh; margin-bottom: 100vh;
word-break: break-word; word-break: break-word;
font-kerning: normal; font-kerning: normal;
} }
a { a {
color: #000000; color: #000000;
text-decoration: none; text-decoration: none;
outline: none; outline: none;
} }
input { input {
font-size: 1.05rem; font-size: 1.05rem;
background-color: transparent; background-color: transparent;
border: 1px solid #828282; border: 1px solid #828282;
margin: 0.25rem; margin: 0.25rem;
padding: 6px; padding: 6px;
border-radius: 4px; border-radius: 4px;
} }
pre { pre {
overflow: auto; overflow: auto;
} }
.container { .container {
margin: 1rem auto; margin: 1rem auto;
max-width: 64rem; max-width: 64rem;
} }
.menu { .menu {
font-size: 1.1rem; font-size: 1.1rem;
padding: 0 1rem; padding: 0 1rem;
} }
.slogan { .slogan {
color: #828282; color: #828282;
} }
.theme { .theme {
float: right; float: right;
} }
.item { .item {
display: table; display: table;
color: #828282; color: #828282;
margin-bottom: 0.7rem; margin-bottom: 0.7rem;
} }
.item .source-logo { .item .source-logo {
width: 0.9rem; width: 0.9rem;
height: 0.9rem; height: 0.9rem;
} }
.item a { .item a {
color: #828282; color: #828282;
} }
.item a:hover { .item a:hover {
text-decoration: underline; text-decoration: underline;
} }
.item a.link { .item a.link {
font-size: 1.1rem; font-size: 1.1rem;
color: #000000; color: #000000;
} }
.item a.link:visited { .item a.link:visited {
color: #828282; color: #828282;
} }
.item a.link:hover { .item a.link:hover {
text-decoration: none; text-decoration: none;
} }
span.source { span.source {
margin-left: 0.4rem; margin-left: 0.4rem;
} }
.item .info a.hot { .item .info a.hot {
color: #444444; color: #444444;
} }
.article { .article {
padding-bottom: 3rem; padding-bottom: 3rem;
} }
.article-container { .article-container {
margin: 1rem auto; margin: 1rem auto;
max-width: 38rem; max-width: 38rem;
} }
.article a { .article a {
border-bottom: 1px solid #222222; border-bottom: 1px solid #222222;
} }
.article h1 { .article h1 {
font-size: 1.6rem; font-size: 1.6rem;
} }
.article h2 { .article h2 {
font-size: 1.4rem; font-size: 1.4rem;
} }
.article h3, .article h3, .article h4 {
.article h4 { font-size: 1.3rem;
font-size: 1.3rem;
} }
.article img { .article img {
max-width: 100%; max-width: 100%;
height: auto; height: auto;
} }
.article figure, .article figure, .article video {
.article video { width: 100%;
width: 100%; height: auto;
height: auto; margin: 0;
margin: 0;
} }
.article table { .article table {
width: 100%; width: 100%;
table-layout: fixed; table-layout: fixed;
} }
.article iframe { .article iframe {
display: none; display: none;
} }
.article u { .article u {
border-bottom: 1px solid #222; border-bottom: 1px solid #222;
text-decoration: none; text-decoration: none;
} }
.article .info { .article .info {
color: #828282; color: #828282;
} }
.article .info a { .article .info a {
border-bottom: none; border-bottom: none;
color: #828282; color: #828282;
} }
.article .info a:hover { .article .info a:hover {
text-decoration: underline; text-decoration: underline;
} }
.story-text { .story-text {
font: 1.2rem/1.5 "Apparatus SIL", sans-serif; font: 1.2rem/1.5 'Apparatus SIL', sans-serif;
margin-top: 1em; margin-top: 1em;
} }
.comments { .comments {
margin-left: -1.25rem; margin-left: -1.25rem;
} }
.comment { .comment {
padding-left: 1.25rem; padding-left: 1.25rem;
} }
.comment.lined { .comment.lined {
border-left: 1px solid #cccccc; border-left: 1px solid #cccccc;
} }
.comment .text { .comment .text {
margin-top: -0.5rem; margin-top: -0.5rem;
} }
.comment .text.hidden > p { .comment .text.hidden > p {
white-space: nowrap; white-space: nowrap;
overflow: hidden; overflow: hidden;
text-overflow: ellipsis; text-overflow: ellipsis;
display: none; display: none;
color: #828282; color: #828282;
} }
.comment .text.hidden > p:first-child { .comment .text.hidden > p:first-child {
display: block; display: block;
} }
.comment .collapser { .comment .collapser {
padding-left: 0.5rem; padding-left: 0.5rem;
padding-right: 1.5rem; padding-right: 1.5rem;
} }
.comment .pointer { .comment .pointer {
cursor: pointer; cursor: pointer;
} }
.toggleDot { .toggleDot {
position: fixed; position: fixed;
bottom: 1rem; bottom: 1rem;
left: 1rem; left: 1rem;
height: 3rem; height: 3rem;
width: 3rem; width: 3rem;
background-color: #828282; background-color: #828282;
border-radius: 50%; border-radius: 50%;
} }
.toggleDot .button { .toggleDot .button {
font: 2rem/1 "icomoon"; font: 2rem/1 'icomoon';
position: relative; position: relative;
top: 0.5rem; top: 0.5rem;
left: 0.55rem; left: 0.55rem;
} }
.forwardDot { .forwardDot {
cursor: pointer; cursor: pointer;
position: fixed; position: fixed;
bottom: 1rem; bottom: 1rem;
right: 1rem; right: 1rem;
height: 3rem; height: 3rem;
width: 3rem; width: 3rem;
background-color: #828282; background-color: #828282;
border-radius: 50%; border-radius: 50%;
} }
.forwardDot .button { .forwardDot .button {
font: 2.5rem/1 "icomoon"; font: 2.5rem/1 'icomoon';
position: relative; position: relative;
top: 0.25rem; top: 0.25rem;
left: 0.3rem; left: 0.3rem;
} }
.search form { .search form {
display: inline; display: inline;
}
.indented {
padding: 0 0 0 1rem;
} }

View File

@ -1,7 +1,7 @@
import React from 'react'; import React from 'react';
import { Helmet } from 'react-helmet'; import { Helmet } from 'react-helmet';
import localForage from 'localforage'; import localForage from 'localforage';
import { sourceLink, infoLine, otherDiscussions, ToggleDot } from '../utils.js'; import { sourceLink, infoLine, ToggleDot } from '../utils.js';
class Article extends React.Component { class Article extends React.Component {
constructor(props) { constructor(props) {
@ -14,7 +14,6 @@ class Article extends React.Component {
this.state = { this.state = {
story: cache[id] || false, story: cache[id] || false,
related: [],
error: false, error: false,
pConv: [], pConv: [],
}; };
@ -23,16 +22,21 @@ class Article extends React.Component {
componentDidMount() { componentDidMount() {
const id = this.props.match ? this.props.match.params.id : 'CLOL'; const id = this.props.match ? this.props.match.params.id : 'CLOL';
localForage.getItem(id).then((value) => value ? this.setState({ story: value }) : null); localForage.getItem(id)
localForage.getItem(`related-${id}`).then((value) => value ? this.setState({ related: value }) : null); .then(
(value) => {
if (value) {
this.setState({ story: value });
}
}
);
fetch('/api/' + id) fetch('/api/' + id)
.then(res => res.json()) .then(res => res.json())
.then( .then(
(result) => { (result) => {
this.setState({ story: result.story, related: result.related }); this.setState({ story: result.story });
localForage.setItem(id, result.story); localForage.setItem(id, result.story);
localForage.setItem(`related-${id}`, result.related);
}, },
(error) => { (error) => {
this.setState({ error: true }); this.setState({ error: true });
@ -47,7 +51,6 @@ class Article extends React.Component {
render() { render() {
const id = this.props.match ? this.props.match.params.id : 'CLOL'; const id = this.props.match ? this.props.match.params.id : 'CLOL';
const story = this.state.story; const story = this.state.story;
const related = this.state.related.filter(r => r.id != id);
const error = this.state.error; const error = this.state.error;
const pConv = this.state.pConv; const pConv = this.state.pConv;
let nodes = null; let nodes = null;
@ -74,7 +77,6 @@ class Article extends React.Component {
</div> </div>
{infoLine(story)} {infoLine(story)}
{otherDiscussions(related)}
{nodes ? {nodes ?
<div className='story-text'> <div className='story-text'>

View File

@ -4,9 +4,9 @@ import { HashLink } from 'react-router-hash-link';
import { Helmet } from 'react-helmet'; import { Helmet } from 'react-helmet';
import moment from 'moment'; import moment from 'moment';
import localForage from 'localforage'; import localForage from 'localforage';
import { infoLine, otherDiscussions, ToggleDot } from '../utils.js'; import { infoLine, ToggleDot } from '../utils.js';
class Comments extends React.Component { class Article extends React.Component {
constructor(props) { constructor(props) {
super(props); super(props);
@ -17,7 +17,6 @@ class Comments extends React.Component {
this.state = { this.state = {
story: cache[id] || false, story: cache[id] || false,
related: [],
error: false, error: false,
collapsed: [], collapsed: [],
expanded: [], expanded: [],
@ -27,21 +26,24 @@ class Comments extends React.Component {
componentDidMount() { componentDidMount() {
const id = this.props.match.params.id; const id = this.props.match.params.id;
localForage.getItem(id).then((value) => this.setState({ story: value })); localForage.getItem(id)
localForage.getItem(`related-${id}`).then((value) => value ? this.setState({ related: value }) : null); .then(
(value) => {
this.setState({ story: value });
}
);
fetch('/api/' + id) fetch('/api/' + id)
.then(res => res.json()) .then(res => res.json())
.then( .then(
(result) => { (result) => {
this.setState({ story: result.story, related: result.related }, () => { this.setState({ story: result.story }, () => {
const hash = window.location.hash.substring(1); const hash = window.location.hash.substring(1);
if (hash) { if (hash) {
document.getElementById(hash).scrollIntoView(); document.getElementById(hash).scrollIntoView();
} }
}); });
localForage.setItem(id, result.story); localForage.setItem(id, result.story);
localForage.setItem(`related-${id}`, result.related);
}, },
(error) => { (error) => {
this.setState({ error: true }); this.setState({ error: true });
@ -108,7 +110,6 @@ class Comments extends React.Component {
render() { render() {
const id = this.props.match.params.id; const id = this.props.match.params.id;
const story = this.state.story; const story = this.state.story;
const related = this.state.related.filter(r => r.id != id);
const error = this.state.error; const error = this.state.error;
return ( return (
@ -127,7 +128,6 @@ class Comments extends React.Component {
</div> </div>
{infoLine(story)} {infoLine(story)}
{otherDiscussions(related)}
<div className='comments'> <div className='comments'>
{story.comments.map(c => this.displayComment(story, c, 0))} {story.comments.map(c => this.displayComment(story, c, 0))}
@ -142,4 +142,4 @@ class Comments extends React.Component {
} }
} }
export default Comments; export default Article;

View File

@ -30,13 +30,10 @@ class Feed extends React.Component {
stories.forEach((x, i) => { stories.forEach((x, i) => {
fetch('/api/' + x.id) fetch('/api/' + x.id)
.then(res => res.json()) .then(res => res.json())
.then(({ story, related }) => { .then(({ story }) => {
Promise.all([ localForage.setItem(x.id, story)
localForage.setItem(x.id, story), .then(console.log('preloaded', x.id, x.title));
localForage.setItem(`related-${x.id}`, related)
]).then(console.log('preloaded', x.id, x.title));
this.props.updateCache(x.id, story); this.props.updateCache(x.id, story);
this.props.updateCache(`related-${x.id}`, related);
}, error => { } }, error => { }
); );
}); });

View File

@ -15,37 +15,18 @@ export const sourceLink = (story) => {
export const infoLine = (story) => ( export const infoLine = (story) => (
<div className="info"> <div className="info">
{story.score} points by {story.author_link ? <a href={story.author_link}>{story.author}</a> : story.author} {story.score} points by <a href={story.author_link}>{story.author}</a>
&#8203; {moment.unix(story.date).fromNow()} &#8203; {moment.unix(story.date).fromNow()}
&#8203; on <a href={story.link}>{story.source}</a> | &#8203; &#8203; on <a href={story.link}>{story.source}</a> | &#8203;
<Link <Link
className={story.num_comments > 99 ? "hot" : ""} className={story.num_comments > 99 ? "hot" : ""}
to={"/" + story.id + "/c"}> to={"/" + story.id + "/c"}
>
{story.num_comments} comment{story.num_comments !== 1 && "s"} {story.num_comments} comment{story.num_comments !== 1 && "s"}
</Link> </Link>
</div> </div>
); );
export const otherDiscussions = (related) => {
const stories = related.filter(r => r.num_comments > 0);
if (!stories.length) {
return null;
}
return (
<div className='related indented info'>
<span>Other discussions: </span>
{stories.map((story, i) =>
<span id={story.id}>
{i !== 0 ? <> &bull; </> : <></>}
<Link className={story.num_comments > 99 ? "hot" : ""} to={"/" + story.id + "/c"}>
{story.source} ({story.num_comments} comment{story.num_comments !== 1 && "s"})
</Link>
</span>
)}
</div>
);
}
export class ToggleDot extends React.Component { export class ToggleDot extends React.Component {
render() { render() {
const id = this.props.id; const id = this.props.id;

File diff suppressed because it is too large Load Diff