From d1c513b9d64b30e7f061d184a8d6e0113e4750cd Mon Sep 17 00:00:00 2001 From: Jason Schwarzenberger Date: Wed, 2 Dec 2020 15:46:06 +1300 Subject: [PATCH] move purify to server side. --- apiserver/misc/news.py | 18 +- apiserver/misc/stuff.py | 3 +- webapp/package.json | 1 + webapp/src/components/Article.svelte | 5 +- webapp/src/components/Comment.svelte | 3 +- webapp/src/components/StoryList.svelte | 3 +- webapp/src/routes/[id].json.js | 10 +- webapp/src/routes/_purify.js | 25 ++ webapp/src/routes/index.json.js | 9 +- webapp/src/routes/search.json.js | 9 +- webapp/yarn.lock | 583 ++++++++++++++++++++++++- 11 files changed, 639 insertions(+), 30 deletions(-) create mode 100644 webapp/src/routes/_purify.js diff --git a/apiserver/misc/news.py b/apiserver/misc/news.py index 5498266..2b0a3b9 100644 --- a/apiserver/misc/news.py +++ b/apiserver/misc/news.py @@ -16,18 +16,10 @@ from misc.time import unix from misc.api import xml import misc.stuff as stuff -def comment(i): - if 'author' not in i: - return False - - c = {} - c['author'] = i.get('author', '') - c['score'] = i.get('points', 0) - c['date'] = unix(i.get('date', 0)) - c['text'] = clean(i.get('text', '') or '') - c['comments'] = [comment(j) for j in i['children']] - c['comments'] = list(filter(bool, c['comments'])) - return c +def clean_comment(comment): + comment['text'] = clean(comment['text']) + comment['comments'] = [clean_comments(c) for c in comment['comments']] + return comment def comment_count(i): alive = 1 if i['author'] else 0 @@ -75,12 +67,14 @@ class Base: data = extruct.extract(markup) s = parse_extruct(s, data) + s['title'] = clean(s['title']) if s['date']: s['date'] = unix(s['date'], tz=self.tz) if 'disqus' in markup: try: s['comments'] = declutter.get_comments(urlref) + s['comments'] = [clean_comments(c) for c in s['comments']] s['comments'] = list(filter(bool, s['comments'])) s['num_comments'] = comment_count(s['comments']) except KeyboardInterrupt: diff --git a/apiserver/misc/stuff.py b/apiserver/misc/stuff.py index b25a688..ae78308 100644 --- a/apiserver/misc/stuff.py +++ b/apiserver/misc/stuff.py @@ -7,6 +7,7 @@ if __name__ == '__main__': from misc.time import unix from misc.api import xml +from utils import clean def _soup_get_text(soup): if not soup: return None @@ -35,7 +36,7 @@ def _parse_comment(soup): if soup.find('link'): c['authorLink'] = _soup_get_text(soup.find('link')) if soup.find('description'): - c['text'] = _soup_get_text(soup.find('description')) + c['text'] = clean(_soup_get_text(soup.find('description'))) if soup.find('pubdate'): c['date'] = unix(soup.find('pubdate').text) elif soup.find('pubDate'): diff --git a/webapp/package.json b/webapp/package.json index 3b0c2dd..71b28aa 100644 --- a/webapp/package.json +++ b/webapp/package.json @@ -13,6 +13,7 @@ "date-fns": "^2.16.1", "dompurify": "^2.2.2", "isomorphic-fetch": "^3.0.0", + "jsdom": "^16.4.0", "lodash": "^4.17.20", "node-fetch": "^2.6.1", "polka": "next", diff --git a/webapp/src/components/Article.svelte b/webapp/src/components/Article.svelte index 5413db8..9cbc8bc 100644 --- a/webapp/src/components/Article.svelte +++ b/webapp/src/components/Article.svelte @@ -1,6 +1,5 @@