Compare commits
109 Commits
master
...
2439c113b3
Author | SHA1 | Date | |
---|---|---|---|
|
2439c113b3 | ||
|
0f5e28136d | ||
|
bb1413b586 | ||
|
0a27c0da1f | ||
|
fe01ea52e5 | ||
|
3daae5fa1b | ||
|
25caee17d6 | ||
|
c1b6349771 | ||
|
54a4c7e55a | ||
|
b12a3570b0 | ||
|
0bfa920654 | ||
|
9341b4d966 | ||
|
a2e5faa3b5 | ||
|
a86eb98c1a | ||
|
abf7f0a802 | ||
|
d288546d6f | ||
|
cc130942ca | ||
|
f0b14408d4 | ||
|
e1830a589b | ||
|
32bc3b906b | ||
|
f5e65632b8 | ||
|
1fe524207e | ||
|
dc3d17b171 | ||
|
539350a83d | ||
|
2f730c1f52 | ||
|
e0960d59f3 | ||
|
f5b38f5c6b | ||
|
c9da2a078b | ||
|
78654e0c63 | ||
|
3b885e4327 | ||
|
55d50a86d8 | ||
|
55e7f6bb14 | ||
|
5668fa5dbc | ||
|
b771b52501 | ||
|
f5c7a658ba | ||
|
f5ccd844da | ||
|
6a91b9402f | ||
|
b80c1a5cb5 | ||
|
b23e470317 | ||
|
7420b5ece9 | ||
|
64ced635cc | ||
|
9318627f1b | ||
|
3d0a3f1577 | ||
|
587b10c438 | ||
|
00954c6cac | ||
|
637bc38476 | ||
|
164b7e72c4 | ||
|
3169af3002 | ||
|
d588a60930 | ||
|
408e2870b2 | ||
|
44b8b36547 | ||
|
4f49684194 | ||
|
1d78b1c592 | ||
|
0374794536 | ||
|
943a1cfa4f | ||
|
9cee370a25 | ||
|
5efc6ef2d3 | ||
|
4ec50e20cb | ||
|
c1b7877f4b | ||
|
7b8cbfc9b9 | ||
|
bfa4108a8e | ||
|
0bd0d40a31 | ||
|
4e04595415 | ||
|
006db2960c | ||
|
1f063f0dac | ||
|
1658346aa9 | ||
|
2dbc702b40 | ||
|
1c4764e67d | ||
|
ee49d2021e | ||
|
c391c50ab1 | ||
|
095f0d549a | ||
|
c21c71667e | ||
|
c3a2c91a11 | ||
|
0f39446a61 | ||
|
351059aab1 | ||
|
4488e2c292 | ||
|
afda5b635c | ||
|
0fc1a44d2b | ||
|
9fff1b9e46 | ||
|
16b59f6c67 | ||
|
939f4775a7 | ||
|
9bfc6fc6fa | ||
|
6ea9844d00 | ||
|
1318259d3d | ||
|
98a0c2257c | ||
|
e6976db25d | ||
|
9edc8b7cca | ||
|
33e21e7f30 | ||
|
892a99eca6 | ||
|
d718d05a04 | ||
|
d1795eb1b8 | ||
|
9f4ff4acf0 | ||
|
db6aad84ec | ||
|
29f8a8b8cc | ||
|
abf8589e02 | ||
|
b759f46582 | ||
|
736cdc8576 | ||
|
244d416f6e | ||
|
5f98a2e76a | ||
|
0567cdfd9b | ||
|
4f90671cec | ||
|
e63a1456a5 | ||
|
76f1d57702 | ||
|
de80389ed0 | ||
|
4e64cf682a | ||
|
c5fe5d25a0 | ||
|
283a2b1545 | ||
|
0d6a86ace2 | ||
|
f23bf628e0 |
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
[submodule "readerserver"]
|
||||||
|
path = readerserver
|
||||||
|
url = https://github.com/master5o1/declutter.git
|
20
README.md
20
README.md
@@ -20,7 +20,7 @@ $ sudo apt install yarn
|
|||||||
Clone this repo:
|
Clone this repo:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
$ git clone https://gogs.tannercollin.com/tanner/qotnews.git
|
$ git clone --recurse-submodules https://git.1j.nz/jason/qotnews.git
|
||||||
$ cd qotnews
|
$ cd qotnews
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -37,14 +37,14 @@ $ source env/bin/activate
|
|||||||
|
|
||||||
Configure Praw for your Reddit account (optional):
|
Configure Praw for your Reddit account (optional):
|
||||||
|
|
||||||
* Go to https://www.reddit.com/prefs/apps
|
- Go to https://www.reddit.com/prefs/apps
|
||||||
* Click "Create app"
|
- Click "Create app"
|
||||||
* Name: whatever
|
- Name: whatever
|
||||||
* App type: script
|
- App type: script
|
||||||
* Description: blank
|
- Description: blank
|
||||||
* About URL: blank
|
- About URL: blank
|
||||||
* Redirect URL: your GitHub profile
|
- Redirect URL: your GitHub profile
|
||||||
* Submit, copy the client ID and client secret into `settings.py` below
|
- Submit, copy the client ID and client secret into `settings.py` below
|
||||||
|
|
||||||
```text
|
```text
|
||||||
(env) $ vim settings.py.example
|
(env) $ vim settings.py.example
|
||||||
@@ -109,7 +109,7 @@ stdout_logfile_maxbytes=1MB
|
|||||||
[program:qotnewsreader]
|
[program:qotnewsreader]
|
||||||
user=qotnews
|
user=qotnews
|
||||||
directory=/home/qotnews/qotnews/readerserver
|
directory=/home/qotnews/qotnews/readerserver
|
||||||
command=node main.js
|
command=node index.js
|
||||||
autostart=true
|
autostart=true
|
||||||
autorestart=true
|
autorestart=true
|
||||||
stderr_logfile=/var/log/qotnewsreader.log
|
stderr_logfile=/var/log/qotnewsreader.log
|
||||||
|
@@ -1,9 +1,9 @@
|
|||||||
import json
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
|
from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
|
||||||
from sqlalchemy.ext.declarative import declarative_base
|
from sqlalchemy.ext.declarative import declarative_base
|
||||||
from sqlalchemy.orm import sessionmaker
|
from sqlalchemy.orm import sessionmaker
|
||||||
from sqlalchemy.exc import IntegrityError
|
from sqlalchemy.exc import IntegrityError
|
||||||
|
from sqlalchemy.types import JSON
|
||||||
|
|
||||||
engine = create_engine('sqlite:///data/qotnews.sqlite')
|
engine = create_engine('sqlite:///data/qotnews.sqlite')
|
||||||
Session = sessionmaker(bind=engine)
|
Session = sessionmaker(bind=engine)
|
||||||
@@ -15,8 +15,8 @@ class Story(Base):
|
|||||||
|
|
||||||
sid = Column(String(16), primary_key=True)
|
sid = Column(String(16), primary_key=True)
|
||||||
ref = Column(String(16), unique=True)
|
ref = Column(String(16), unique=True)
|
||||||
meta_json = Column(String)
|
meta = Column(JSON)
|
||||||
full_json = Column(String)
|
data = Column(JSON)
|
||||||
title = Column(String)
|
title = Column(String)
|
||||||
|
|
||||||
class Reflist(Base):
|
class Reflist(Base):
|
||||||
@@ -24,6 +24,7 @@ class Reflist(Base):
|
|||||||
|
|
||||||
rid = Column(Integer, primary_key=True)
|
rid = Column(Integer, primary_key=True)
|
||||||
ref = Column(String(16), unique=True)
|
ref = Column(String(16), unique=True)
|
||||||
|
urlref = Column(String)
|
||||||
sid = Column(String, ForeignKey('stories.sid'), unique=True)
|
sid = Column(String, ForeignKey('stories.sid'), unique=True)
|
||||||
source = Column(String(16))
|
source = Column(String(16))
|
||||||
|
|
||||||
@@ -36,19 +37,21 @@ def get_story(sid):
|
|||||||
|
|
||||||
def put_story(story):
|
def put_story(story):
|
||||||
story = story.copy()
|
story = story.copy()
|
||||||
full_json = json.dumps(story)
|
data = {}
|
||||||
|
data.update(story)
|
||||||
|
|
||||||
story.pop('text', None)
|
meta = {}
|
||||||
story.pop('comments', None)
|
meta.update(story)
|
||||||
meta_json = json.dumps(story)
|
meta.pop('text', None)
|
||||||
|
meta.pop('comments', None)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
session = Session()
|
session = Session()
|
||||||
s = Story(
|
s = Story(
|
||||||
sid=story['id'],
|
sid=story['id'],
|
||||||
ref=story['ref'],
|
ref=story['ref'],
|
||||||
full_json=full_json,
|
data=data,
|
||||||
meta_json=meta_json,
|
meta=meta,
|
||||||
title=story.get('title', None),
|
title=story.get('title', None),
|
||||||
)
|
)
|
||||||
session.merge(s)
|
session.merge(s)
|
||||||
@@ -63,24 +66,39 @@ def get_story_by_ref(ref):
|
|||||||
session = Session()
|
session = Session()
|
||||||
return session.query(Story).filter(Story.ref==ref).first()
|
return session.query(Story).filter(Story.ref==ref).first()
|
||||||
|
|
||||||
def get_reflist(amount):
|
def get_stories_by_url(url):
|
||||||
session = Session()
|
session = Session()
|
||||||
q = session.query(Reflist).order_by(Reflist.rid.desc()).limit(amount)
|
return session.query(Story).\
|
||||||
return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]
|
filter(Story.title != None).\
|
||||||
|
filter(Story.meta['url'].as_string() == url).\
|
||||||
|
order_by(Story.meta['date'].desc())
|
||||||
|
|
||||||
def get_stories(amount):
|
def get_ref_by_sid(sid):
|
||||||
session = Session()
|
session = Session()
|
||||||
q = session.query(Reflist, Story.meta_json).\
|
x = session.query(Reflist).\
|
||||||
order_by(Reflist.rid.desc()).\
|
filter(Reflist.sid == sid).\
|
||||||
|
first()
|
||||||
|
return dict(ref=x.ref, sid=x.sid, source=x.source, urlref=x.urlref)
|
||||||
|
|
||||||
|
def get_reflist():
|
||||||
|
session = Session()
|
||||||
|
q = session.query(Reflist).order_by(Reflist.rid.desc())
|
||||||
|
return [dict(ref=x.ref, sid=x.sid, source=x.source, urlref=x.urlref) for x in q.all()]
|
||||||
|
|
||||||
|
def get_stories(maxage=60*60*24*2):
|
||||||
|
time = datetime.now().timestamp() - maxage
|
||||||
|
session = Session()
|
||||||
|
q = session.query(Reflist, Story.meta).\
|
||||||
join(Story).\
|
join(Story).\
|
||||||
filter(Story.title != None).\
|
filter(Story.title != None).\
|
||||||
limit(amount)
|
filter(Story.meta['date'].as_integer() > time).\
|
||||||
|
order_by(Story.meta['date'].desc())
|
||||||
return [x[1] for x in q]
|
return [x[1] for x in q]
|
||||||
|
|
||||||
def put_ref(ref, sid, source):
|
def put_ref(ref, sid, source, urlref):
|
||||||
try:
|
try:
|
||||||
session = Session()
|
session = Session()
|
||||||
r = Reflist(ref=ref, sid=sid, source=source)
|
r = Reflist(ref=ref, sid=sid, source=source, urlref=urlref)
|
||||||
session.add(r)
|
session.add(r)
|
||||||
session.commit()
|
session.commit()
|
||||||
except:
|
except:
|
||||||
|
@@ -6,61 +6,84 @@ logging.basicConfig(
|
|||||||
import requests
|
import requests
|
||||||
import time
|
import time
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
import itertools
|
||||||
|
|
||||||
import settings
|
import settings
|
||||||
from feeds import hackernews, reddit, tildes, manual
|
from feeds import hackernews, reddit, tildes, substack, manual
|
||||||
|
from feeds.sitemap import Sitemap
|
||||||
OUTLINE_API = 'https://api.outline.com/v3/parse_article'
|
from feeds.category import Category
|
||||||
READ_API = 'http://127.0.0.1:33843'
|
from scrapers import outline, declutter, headless, simple
|
||||||
|
|
||||||
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
|
INVALID_DOMAINS = ['youtube.com', 'bloomberg.com', 'wsj.com']
|
||||||
TWO_DAYS = 60*60*24*2
|
|
||||||
|
|
||||||
def list():
|
substacks = {}
|
||||||
feed = []
|
for key, value in settings.SUBSTACK.items():
|
||||||
|
substacks[key] = substack.Publication(value['url'])
|
||||||
|
categories = {}
|
||||||
|
for key, value in settings.CATEGORY.items():
|
||||||
|
categories[key] = Category(value)
|
||||||
|
sitemaps = {}
|
||||||
|
for key, value in settings.SITEMAP.items():
|
||||||
|
sitemaps[key] = Sitemap(value)
|
||||||
|
|
||||||
|
def get_list():
|
||||||
|
feeds = {}
|
||||||
|
|
||||||
if settings.NUM_HACKERNEWS:
|
if settings.NUM_HACKERNEWS:
|
||||||
feed += [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
|
feeds['hackernews'] = [(x, 'hackernews', x) for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
|
||||||
|
|
||||||
if settings.NUM_REDDIT:
|
if settings.NUM_REDDIT:
|
||||||
feed += [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
|
feeds['reddit'] = [(x, 'reddit', x) for x in reddit.feed()[:settings.NUM_REDDIT]]
|
||||||
|
|
||||||
if settings.NUM_TILDES:
|
if settings.NUM_TILDES:
|
||||||
feed += [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
|
feeds['tildes'] = [(x, 'tildes', x) for x in tildes.feed()[:settings.NUM_TILDES]]
|
||||||
|
|
||||||
|
if settings.NUM_SUBSTACK:
|
||||||
|
feeds['substack'] = [(x, 'substack', x) for x in substack.top.feed()[:settings.NUM_SUBSTACK]]
|
||||||
|
|
||||||
|
for key, publication in substacks.items():
|
||||||
|
count = settings.SUBSTACK[key]['count']
|
||||||
|
feeds[key] = [(x, key, x) for x in publication.feed()[:count]]
|
||||||
|
|
||||||
|
for key, sites in categories.items():
|
||||||
|
count = settings.CATEGORY[key].get('count') or 0
|
||||||
|
excludes = settings.CATEGORY[key].get('excludes')
|
||||||
|
tz = settings.CATEGORY[key].get('tz')
|
||||||
|
feeds[key] = [(x, key, u) for x, u in sites.feed(excludes)[:count]]
|
||||||
|
|
||||||
|
for key, sites in sitemaps.items():
|
||||||
|
count = settings.SITEMAP[key].get('count') or 0
|
||||||
|
excludes = settings.SITEMAP[key].get('excludes')
|
||||||
|
feeds[key] = [(x, key, u) for x, u in sites.feed(excludes)[:count]]
|
||||||
|
|
||||||
|
values = feeds.values()
|
||||||
|
feed = itertools.chain.from_iterable(itertools.zip_longest(*values, fillvalue=None))
|
||||||
|
feed = list(filter(None, feed))
|
||||||
return feed
|
return feed
|
||||||
|
|
||||||
def get_article(url):
|
def get_article(url):
|
||||||
try:
|
scrapers = {
|
||||||
params = {'source_url': url}
|
'headless': headless,
|
||||||
headers = {'Referer': 'https://outline.com/'}
|
'simple': simple,
|
||||||
r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=20)
|
'outline': outline,
|
||||||
if r.status_code == 429:
|
'declutter': declutter,
|
||||||
logging.info('Rate limited by outline, sleeping 30s and skipping...')
|
}
|
||||||
time.sleep(30)
|
available = settings.SCRAPERS or ['headless', 'simple']
|
||||||
return ''
|
if 'simple' not in available:
|
||||||
if r.status_code != 200:
|
available += ['simple']
|
||||||
raise Exception('Bad response code ' + str(r.status_code))
|
|
||||||
html = r.json()['data']['html']
|
|
||||||
if 'URL is not supported by Outline' in html:
|
|
||||||
raise Exception('URL not supported by Outline')
|
|
||||||
return html
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
raise
|
|
||||||
except BaseException as e:
|
|
||||||
logging.error('Problem outlining article: {}'.format(str(e)))
|
|
||||||
|
|
||||||
logging.info('Trying our server instead...')
|
for scraper in available:
|
||||||
|
if scraper not in scrapers.keys():
|
||||||
try:
|
continue
|
||||||
r = requests.post(READ_API, data=dict(url=url), timeout=20)
|
try:
|
||||||
if r.status_code != 200:
|
html = scrapers[scraper].get_html(url)
|
||||||
raise Exception('Bad response code ' + str(r.status_code))
|
if html:
|
||||||
return r.text
|
return html
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
raise
|
raise
|
||||||
except BaseException as e:
|
except:
|
||||||
logging.error('Problem getting article: {}'.format(str(e)))
|
pass
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def get_content_type(url):
|
def get_content_type(url):
|
||||||
try:
|
try:
|
||||||
@@ -78,7 +101,7 @@ def get_content_type(url):
|
|||||||
except:
|
except:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def update_story(story, is_manual=False):
|
def update_story(story, is_manual=False, urlref=None):
|
||||||
res = {}
|
res = {}
|
||||||
|
|
||||||
if story['source'] == 'hackernews':
|
if story['source'] == 'hackernews':
|
||||||
@@ -87,6 +110,14 @@ def update_story(story, is_manual=False):
|
|||||||
res = reddit.story(story['ref'])
|
res = reddit.story(story['ref'])
|
||||||
elif story['source'] == 'tildes':
|
elif story['source'] == 'tildes':
|
||||||
res = tildes.story(story['ref'])
|
res = tildes.story(story['ref'])
|
||||||
|
elif story['source'] == 'substack':
|
||||||
|
res = substack.top.story(story['ref'])
|
||||||
|
elif story['source'] in categories.keys():
|
||||||
|
res = categories[story['source']].story(story['ref'], urlref)
|
||||||
|
elif story['source'] in sitemaps.keys():
|
||||||
|
res = sitemaps[story['source']].story(story['ref'], urlref)
|
||||||
|
elif story['source'] in substacks.keys():
|
||||||
|
res = substacks[story['source']].story(story['ref'])
|
||||||
elif story['source'] == 'manual':
|
elif story['source'] == 'manual':
|
||||||
res = manual.story(story['ref'])
|
res = manual.story(story['ref'])
|
||||||
|
|
||||||
@@ -96,7 +127,7 @@ def update_story(story, is_manual=False):
|
|||||||
logging.info('Story not ready yet')
|
logging.info('Story not ready yet')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if story['date'] and not is_manual and story['date'] + TWO_DAYS < time.time():
|
if story['date'] and not is_manual and story['date'] + settings.MAX_STORY_AGE < time.time():
|
||||||
logging.info('Story too old, removing')
|
logging.info('Story too old, removing')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
72
apiserver/feeds/category.py
Normal file
72
apiserver/feeds/category.py
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
import logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
level=logging.DEBUG)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0,'.')
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
import settings
|
||||||
|
from utils import clean
|
||||||
|
from misc.api import xml
|
||||||
|
from misc.news import Base
|
||||||
|
|
||||||
|
def _filter_links(links, category_url, excludes=None):
|
||||||
|
links = list(filter(None, [link if link.startswith(category_url) else None for link in links]))
|
||||||
|
links = list(filter(None, [link if link != category_url else None for link in links]))
|
||||||
|
links = list(set(links))
|
||||||
|
if excludes:
|
||||||
|
links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
|
||||||
|
return links
|
||||||
|
|
||||||
|
def _get_category(category_url, excludes=None):
|
||||||
|
base_url = '/'.join(category_url.split('/')[:3])
|
||||||
|
markup = xml(lambda x: category_url)
|
||||||
|
if not markup: return []
|
||||||
|
soup = BeautifulSoup(markup, features='html.parser')
|
||||||
|
links = soup.find_all('a', href=True)
|
||||||
|
links = [link.get('href') for link in links]
|
||||||
|
links = [f"{base_url}{link}" if link.startswith('/') else link for link in links]
|
||||||
|
links = _filter_links(links, category_url, excludes)
|
||||||
|
return links
|
||||||
|
|
||||||
|
class Category(Base):
|
||||||
|
def __init__(self, config):
|
||||||
|
self.config = config
|
||||||
|
self.category_url = config.get('url')
|
||||||
|
self.tz = config.get('tz')
|
||||||
|
|
||||||
|
def feed(self, excludes=None):
|
||||||
|
links = []
|
||||||
|
if isinstance(self.category_url, str):
|
||||||
|
links += _get_category(self.category_url, excludes)
|
||||||
|
elif isinstance(self.category_url, list):
|
||||||
|
for url in self.category_url:
|
||||||
|
links += _get_category(url, excludes)
|
||||||
|
links = list(set(links))
|
||||||
|
return [(self.get_id(link), link) for link in links]
|
||||||
|
|
||||||
|
|
||||||
|
# scratchpad so I can quickly develop the parser
|
||||||
|
if __name__ == '__main__':
|
||||||
|
print("Category: RadioNZ")
|
||||||
|
site = Category({ 'url': "https://www.rnz.co.nz/news/" })
|
||||||
|
excludes = [
|
||||||
|
'rnz.co.nz/news/sport',
|
||||||
|
'rnz.co.nz/weather',
|
||||||
|
'rnz.co.nz/news/weather',
|
||||||
|
]
|
||||||
|
posts = site.feed(excludes)
|
||||||
|
print(posts[:5])
|
||||||
|
print(site.story(posts[0][0], posts[0][1]))
|
||||||
|
|
||||||
|
print("Category: Newsroom")
|
||||||
|
site = Category({ 'url': "https://www.newsroom.co.nz/news/", 'tz': 'Pacific/Auckland'})
|
||||||
|
posts = site.feed()
|
||||||
|
print(posts[:5])
|
||||||
|
print(site.story(posts[0][0], posts[0][1]))
|
||||||
|
|
||||||
|
|
@@ -40,7 +40,7 @@ def api(route, ref=None):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def feed():
|
def feed():
|
||||||
return [str(x) for x in api(API_TOPSTORIES) or []]
|
return ['hn:'+str(x) for x in api(API_TOPSTORIES) or []]
|
||||||
|
|
||||||
def comment(i):
|
def comment(i):
|
||||||
if 'author' not in i:
|
if 'author' not in i:
|
||||||
@@ -60,6 +60,7 @@ def comment_count(i):
|
|||||||
return sum([comment_count(c) for c in i['comments']]) + alive
|
return sum([comment_count(c) for c in i['comments']]) + alive
|
||||||
|
|
||||||
def story(ref):
|
def story(ref):
|
||||||
|
ref = ref.replace('hn:', '')
|
||||||
r = api(API_ITEM, ref)
|
r = api(API_ITEM, ref)
|
||||||
if not r: return False
|
if not r: return False
|
||||||
|
|
||||||
|
@@ -7,6 +7,8 @@ import requests
|
|||||||
import time
|
import time
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
import settings
|
||||||
|
|
||||||
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'
|
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0'
|
||||||
|
|
||||||
def api(route):
|
def api(route):
|
||||||
@@ -33,7 +35,7 @@ def story(ref):
|
|||||||
|
|
||||||
s = {}
|
s = {}
|
||||||
s['author'] = 'manual submission'
|
s['author'] = 'manual submission'
|
||||||
s['author_link'] = 'https://news.t0.vc'
|
s['author_link'] = 'https://{}'.format(settings.HOSTNAME)
|
||||||
s['score'] = 0
|
s['score'] = 0
|
||||||
s['date'] = int(time.time())
|
s['date'] = int(time.time())
|
||||||
s['title'] = str(soup.title.string) if soup.title else ref
|
s['title'] = str(soup.title.string) if soup.title else ref
|
||||||
|
@@ -73,7 +73,7 @@ def story(ref):
|
|||||||
s['comments'] = list(filter(bool, s['comments']))
|
s['comments'] = list(filter(bool, s['comments']))
|
||||||
s['num_comments'] = r.num_comments
|
s['num_comments'] = r.num_comments
|
||||||
|
|
||||||
if s['score'] < 25 and s['num_comments'] < 10:
|
if s['score'] < settings.REDDIT_SCORE_THRESHOLD and s['num_comments'] < settings.REDDIT_COMMENT_THRESHOLD:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if r.selftext:
|
if r.selftext:
|
||||||
|
101
apiserver/feeds/sitemap.py
Normal file
101
apiserver/feeds/sitemap.py
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
import logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
level=logging.DEBUG)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0,'.')
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
import settings
|
||||||
|
from utils import clean
|
||||||
|
from misc.time import unix
|
||||||
|
from misc.api import xml
|
||||||
|
from misc.news import Base
|
||||||
|
|
||||||
|
def _get_sitemap_date(a):
|
||||||
|
if a.find('lastmod'):
|
||||||
|
return a.find('lastmod').text
|
||||||
|
if a.find('news:publication_date'):
|
||||||
|
return a.find('news:publication_date').text
|
||||||
|
if a.find('ns2:publication_date'):
|
||||||
|
return a.find('ns2:publication_date').text
|
||||||
|
return ''
|
||||||
|
|
||||||
|
def _filter_links(links, excludes=None):
|
||||||
|
too_old = datetime.now().timestamp() - settings.MAX_STORY_AGE
|
||||||
|
links = list(filter(None, [a if _get_sitemap_date(a) else None for a in links]))
|
||||||
|
links = list(filter(None, [a if unix(_get_sitemap_date(a)) > too_old else None for a in links]))
|
||||||
|
links.sort(key=lambda a: unix(_get_sitemap_date(a)), reverse=True)
|
||||||
|
|
||||||
|
links = [x.find('loc').text for x in links] or []
|
||||||
|
links = list(set(links))
|
||||||
|
if excludes:
|
||||||
|
links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
|
||||||
|
return links
|
||||||
|
|
||||||
|
def _get_sitemap(feed_url, excludes=None):
|
||||||
|
markup = xml(lambda x: feed_url)
|
||||||
|
if not markup: return []
|
||||||
|
soup = BeautifulSoup(markup, features='lxml')
|
||||||
|
links = []
|
||||||
|
feed_urls = []
|
||||||
|
if soup.find('sitemapindex'):
|
||||||
|
sitemap = soup.find('sitemapindex').findAll('sitemap')
|
||||||
|
feed_urls = list(filter(None, [a if a.find('loc') else None for a in sitemap]))
|
||||||
|
if soup.find('urlset'):
|
||||||
|
sitemap = soup.find('urlset').findAll('url')
|
||||||
|
links = list(filter(None, [a if a.find('loc') else None for a in sitemap]))
|
||||||
|
|
||||||
|
feed_urls = _filter_links(feed_urls, excludes)
|
||||||
|
links = _filter_links(links, excludes)
|
||||||
|
|
||||||
|
for url in feed_urls:
|
||||||
|
links += _get_sitemap(url, excludes)
|
||||||
|
return list(set(links))
|
||||||
|
|
||||||
|
class Sitemap(Base):
|
||||||
|
def __init__(self, config):
|
||||||
|
self.config = config
|
||||||
|
self.sitemap_url = config.get('url')
|
||||||
|
self.tz = config.get('tz')
|
||||||
|
|
||||||
|
def feed(self, excludes=None):
|
||||||
|
links = []
|
||||||
|
if isinstance(self.sitemap_url, str):
|
||||||
|
links += _get_sitemap(self.sitemap_url, excludes)
|
||||||
|
elif isinstance(self.sitemap_url, list):
|
||||||
|
for url in self.sitemap_url:
|
||||||
|
links += _get_sitemap(url, excludes)
|
||||||
|
links = list(set(links))
|
||||||
|
return [(self.get_id(link), link) for link in links]
|
||||||
|
|
||||||
|
# scratchpad so I can quickly develop the parser
|
||||||
|
if __name__ == '__main__':
|
||||||
|
print("Sitemap: The Spinoff")
|
||||||
|
site = Sitemap({ 'url': "https://thespinoff.co.nz/sitemap.xml" })
|
||||||
|
excludes = [
|
||||||
|
'thespinoff.co.nz/sitemap-misc.xml',
|
||||||
|
'thespinoff.co.nz/sitemap-authors.xml',
|
||||||
|
'thespinoff.co.nz/sitemap-tax-category.xml',
|
||||||
|
]
|
||||||
|
posts = site.feed(excludes)
|
||||||
|
print(posts[:5])
|
||||||
|
print(site.story(posts[0][0], posts[0][1]))
|
||||||
|
|
||||||
|
print("Sitemap: Newshub")
|
||||||
|
site = Sitemap({
|
||||||
|
'url': [
|
||||||
|
'https://www.newshub.co.nz/home/politics.gnewssitemap.xml',
|
||||||
|
'https://www.newshub.co.nz/home/new-zealand.gnewssitemap.xml',
|
||||||
|
'https://www.newshub.co.nz/home/world.gnewssitemap.xml',
|
||||||
|
'https://www.newshub.co.nz/home/money.gnewssitemap.xml',
|
||||||
|
],
|
||||||
|
})
|
||||||
|
posts = site.feed()
|
||||||
|
print(posts[:5])
|
||||||
|
print(site.story(posts[0][0], posts[0][1]))
|
||||||
|
|
174
apiserver/feeds/substack.py
Normal file
174
apiserver/feeds/substack.py
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
import logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
level=logging.DEBUG)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0,'.')
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import settings
|
||||||
|
from misc.time import unix
|
||||||
|
from misc.metadata import get_icons
|
||||||
|
from misc.api import xml, json
|
||||||
|
from utils import clean
|
||||||
|
|
||||||
|
SUBSTACK_REFERER = 'https://substack.com'
|
||||||
|
SUBSTACK_API_TOP_POSTS = lambda x: "https://substack.com/api/v1/reader/top-posts"
|
||||||
|
|
||||||
|
def author_link(author_id, base_url):
|
||||||
|
return f"{base_url}/people/{author_id}"
|
||||||
|
def api_comments(post_id, base_url):
|
||||||
|
return f"{base_url}/api/v1/post/{post_id}/comments?all_comments=true&sort=best_first"
|
||||||
|
def api_stories(x, base_url):
|
||||||
|
return f"{base_url}/api/v1/archive?sort=new&search=&offset=0&limit=100"
|
||||||
|
|
||||||
|
def comment(i):
|
||||||
|
if 'body' not in i:
|
||||||
|
return False
|
||||||
|
|
||||||
|
c = {}
|
||||||
|
c['date'] = unix(i.get('date'))
|
||||||
|
c['author'] = i.get('name', '')
|
||||||
|
c['score'] = i.get('reactions').get('❤')
|
||||||
|
c['text'] = clean(i.get('body', '') or '')
|
||||||
|
c['comments'] = [comment(j) for j in i['children']]
|
||||||
|
c['comments'] = list(filter(bool, c['comments']))
|
||||||
|
|
||||||
|
return c
|
||||||
|
|
||||||
|
class Publication:
|
||||||
|
def __init__(self, domain):
|
||||||
|
self.BASE_DOMAIN = domain
|
||||||
|
|
||||||
|
def ref_prefix(self, ref):
|
||||||
|
return f"{self.BASE_DOMAIN}/#id:{ref}"
|
||||||
|
|
||||||
|
def strip_ref_prefix(self, ref):
|
||||||
|
return ref.replace(f"{self.BASE_DOMAIN}/#id:", '')
|
||||||
|
|
||||||
|
def feed(self):
|
||||||
|
too_old = datetime.now().timestamp() - settings.MAX_STORY_AGE
|
||||||
|
stories = json(lambda x: api_stories(x, self.BASE_DOMAIN), headers={'Referer': self.BASE_DOMAIN})
|
||||||
|
if not stories: return []
|
||||||
|
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
|
||||||
|
stories = list(filter(None, [i if unix(i.get('post_date')) > too_old else None for i in stories]))
|
||||||
|
stories.sort(key=lambda a: unix(a.get('post_date')), reverse=True)
|
||||||
|
|
||||||
|
return [self.ref_prefix(str(i.get("id"))) for i in stories or []]
|
||||||
|
|
||||||
|
def story(self, ref):
|
||||||
|
ref = self.strip_ref_prefix(ref)
|
||||||
|
stories = json(lambda x: api_stories(x, self.BASE_DOMAIN), headers={'Referer': self.BASE_DOMAIN})
|
||||||
|
if not stories: return False
|
||||||
|
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
|
||||||
|
stories = list(filter(None, [i if str(i.get('id')) == ref else None for i in stories]))
|
||||||
|
|
||||||
|
if len(stories) == 0:
|
||||||
|
return False
|
||||||
|
|
||||||
|
r = stories[0]
|
||||||
|
if not r:
|
||||||
|
return False
|
||||||
|
|
||||||
|
s = {}
|
||||||
|
s['author'] = ''
|
||||||
|
s['author_link'] = ''
|
||||||
|
|
||||||
|
s['date'] = unix(r.get('post_date'))
|
||||||
|
s['score'] = r.get('reactions').get('❤')
|
||||||
|
s['title'] = r.get('title', '')
|
||||||
|
s['link'] = r.get('canonical_url', '')
|
||||||
|
s['url'] = r.get('canonical_url', '')
|
||||||
|
comments = json(lambda x: api_comments(x, self.BASE_DOMAIN), r.get('id'), headers={'Referer': self.BASE_DOMAIN})
|
||||||
|
s['comments'] = [comment(i) for i in comments.get('comments')]
|
||||||
|
s['comments'] = list(filter(bool, s['comments']))
|
||||||
|
s['num_comments'] = r.get('comment_count', 0)
|
||||||
|
|
||||||
|
authors = list(filter(None, [self._bylines(byline) for byline in r.get('publishedBylines')]))
|
||||||
|
if len(authors):
|
||||||
|
s['author'] = authors[0].get('name')
|
||||||
|
s['author_link'] = authors[0].get('link')
|
||||||
|
|
||||||
|
markup = xml(lambda x: s['link'])
|
||||||
|
if markup:
|
||||||
|
icons = get_icons(markup, url=s['link'])
|
||||||
|
if icons:
|
||||||
|
s['icon'] = icons[0]
|
||||||
|
|
||||||
|
return s
|
||||||
|
|
||||||
|
def _bylines(self, b):
|
||||||
|
if 'id' not in b:
|
||||||
|
return None
|
||||||
|
a = {}
|
||||||
|
a['name'] = b.get('name')
|
||||||
|
a['link'] = author_link(b.get('id'), self.BASE_DOMAIN)
|
||||||
|
return a
|
||||||
|
|
||||||
|
|
||||||
|
class Top:
|
||||||
|
def ref_prefix(self, base_url, ref):
|
||||||
|
return f"{base_url}/#id:{ref}"
|
||||||
|
|
||||||
|
def strip_ref_prefix(self, ref):
|
||||||
|
if '/#id:' in ref:
|
||||||
|
base_url, item = ref.split(f"/#id:")
|
||||||
|
return item
|
||||||
|
return ref
|
||||||
|
|
||||||
|
def feed(self):
|
||||||
|
too_old = datetime.now().timestamp() - settings.MAX_STORY_AGE
|
||||||
|
stories = json(SUBSTACK_API_TOP_POSTS, headers={'Referer': SUBSTACK_REFERER})
|
||||||
|
if not stories: return []
|
||||||
|
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
|
||||||
|
stories = list(filter(None, [i if unix(i.get('post_date')) > too_old else None for i in stories]))
|
||||||
|
stories.sort(key=lambda a: unix(a.get('post_date')), reverse=True)
|
||||||
|
stories = [self.ref_prefix(str(i.get("pub").get("base_url")), str(i.get("id"))) for i in stories]
|
||||||
|
return stories
|
||||||
|
|
||||||
|
def story(self, ref):
|
||||||
|
ref = self.strip_ref_prefix(ref)
|
||||||
|
stories = json(SUBSTACK_API_TOP_POSTS, headers={'Referer': SUBSTACK_REFERER})
|
||||||
|
if not stories: return False
|
||||||
|
stories = list(filter(None, [i if i.get("audience") == "everyone" else None for i in stories]))
|
||||||
|
stories = list(filter(None, [i if str(i.get('id')) == ref else None for i in stories]))
|
||||||
|
|
||||||
|
if len(stories) == 0:
|
||||||
|
return False
|
||||||
|
|
||||||
|
r = stories[0]
|
||||||
|
if not r:
|
||||||
|
return False
|
||||||
|
|
||||||
|
s = {}
|
||||||
|
pub = r.get('pub')
|
||||||
|
base_url = pub.get('base_url')
|
||||||
|
s['author'] = pub.get('author_name')
|
||||||
|
s['author_link'] = author_link(pub.get('author_id'), base_url)
|
||||||
|
|
||||||
|
s['date'] = unix(r.get('post_date'))
|
||||||
|
s['score'] = r.get('score')
|
||||||
|
s['title'] = r.get('title', '')
|
||||||
|
s['link'] = r.get('canonical_url', '')
|
||||||
|
s['url'] = r.get('canonical_url', '')
|
||||||
|
comments = json(lambda x: api_comments(x, base_url), r.get('id'), headers={'Referer': SUBSTACK_REFERER})
|
||||||
|
s['comments'] = [comment(i) for i in comments.get('comments')]
|
||||||
|
s['comments'] = list(filter(bool, s['comments']))
|
||||||
|
s['num_comments'] = r.get('comment_count', 0)
|
||||||
|
|
||||||
|
return s
|
||||||
|
|
||||||
|
top = Top()
|
||||||
|
|
||||||
|
# scratchpad so I can quickly develop the parser
|
||||||
|
if __name__ == '__main__':
|
||||||
|
top_posts = top.feed()
|
||||||
|
print(top.story(top_posts[0]))
|
||||||
|
|
||||||
|
webworm = Publication("https://www.webworm.co/")
|
||||||
|
posts = webworm.feed()
|
||||||
|
print(webworm.story(posts[0]))
|
40
apiserver/misc/api.py
Normal file
40
apiserver/misc/api.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
import logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
level=logging.DEBUG)
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
GOOGLEBOT_USER_AGENT = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
|
||||||
|
GOOGLEBOT_IP = '66.249.66.1'
|
||||||
|
TIMEOUT = 30
|
||||||
|
|
||||||
|
def xml(route, ref=None, headers=None, use_googlebot=True):
    """GET route(ref) and return the response body as text.

    route is a callable that maps ref to the URL to fetch. headers are extra
    request headers; the mapping passed in is never modified. When
    use_googlebot is true, the Googlebot User-Agent and X-Forwarded-For
    headers are added to the request.

    Returns the response text, or False on any failure (non-200 status,
    network error, ...). KeyboardInterrupt is re-raised.
    """
    # Work on a private copy: the previous shared default dict kept the
    # Googlebot headers from earlier calls, and caller dicts were mutated.
    headers = dict(headers) if headers else {}
    try:
        if use_googlebot:
            headers['User-Agent'] = GOOGLEBOT_USER_AGENT
            headers['X-Forwarded-For'] = GOOGLEBOT_IP
        r = requests.get(route(ref), headers=headers, timeout=TIMEOUT)
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.text
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem hitting URL: {}'.format(str(e)))
        return False
|
||||||
|
|
||||||
|
def json(route, ref=None, headers=None, use_googlebot=True):
    """GET route(ref) and return the decoded JSON response.

    route is a callable that maps ref to the URL to fetch. headers are extra
    request headers; the mapping passed in is never modified. When
    use_googlebot is true, the Googlebot User-Agent and X-Forwarded-For
    headers are added to the request.

    Returns the parsed JSON, or False on any failure (non-200 status,
    network error, undecodable body, ...). KeyboardInterrupt is re-raised.
    """
    # Work on a private copy: the previous shared default dict kept the
    # Googlebot headers from earlier calls, and caller dicts were mutated.
    headers = dict(headers) if headers else {}
    try:
        if use_googlebot:
            headers['User-Agent'] = GOOGLEBOT_USER_AGENT
            headers['X-Forwarded-For'] = GOOGLEBOT_IP
        r = requests.get(route(ref), headers=headers, timeout=TIMEOUT)
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem hitting URL: {}'.format(str(e)))
        return False
|
14
apiserver/misc/icons.py
Normal file
14
apiserver/misc/icons.py
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
def get_icons(markup, url=''):
    """Return the icon URLs declared in an HTML document, best candidates first.

    markup is the HTML source. url is the address the page was fetched from;
    relative icon hrefs are resolved against it (with the default empty
    string they are returned as written).

    Order of preference: 32x32 icons, 16x16 icons, "shortcut icon" links,
    then any other rel="icon" link. Duplicates are removed while keeping the
    first occurrence.
    """
    # Local import: urllib is not imported at the top of this module.
    from urllib.parse import urljoin

    soup = BeautifulSoup(markup, features='html.parser')
    icon32 = soup.find_all('link', rel="icon", href=True, sizes="32x32")
    icon16 = soup.find_all('link', rel="icon", href=True, sizes="16x16")
    favicon = soup.find_all('link', rel="shortcut icon", href=True)
    others = soup.find_all('link', rel="icon", href=True)
    candidates = icon32 + icon16 + favicon + others

    # dict.fromkeys de-duplicates but, unlike set(), keeps the priority order.
    hrefs = dict.fromkeys(i.get('href') for i in candidates)
    # urljoin leaves absolute URLs alone and resolves '/x', 'x' and '//host/x'.
    return [urljoin(url, href) for href in hrefs if href]
|
84
apiserver/misc/metadata.py
Normal file
84
apiserver/misc/metadata.py
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
def get_icons(markup, url):
    """Return the icon URLs declared in an HTML document, best candidates first.

    markup is the HTML source and url the address the page was fetched from;
    relative icon hrefs are resolved against url.

    Order of preference: 32x32 icons, 16x16 icons, "shortcut icon" links,
    then any other rel="icon" link. Duplicates are removed while keeping the
    first occurrence, so the first element is the preferred icon.
    """
    # Local import: urllib is not imported at the top of this module.
    from urllib.parse import urljoin

    soup = BeautifulSoup(markup, features='html.parser')
    icon32 = soup.find_all('link', rel="icon", href=True, sizes="32x32")
    icon16 = soup.find_all('link', rel="icon", href=True, sizes="16x16")
    favicon = soup.find_all('link', rel="shortcut icon", href=True)
    others = soup.find_all('link', rel="icon", href=True)
    candidates = icon32 + icon16 + favicon + others

    # dict.fromkeys de-duplicates but, unlike set(), keeps the priority order.
    hrefs = dict.fromkeys(i.get('href') for i in candidates)
    # urljoin leaves absolute URLs alone and resolves '/x', 'x' and '//host/x'.
    return [urljoin(url, href) for href in hrefs if href]
|
||||||
|
|
||||||
|
def _first_author_name(author, wrapper=None):
    """Return the name of an author dict (or of the first one in a list), else None.

    wrapper, when given, is the key under which the author dict nests its
    fields (microdata items use 'properties').
    """
    if isinstance(author, list):
        author = author[0] if author else None
    if wrapper is not None and isinstance(author, dict):
        author = author.get(wrapper)
    if not isinstance(author, dict):
        return None
    return author.get('name')


def _apply_article(s, node, wrapper=None, with_author=True):
    """Copy headline, dates and (optionally) author from an article node into s."""
    if 'headline' in node:
        s['title'] = node['headline']
    # datePublished is applied last, so it wins when both dates are present.
    for date_key in ('dateModified', 'datePublished'):
        if node.get(date_key):
            s['date'] = node[date_key]
    if with_author:
        name = _first_author_name(node.get('author'), wrapper)
        if name is not None:
            s['author'] = name


def parse_extruct(s, data):
    """Fill story dict s with title, date and author found in extracted metadata.

    data is a dict keyed by metadata syntax ('rdfa', 'opengraph',
    'microdata', 'json-ld'); a missing syntax is treated as empty. Sources
    are applied in that order, so later ones override earlier ones. Fields
    absent from an item are skipped instead of raising.

    Returns s (the same dict, updated in place).
    """
    article_types = ['Article', 'NewsArticle']
    rdfa_keys = {
        'title': [
            'http://ogp.me/ns#title',
            'https://ogp.me/ns#title',
        ],
        'date': [
            'http://ogp.me/ns/article#modified_time',
            'https://ogp.me/ns/article#modified_time',
            'http://ogp.me/ns/article#published_time',
            'https://ogp.me/ns/article#published_time',
        ]
    }
    for rdfa in data.get('rdfa') or []:
        for key, props in rdfa.items():
            # NOTE(review): only mapping-valued entries can hold the ogp keys
            # the way this loop reads them -- confirm against extruct's output.
            if not isinstance(props, dict):
                continue
            for attribute, properties in rdfa_keys.items():
                for prop in properties:
                    for values in props.get(prop) or []:
                        if isinstance(values, dict) and '@value' in values:
                            s[attribute] = values['@value']

    for og in data.get('opengraph') or []:
        pairs = og.get('properties') or []
        titles = [value for key, value in pairs if 'og:title' in key and value]
        modified = [value for key, value in pairs if 'article:modified_time' in key and value]
        published = [value for key, value in pairs if 'article:published_time' in key and value]
        if modified:
            s['date'] = modified[0]
        if published:
            s['date'] = published[0]
        if titles:
            s['title'] = titles[0]

    for md in data.get('microdata') or []:
        if md.get('type') in ['https://schema.org/NewsArticle', 'http://schema.org/NewsArticle']:
            _apply_article(s, md.get('properties') or {}, wrapper='properties')

    for ld in data.get('json-ld') or []:
        if not isinstance(ld, dict):
            continue
        if ld.get('@type') in article_types:
            _apply_article(s, ld)
        for gld in ld.get('@graph') or []:
            if isinstance(gld, dict) and gld.get('@type') in article_types:
                # Graph nodes contribute title and dates only.
                _apply_article(s, gld, with_author=False)

    return s
|
98
apiserver/misc/news.py
Normal file
98
apiserver/misc/news.py
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
import logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
level=logging.DEBUG)
|
||||||
|
|
||||||
|
import re
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from scrapers import declutter
|
||||||
|
import extruct
|
||||||
|
|
||||||
|
import settings
|
||||||
|
from utils import clean
|
||||||
|
from misc.metadata import parse_extruct, get_icons
|
||||||
|
from misc.time import unix
|
||||||
|
from misc.api import xml
|
||||||
|
import misc.stuff as stuff
|
||||||
|
|
||||||
|
def comment(i):
    """Convert one raw comment dict into the comment structure used by stories.

    Returns False when the raw comment has no 'author' key; otherwise a dict
    with author, score, date, text and the recursively converted child
    comments (children that convert to False are dropped).
    """
    if 'author' not in i:
        return False

    c = {}
    c['author'] = i.get('author', '')
    c['score'] = i.get('points', 0)
    c['date'] = unix(i.get('date', 0))
    c['text'] = clean(i.get('text', '') or '')
    # A missing or null 'children' entry means "no replies", like the other
    # optional fields above, rather than a KeyError.
    children = i.get('children') or []
    c['comments'] = list(filter(bool, [comment(j) for j in children]))
    return c
|
||||||
|
|
||||||
|
def comment_count(i):
    """Count the comments with a non-empty author in a comment tree.

    i is either one comment dict (with 'author' and nested 'comments') or a
    list of such dicts, in which case the counts are added up. Missing keys
    count as "no author" / "no replies".
    """
    if isinstance(i, (list, tuple)):
        # Callers holding a whole comment list can pass it directly.
        return sum(comment_count(c) for c in i)
    alive = 1 if i.get('author') else 0
    return sum(comment_count(c) for c in i.get('comments') or []) + alive
|
||||||
|
|
||||||
|
class Base:
    """Common behaviour for news sources configured by a settings dict.

    config is a dict; the keys read here are 'url', 'tz' and 'patterns'.
    """

    def __init__(self, config):
        # The original signature lacked `self`, so construction always failed.
        self.config = config
        self.url = config.get('url')
        self.tz = config.get('tz')

    def get_id(self, link):
        """Derive a story id from link using the configured regex patterns.

        The first pattern that matches at the start of link wins; its
        captured groups are joined with ':'. Without patterns, or when none
        matches, link itself is returned.
        """
        patterns = self.config.get('patterns')
        if not patterns:
            return link
        for pattern in patterns:
            match = re.compile(pattern).match(link)
            if match:
                return ':'.join(match.groups())
        return link

    def feed(self, excludes=None):
        """Return the list of story references; the base class has none."""
        return []

    def story(self, ref, urlref):
        """Fetch urlref and build a story dict from the page's metadata.

        Returns False when urlref is None, when the page cannot be fetched,
        or when no date could be determined; otherwise the story dict.
        """
        if urlref is None:
            return False
        markup = xml(lambda x: urlref)
        if not markup:
            return False

        s = {}
        s['author_link'] = ''
        s['score'] = 0
        s['comments'] = []
        s['num_comments'] = 0
        s['link'] = urlref
        s['url'] = urlref
        s['date'] = 0

        icons = get_icons(markup, url=urlref)
        if icons:
            s['icon'] = icons[0]

        data = extruct.extract(markup)
        s = parse_extruct(s, data)
        if s['date']:
            s['date'] = unix(s['date'], tz=self.tz)

        if 'disqus' in markup:
            # Comments are best-effort: a failure here must not lose the story.
            try:
                # get_comments may return None on failure; keep the list default.
                comments = list(filter(bool, declutter.get_comments(urlref) or []))
                s['comments'] = comments
                # comment_count takes one comment tree, so add up the roots.
                s['num_comments'] = sum(comment_count(c) for c in comments)
            except KeyboardInterrupt:
                raise
            except Exception as e:
                logging.error('Problem getting disqus comments: {}'.format(str(e)))

        if urlref.startswith('https://www.stuff.co.nz'):
            s['comments'] = stuff.get_comments(urlref)
            s['comments'] = list(filter(bool, s['comments']))
            s['num_comments'] = len(s['comments'])

        if not s['date']:
            return False
        return s
|
64
apiserver/misc/stuff.py
Normal file
64
apiserver/misc/stuff.py
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
import re
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0,'.')
|
||||||
|
|
||||||
|
from misc.time import unix
|
||||||
|
from misc.api import xml
|
||||||
|
|
||||||
|
def _soup_get_text(soup):
    """Return the text of a parsed element, falling back to a CDATA child.

    Returns None when soup is falsy or when neither plain text nor a
    non-empty CDATA section is found.
    """
    if not soup:
        return None
    if soup.text:
        return soup.text

    # Only BeautifulSoup is imported at module level, so the `bs4.CData`
    # reference used here before raised NameError; import the class locally.
    from bs4 import CData

    s = soup.find(text=lambda tag: isinstance(tag, CData))
    if s and s.string:
        return s.string.strip()
    return None
|
||||||
|
|
||||||
|
def _parse_comment(soup):
    """Convert one parsed feed <item> into a comment dict.

    Always returns a dict with the keys author, authorLink, score, date,
    text and comments; fields that cannot be read keep their empty defaults.
    """
    c = {
        'author': '',
        'authorLink': '',
        'score': 0,
        'date': 0,
        'text': '',
        'comments': [],
    }

    link = soup.find('link')
    if link:
        # NOTE(review): the "By: <name>" text is read from <link>, as before;
        # confirm it is not meant to come from <title>.
        link_text = _soup_get_text(link)
        if link_text and 'By:' in link_text:
            # str.strip('By:') removes any of the characters B, y, ':' from
            # both ends (mangling names like "Bobby"); take what follows the
            # marker instead.
            c['author'] = link_text.split('By:', 1)[1].strip()
        if link_text:
            c['authorLink'] = link_text

    creator = soup.find('dc:creator')
    if creator:
        creator_text = _soup_get_text(creator)
        if creator_text:
            # An explicit creator element overrides the "By:" author.
            c['author'] = creator_text

    description = soup.find('description')
    if description:
        c['text'] = _soup_get_text(description) or ''

    # Accept both spellings of the date element name.
    pubdate = soup.find('pubdate') or soup.find('pubDate')
    if pubdate:
        c['date'] = unix(pubdate.text)

    return c
|
||||||
|
|
||||||
|
def get_comments(url):
    """Fetch and parse the comment feed for a stuff.co.nz article URL.

    Returns a list of comment dicts. An empty list is returned when url does
    not look like an article URL, when the feed cannot be fetched, or when
    it contains no items.
    """
    regex = r"https:\/\/www\.stuff\.co\.nz\/(.*\/\d+)/[^\/]+"
    p = re.compile(regex).match(url)
    if not p:
        # Not an article URL (e.g. a section page): there is no comment feed.
        return []
    path = p.groups()[0]
    comment_url = f'https://comments.us1.gigya.com/comments/rss/6201101/Stuff/stuff/{path}'
    markup = xml(lambda x: comment_url)
    if not markup:
        return []
    soup = BeautifulSoup(markup, features='html.parser')
    comments = soup.find_all('item')
    if not comments:
        return []
    return [_parse_comment(c) for c in comments]
|
||||||
|
|
||||||
|
|
||||||
|
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':
    # Pull the comments of one known article and show how many came back,
    # followed by the first five.
    sample_url = 'https://www.stuff.co.nz/life-style/homed/houses/123418468/dear-jacinda-we-need-to-talk-about-housing'
    comments = get_comments(sample_url)
    print(len(comments))
    print(comments[:5])
|
18
apiserver/misc/time.py
Normal file
18
apiserver/misc/time.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
import pytz
|
||||||
|
import dateutil.parser
|
||||||
|
|
||||||
|
|
||||||
|
TZINFOS = {
|
||||||
|
'NZDT': pytz.timezone('Pacific/Auckland'),
|
||||||
|
'NZST': pytz.timezone('Pacific/Auckland')
|
||||||
|
}
|
||||||
|
|
||||||
|
def unix(date_str, tz=None, tzinfos=TZINFOS):
    """Parse a date string and return it as an integer Unix timestamp.

    tz is an optional timezone name applied only when the parsed date has no
    timezone of its own. tzinfos maps timezone abbreviations to tzinfo
    objects for the parser.

    Returns 0 when the value cannot be parsed or converted.
    """
    try:
        dt = dateutil.parser.parse(date_str, tzinfos=tzinfos)
        if dt.tzinfo is None:
            if tz:
                dt = pytz.timezone(tz).localize(dt)
        elif hasattr(dt.tzinfo, 'localize'):
            # A pytz zone attached directly as tzinfo (as happens for the
            # abbreviations in tzinfos) does not carry the right UTC offset;
            # pytz requires localize() to pick it.
            dt = dt.tzinfo.localize(dt.replace(tzinfo=None))
        # Dates that already carry an offset are used as-is: calling
        # localize() on them raises, which used to turn them into 0.
        return int(dt.timestamp())
    except Exception:
        # Best-effort conversion: any unparsable input maps to 0.
        return 0
|
@@ -4,6 +4,7 @@ certifi==2020.6.20
|
|||||||
chardet==3.0.4
|
chardet==3.0.4
|
||||||
click==7.1.2
|
click==7.1.2
|
||||||
commonmark==0.9.1
|
commonmark==0.9.1
|
||||||
|
extruct==0.10.0
|
||||||
Flask==1.1.2
|
Flask==1.1.2
|
||||||
Flask-Cors==3.0.8
|
Flask-Cors==3.0.8
|
||||||
gevent==20.6.2
|
gevent==20.6.2
|
||||||
@@ -11,11 +12,13 @@ greenlet==0.4.16
|
|||||||
idna==2.10
|
idna==2.10
|
||||||
itsdangerous==1.1.0
|
itsdangerous==1.1.0
|
||||||
Jinja2==2.11.2
|
Jinja2==2.11.2
|
||||||
|
lxml==4.6.1
|
||||||
MarkupSafe==1.1.1
|
MarkupSafe==1.1.1
|
||||||
packaging==20.4
|
packaging==20.4
|
||||||
praw==6.4.0
|
praw==6.4.0
|
||||||
prawcore==1.4.0
|
prawcore==1.4.0
|
||||||
pyparsing==2.4.7
|
pyparsing==2.4.7
|
||||||
|
pytz==2020.4
|
||||||
requests==2.24.0
|
requests==2.24.0
|
||||||
six==1.15.0
|
six==1.15.0
|
||||||
soupsieve==2.0.1
|
soupsieve==2.0.1
|
||||||
@@ -27,3 +30,4 @@ websocket-client==0.57.0
|
|||||||
Werkzeug==1.0.1
|
Werkzeug==1.0.1
|
||||||
zope.event==4.4
|
zope.event==4.4
|
||||||
zope.interface==5.1.0
|
zope.interface==5.1.0
|
||||||
|
python-dateutil==2.8.1
|
||||||
|
41
apiserver/scrapers/declutter.py
Normal file
41
apiserver/scrapers/declutter.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
import logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
level=logging.DEBUG)
|
||||||
|
import requests
|
||||||
|
|
||||||
|
DECLUTTER_API = 'https://declutter.1j.nz/headless/details'
|
||||||
|
DECLUTTER_COMMENT_API = 'https://declutter.1j.nz/headless/comments'
|
||||||
|
TIMEOUT = 90
|
||||||
|
|
||||||
|
|
||||||
|
def get_html(url):
    """Return the decluttered article HTML for url, or '' when unavailable."""
    logging.info(f"Declutter Scraper: {url}")
    details = get_details(url)
    # get_details yields a falsy value on failure.
    return details['content'] if details else ''
|
||||||
|
|
||||||
|
def get_details(url):
    """POST url to the declutter details endpoint and return the decoded JSON.

    Returns None on any failure; KeyboardInterrupt is re-raised.
    """
    try:
        response = requests.post(DECLUTTER_API, data={'url': url}, timeout=TIMEOUT)
        if response.status_code == 200:
            return response.json()
        # Any other status is handled as a failure by the handler below.
        raise Exception('Bad response code ' + str(response.status_code))
    except KeyboardInterrupt:
        raise
    except BaseException as err:
        logging.error('Problem decluttering article: {}'.format(str(err)))
        return None
|
||||||
|
|
||||||
|
def get_comments(url):
    """POST url to the declutter comments endpoint and return the decoded JSON.

    Returns None on any failure; KeyboardInterrupt is re-raised.
    """
    try:
        response = requests.post(DECLUTTER_COMMENT_API, data={'url': url}, timeout=TIMEOUT)
        if response.status_code == 200:
            return response.json()
        # Any other status is handled as a failure by the handler below.
        raise Exception('Bad response code ' + str(response.status_code))
    except KeyboardInterrupt:
        raise
    except BaseException as err:
        logging.error('Problem getting comments for article: {}'.format(str(err)))
        return None
|
41
apiserver/scrapers/headless.py
Normal file
41
apiserver/scrapers/headless.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
import logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
level=logging.DEBUG)
|
||||||
|
import requests
|
||||||
|
from settings import HEADLESS_READER_PORT
|
||||||
|
|
||||||
|
READ_API = 'http://127.0.0.1:{}/headless/details'.format(HEADLESS_READER_PORT or 33843)
|
||||||
|
READ_COMMENT__API = 'http://127.0.0.1:{}/headless/comments'.format(HEADLESS_READER_PORT or 33843)
|
||||||
|
TIMEOUT = 90
|
||||||
|
|
||||||
|
def get_html(url):
    """Return the article HTML from the headless reader, or '' when unavailable."""
    logging.info(f"Headless Scraper: {url}")
    details = get_details(url)
    # get_details yields a falsy value on failure.
    return details['content'] if details else ''
|
||||||
|
|
||||||
|
def get_details(url):
    """POST url to the headless reader details endpoint and return the JSON.

    Returns None on any failure; KeyboardInterrupt is re-raised.
    """
    try:
        response = requests.post(READ_API, data={'url': url}, timeout=TIMEOUT)
        if response.status_code == 200:
            return response.json()
        # Any other status is handled as a failure by the handler below.
        raise Exception('Bad response code ' + str(response.status_code))
    except KeyboardInterrupt:
        raise
    except BaseException as err:
        logging.error('Problem scraping article: {}'.format(str(err)))
        return None
|
||||||
|
|
||||||
|
def get_comments(url):
    """POST url to the headless reader comments endpoint and return the JSON.

    Returns None on any failure; KeyboardInterrupt is re-raised.
    """
    try:
        # The module constant is spelled READ_COMMENT__API (double
        # underscore). The previous READ_COMMENT_API reference was undefined,
        # so every call failed with NameError and returned None.
        r = requests.post(READ_COMMENT__API, data=dict(url=url), timeout=TIMEOUT)
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem getting comments for article: {}'.format(str(e)))
        return None
|
37
apiserver/scrapers/outline.py
Normal file
37
apiserver/scrapers/outline.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
import logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
level=logging.DEBUG)
|
||||||
|
import requests
|
||||||
|
|
||||||
|
OUTLINE_REFERER = 'https://outline.com/'
|
||||||
|
OUTLINE_API = 'https://api.outline.com/v3/parse_article'
|
||||||
|
TIMEOUT = 20
|
||||||
|
|
||||||
|
def get_html(url):
    """Return the article HTML produced by Outline, or '' when unavailable."""
    details = get_details(url)
    # get_details yields a falsy value on failure.
    return details['html'] if details else ''
|
||||||
|
|
||||||
|
def get_details(url):
    """Ask the Outline API to parse url and return the 'data' part of the reply.

    Returns None when Outline rate-limits the request (after sleeping 30
    seconds), when the URL is not supported, or on any other failure.
    KeyboardInterrupt is re-raised.
    """
    # `time` is not imported at module level; without this the rate-limit
    # branch raised NameError and never slept.
    import time

    try:
        logging.info(f"Outline Scraper: {url}")
        params = {'source_url': url}
        headers = {'Referer': OUTLINE_REFERER}
        r = requests.get(OUTLINE_API, params=params, headers=headers, timeout=TIMEOUT)
        if r.status_code == 429:
            logging.info('Rate limited by outline, sleeping 30s and skipping...')
            time.sleep(30)
            return None
        if r.status_code != 200:
            raise Exception('Bad response code ' + str(r.status_code))
        data = r.json()['data']
        if 'URL is not supported by Outline' in data['html']:
            raise Exception('URL not supported by Outline')
        return data
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem outlining article: {}'.format(str(e)))
        return None
|
28
apiserver/scrapers/simple.py
Normal file
28
apiserver/scrapers/simple.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
import logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
level=logging.DEBUG)
|
||||||
|
import requests
|
||||||
|
from settings import SIMPLE_READER_PORT
|
||||||
|
|
||||||
|
READ_API = 'http://127.0.0.1:{}/simple/details'.format(SIMPLE_READER_PORT or 33843)
|
||||||
|
TIMEOUT = 20
|
||||||
|
|
||||||
|
def get_html(url):
    """Return the article HTML from the simple reader, or '' when unavailable."""
    logging.info(f"Simple Scraper: {url}")
    details = get_details(url)
    # get_details yields a falsy value on failure.
    return details['content'] if details else ''
|
||||||
|
|
||||||
|
def get_details(url):
    """POST url to the simple reader details endpoint and return the JSON.

    Returns None on any failure; KeyboardInterrupt is re-raised.
    """
    try:
        response = requests.post(READ_API, data={'url': url}, timeout=TIMEOUT)
        if response.status_code == 200:
            return response.json()
        # Any other status is handled as a failure by the handler below.
        raise Exception('Bad response code ' + str(response.status_code))
    except KeyboardInterrupt:
        raise
    except BaseException as err:
        logging.error('Problem getting article: {}'.format(str(err)))
        return None
|
@@ -35,14 +35,11 @@ def update_rankings():
|
|||||||
|
|
||||||
def update_attributes():
|
def update_attributes():
|
||||||
try:
|
try:
|
||||||
json = ['title', 'url', 'author', 'link', 'id']
|
json = ['title', 'url', 'author', 'link', 'id', 'source']
|
||||||
r = requests.post(MEILI_URL + 'indexes/qotnews/settings/searchable-attributes', json=json, timeout=2)
|
r = requests.post(MEILI_URL + 'indexes/qotnews/settings/searchable-attributes', json=json, timeout=2)
|
||||||
if r.status_code != 202:
|
if r.status_code != 202:
|
||||||
raise Exception('Bad response code ' + str(r.status_code))
|
raise Exception('Bad response code ' + str(r.status_code))
|
||||||
return r.json()
|
requests.delete(MEILI_URL + 'indexes/qotnews/settings/displayed-attributes', timeout=2)
|
||||||
r = requests.delete(MEILI_URL + 'indexes/qotnews/settings/displayed-attributes', timeout=2)
|
|
||||||
if r.status_code != 202:
|
|
||||||
raise Exception('Bad response code ' + str(r.status_code))
|
|
||||||
return r.json()
|
return r.json()
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
raise
|
raise
|
||||||
|
@@ -15,6 +15,7 @@ import traceback
|
|||||||
import time
|
import time
|
||||||
from urllib.parse import urlparse, parse_qs
|
from urllib.parse import urlparse, parse_qs
|
||||||
|
|
||||||
|
import settings
|
||||||
import database
|
import database
|
||||||
import search
|
import search
|
||||||
import feed
|
import feed
|
||||||
@@ -27,9 +28,6 @@ from flask_cors import CORS
|
|||||||
database.init()
|
database.init()
|
||||||
search.init()
|
search.init()
|
||||||
|
|
||||||
FEED_LENGTH = 75
|
|
||||||
news_index = 0
|
|
||||||
|
|
||||||
def new_id():
|
def new_id():
|
||||||
nid = gen_rand_id()
|
nid = gen_rand_id()
|
||||||
while database.get_story(nid):
|
while database.get_story(nid):
|
||||||
@@ -42,9 +40,8 @@ cors = CORS(flask_app)
|
|||||||
|
|
||||||
@flask_app.route('/api')
|
@flask_app.route('/api')
|
||||||
def api():
|
def api():
|
||||||
stories = database.get_stories(FEED_LENGTH)
|
stories = database.get_stories(settings.MAX_STORY_AGE)
|
||||||
# hacky nested json
|
res = Response(json.dumps({"stories": stories}))
|
||||||
res = Response('{"stories":[' + ','.join(stories) + ']}')
|
|
||||||
res.headers['content-type'] = 'application/json'
|
res.headers['content-type'] = 'application/json'
|
||||||
return res
|
return res
|
||||||
|
|
||||||
@@ -73,7 +70,7 @@ def submit():
|
|||||||
elif 'reddit.com' in parse.hostname and 'comments' in url:
|
elif 'reddit.com' in parse.hostname and 'comments' in url:
|
||||||
source = 'reddit'
|
source = 'reddit'
|
||||||
ref = parse.path.split('/')[4]
|
ref = parse.path.split('/')[4]
|
||||||
elif 'news.t0.vc' in parse.hostname:
|
elif settings.HOSTNAME in parse.hostname:
|
||||||
raise Exception('Invalid article')
|
raise Exception('Invalid article')
|
||||||
else:
|
else:
|
||||||
source = 'manual'
|
source = 'manual'
|
||||||
@@ -102,8 +99,11 @@ def submit():
|
|||||||
def story(sid):
|
def story(sid):
|
||||||
story = database.get_story(sid)
|
story = database.get_story(sid)
|
||||||
if story:
|
if story:
|
||||||
# hacky nested json
|
related = []
|
||||||
res = Response('{"story":' + story.full_json + '}')
|
if story.meta['url']:
|
||||||
|
related = database.get_stories_by_url(story.meta['url'])
|
||||||
|
related = [r.meta for r in related]
|
||||||
|
res = Response(json.dumps({"story": story.data, "related": related}))
|
||||||
res.headers['content-type'] = 'application/json'
|
res.headers['content-type'] = 'application/json'
|
||||||
return res
|
return res
|
||||||
else:
|
else:
|
||||||
@@ -114,7 +114,7 @@ def story(sid):
|
|||||||
def index():
|
def index():
|
||||||
return render_template('index.html',
|
return render_template('index.html',
|
||||||
title='Feed',
|
title='Feed',
|
||||||
url='news.t0.vc',
|
url=settings.HOSTNAME,
|
||||||
description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')
|
description='Reddit, Hacker News, and Tildes combined, then pre-rendered in reader mode')
|
||||||
|
|
||||||
@flask_app.route('/<sid>', strict_slashes=False)
|
@flask_app.route('/<sid>', strict_slashes=False)
|
||||||
@@ -127,7 +127,7 @@ def static_story(sid):
|
|||||||
|
|
||||||
story = database.get_story(sid)
|
story = database.get_story(sid)
|
||||||
if not story: return abort(404)
|
if not story: return abort(404)
|
||||||
story = json.loads(story.full_json)
|
story = story.data
|
||||||
|
|
||||||
score = story['score']
|
score = story['score']
|
||||||
num_comments = story['num_comments']
|
num_comments = story['num_comments']
|
||||||
@@ -144,54 +144,55 @@ def static_story(sid):
|
|||||||
url=url,
|
url=url,
|
||||||
description=description)
|
description=description)
|
||||||
|
|
||||||
http_server = WSGIServer(('', 33842), flask_app)
|
http_server = WSGIServer(('', settings.API_PORT or 33842), flask_app)
|
||||||
|
|
||||||
|
def _add_new_refs():
    """Register every feed entry whose ref has no story yet under a new id."""
    for ref, source, urlref in feed.get_list():
        already_known = database.get_story_by_ref(ref)
        if not already_known:
            try:
                database.put_ref(ref, new_id(), source, urlref)
                logging.info('Added ref ' + ref)
            except database.IntegrityError:
                # The insert was rejected by the database; move on to the next entry.
                logging.info('Unable to add ref ' + ref)
|
||||||
|
|
||||||
|
def _update_current_story(item):
    """Refresh the story behind one ref-list item and store the result.

    item is a dict with 'sid', 'ref', 'source' and 'urlref'. A story that is
    not in the database yet starts from a minimal dict. If the update is not
    valid, the ref is deleted instead.
    """
    try:
        story = database.get_story(item['sid']).data
    except AttributeError:
        # get_story returned nothing: first time this ref is processed.
        story = dict(id=item['sid'], ref=item['ref'], source=item['source'])

    logging.info('Updating story: {}'.format(str(story['ref'])))

    valid = feed.update_story(story, urlref=item['urlref'])
    if valid:
        try:
            database.put_story(story)
            search.put_story(story)
        except database.IntegrityError:
            # `ref` is not defined in this function; the handler used to fail
            # with NameError instead of logging.
            logging.info('Unable to add story with ref ' + str(item['ref']))
    else:
        database.del_ref(item['ref'])
        logging.info('Removed ref {}'.format(item['ref']))
|
||||||
|
|
||||||
def feed_thread():
|
def feed_thread():
|
||||||
global news_index
|
ref_list = []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
# onboard new stories
|
# onboard new stories
|
||||||
if news_index == 0:
|
if not len(ref_list):
|
||||||
for ref, source in feed.list():
|
_add_new_refs()
|
||||||
if database.get_story_by_ref(ref):
|
ref_list = database.get_reflist()
|
||||||
continue
|
|
||||||
try:
|
|
||||||
nid = new_id()
|
|
||||||
database.put_ref(ref, nid, source)
|
|
||||||
logging.info('Added ref ' + ref)
|
|
||||||
except database.IntegrityError:
|
|
||||||
continue
|
|
||||||
|
|
||||||
ref_list = database.get_reflist(FEED_LENGTH)
|
|
||||||
|
|
||||||
# update current stories
|
# update current stories
|
||||||
if news_index < len(ref_list):
|
if len(ref_list):
|
||||||
item = ref_list[news_index]
|
item = ref_list.pop(0)
|
||||||
|
_update_current_story(item)
|
||||||
try:
|
|
||||||
story_json = database.get_story(item['sid']).full_json
|
|
||||||
story = json.loads(story_json)
|
|
||||||
except AttributeError:
|
|
||||||
story = dict(id=item['sid'], ref=item['ref'], source=item['source'])
|
|
||||||
|
|
||||||
logging.info('Updating story: ' + str(story['ref']) + ', index: ' + str(news_index))
|
|
||||||
|
|
||||||
valid = feed.update_story(story)
|
|
||||||
if valid:
|
|
||||||
database.put_story(story)
|
|
||||||
search.put_story(story)
|
|
||||||
else:
|
|
||||||
database.del_ref(item['ref'])
|
|
||||||
logging.info('Removed ref {}'.format(item['ref']))
|
|
||||||
else:
|
|
||||||
logging.info('Skipping index: ' + str(news_index))
|
|
||||||
|
|
||||||
gevent.sleep(6)
|
gevent.sleep(6)
|
||||||
|
|
||||||
news_index += 1
|
|
||||||
if news_index == FEED_LENGTH: news_index = 0
|
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
logging.info('Ending feed thread...')
|
logging.info('Ending feed thread...')
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
|
@@ -1,12 +1,60 @@
|
|||||||
# QotNews settings
|
# QotNews settings
|
||||||
# edit this file and save it as settings.py
|
# edit this file and save it as settings.py
|
||||||
|
|
||||||
|
HOSTNAME = 'news.t0.vc'
|
||||||
|
MAX_STORY_AGE = 3*24*60*60
|
||||||
|
|
||||||
|
SCRAPERS = ['headless', 'outline', 'declutter', 'simple']
|
||||||
|
API_PORT = 33842
|
||||||
|
SIMPLE_READER_PORT = 33843
|
||||||
|
HEADLESS_READER_PORT = 33843
|
||||||
|
|
||||||
# Feed Lengths
|
# Feed Lengths
|
||||||
# Number of top items from each site to pull
|
# Number of top items from each site to pull
|
||||||
# set to 0 to disable that site
|
# set to 0 to disable that site
|
||||||
NUM_HACKERNEWS = 15
|
NUM_HACKERNEWS = 15
|
||||||
NUM_REDDIT = 10
|
NUM_REDDIT = 10
|
||||||
NUM_TILDES = 5
|
NUM_TILDES = 5
|
||||||
|
NUM_SUBSTACK = 10
|
||||||
|
|
||||||
|
SITEMAP = {}
|
||||||
|
# SITEMAP['nzherald'] = {
|
||||||
|
# 'url': "https://www.nzherald.co.nz/arcio/news-sitemap/",
|
||||||
|
# 'count': 20,
|
||||||
|
# 'patterns': [
|
||||||
|
# r'^https:\/\/www\.(nzherald\.co\.nz)\/.*\/([^/]+)\/?$',
|
||||||
|
# ],
|
||||||
|
# 'excludes': [
|
||||||
|
# 'driven.co.nz',
|
||||||
|
# 'oneroof.co.nz',
|
||||||
|
# 'nzherald.co.nz/sponsored-stories',
|
||||||
|
# 'nzherald.co.nz/entertainment/',
|
||||||
|
# 'nzherald.co.nz/lifestyle/',
|
||||||
|
# 'nzherald.co.nz/travel/',
|
||||||
|
# 'nzherald.co.nz/sport/',
|
||||||
|
# 'nzherald.co.nz/promotions/',
|
||||||
|
# 'nzherald.co.nzhttp',
|
||||||
|
# 'herald-afternoon-quiz',
|
||||||
|
# 'herald-morning-quiz'
|
||||||
|
# ],
|
||||||
|
# }
|
||||||
|
|
||||||
|
SUBSTACK = {}
|
||||||
|
# SUBSTACK['webworm'] = { 'url': "https://www.webworm.co", 'count': 10},
|
||||||
|
# SUBSTACK['the bulletin'] = { 'url': "https://thespinoff.substack.com", 'count': 10},
|
||||||
|
|
||||||
|
CATEGORY = {}
|
||||||
|
# CATEGORY['radionz'] = {
|
||||||
|
# 'url': "https://www.rnz.co.nz/news/",
|
||||||
|
# 'count': 20,
|
||||||
|
# 'patterns': [
|
||||||
|
# r'https:\/\/www\.(rnz\.co\.nz)\/news\/[^\/]+\/(\d+)\/[^\/]+\/?'
|
||||||
|
# ],
|
||||||
|
# 'excludes': [
|
||||||
|
# 'rnz.co.nz/news/sport',
|
||||||
|
# 'rnz.co.nz/weather',
|
||||||
|
# ],
|
||||||
|
# }
|
||||||
|
|
||||||
# Reddit account info
|
# Reddit account info
|
||||||
# leave blank if not using Reddit
|
# leave blank if not using Reddit
|
||||||
@@ -14,6 +62,10 @@ REDDIT_CLIENT_ID = ''
|
|||||||
REDDIT_CLIENT_SECRET = ''
|
REDDIT_CLIENT_SECRET = ''
|
||||||
REDDIT_USER_AGENT = ''
|
REDDIT_USER_AGENT = ''
|
||||||
|
|
||||||
|
# Minimum points or number of comments before including a thread:
|
||||||
|
REDDIT_COMMENT_THRESHOLD = 10
|
||||||
|
REDDIT_SCORE_THRESHOLD = 25
|
||||||
|
|
||||||
SUBREDDITS = [
|
SUBREDDITS = [
|
||||||
'Economics',
|
'Economics',
|
||||||
'AcademicPhilosophy',
|
'AcademicPhilosophy',
|
||||||
|
48
apiserver/update-story.py
Normal file
48
apiserver/update-story.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
import logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
level=logging.INFO)
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
|
||||||
|
import database
|
||||||
|
import feed
|
||||||
|
import search
|
||||||
|
|
||||||
|
database.init()
|
||||||
|
search.init()
|
||||||
|
|
||||||
|
def _update_current_story(story, item):
    """Re-run the feed update for story and store it, or drop its ref if invalid."""
    logging.info('Updating story: {}'.format(str(story['ref'])))

    # A story that has a url gets its text blanked before the update runs.
    if story.get('url', ''):
        story['text'] = ''

    if not feed.update_story(story, urlref=item['urlref']):
        database.del_ref(item['ref'])
        logging.info('Removed ref {}'.format(item['ref']))
        return

    database.put_story(story)
    search.put_story(story)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point: re-fetch and store a single story by its id.
    if len(sys.argv) != 2:
        # The usage text previously named delete-story.py.
        print('Usage: python update-story.py [story id]')
        sys.exit(1)
    sid = sys.argv[1]

    item = database.get_ref_by_sid(sid)
    # A ref can exist without a stored story; avoid reading .data on a
    # missing record.
    record = database.get_story(item['sid']) if item else None
    story = record.data if record else None

    if story:
        print('Updating story:')
        _update_current_story(story, item)
    else:
        print('Story not found. Exiting.')
|
@@ -9,7 +9,7 @@ import string
|
|||||||
from bleach.sanitizer import Cleaner
|
from bleach.sanitizer import Cleaner
|
||||||
|
|
||||||
def gen_rand_id():
|
def gen_rand_id():
|
||||||
return ''.join(random.choice(string.ascii_uppercase) for _ in range(4))
|
return ''.join(random.choice(string.ascii_uppercase) for _ in range(5))
|
||||||
|
|
||||||
def render_md(md):
|
def render_md(md):
|
||||||
if md:
|
if md:
|
||||||
|
1
readerserver
Submodule
1
readerserver
Submodule
Submodule readerserver added at d3d5fc74ac
92
readerserver/.gitignore
vendored
92
readerserver/.gitignore
vendored
@@ -1,92 +0,0 @@
|
|||||||
# Logs
|
|
||||||
logs
|
|
||||||
*.log
|
|
||||||
npm-debug.log*
|
|
||||||
yarn-debug.log*
|
|
||||||
yarn-error.log*
|
|
||||||
lerna-debug.log*
|
|
||||||
|
|
||||||
# Diagnostic reports (https://nodejs.org/api/report.html)
|
|
||||||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
|
|
||||||
|
|
||||||
# Runtime data
|
|
||||||
pids
|
|
||||||
*.pid
|
|
||||||
*.seed
|
|
||||||
*.pid.lock
|
|
||||||
|
|
||||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
|
||||||
lib-cov
|
|
||||||
|
|
||||||
# Coverage directory used by tools like istanbul
|
|
||||||
coverage
|
|
||||||
*.lcov
|
|
||||||
|
|
||||||
# nyc test coverage
|
|
||||||
.nyc_output
|
|
||||||
|
|
||||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
|
||||||
.grunt
|
|
||||||
|
|
||||||
# Bower dependency directory (https://bower.io/)
|
|
||||||
bower_components
|
|
||||||
|
|
||||||
# node-waf configuration
|
|
||||||
.lock-wscript
|
|
||||||
|
|
||||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
|
||||||
build/Release
|
|
||||||
|
|
||||||
# Dependency directories
|
|
||||||
node_modules/
|
|
||||||
jspm_packages/
|
|
||||||
|
|
||||||
# TypeScript v1 declaration files
|
|
||||||
typings/
|
|
||||||
|
|
||||||
# TypeScript cache
|
|
||||||
*.tsbuildinfo
|
|
||||||
|
|
||||||
# Optional npm cache directory
|
|
||||||
.npm
|
|
||||||
|
|
||||||
# Optional eslint cache
|
|
||||||
.eslintcache
|
|
||||||
|
|
||||||
# Optional REPL history
|
|
||||||
.node_repl_history
|
|
||||||
|
|
||||||
# Output of 'npm pack'
|
|
||||||
*.tgz
|
|
||||||
|
|
||||||
# Yarn Integrity file
|
|
||||||
.yarn-integrity
|
|
||||||
|
|
||||||
# dotenv environment variables file
|
|
||||||
.env
|
|
||||||
.env.test
|
|
||||||
|
|
||||||
# parcel-bundler cache (https://parceljs.org/)
|
|
||||||
.cache
|
|
||||||
|
|
||||||
# next.js build output
|
|
||||||
.next
|
|
||||||
|
|
||||||
# nuxt.js build output
|
|
||||||
.nuxt
|
|
||||||
|
|
||||||
# vuepress build output
|
|
||||||
.vuepress/dist
|
|
||||||
|
|
||||||
# Serverless directories
|
|
||||||
.serverless/
|
|
||||||
|
|
||||||
# FuseBox cache
|
|
||||||
.fusebox/
|
|
||||||
|
|
||||||
# DynamoDB Local files
|
|
||||||
.dynamodb/
|
|
||||||
|
|
||||||
# Editor
|
|
||||||
*.swp
|
|
||||||
*.swo
|
|
@@ -1,53 +0,0 @@
|
|||||||
const express = require('express');
|
|
||||||
const app = express();
|
|
||||||
const port = 33843;
|
|
||||||
|
|
||||||
const request = require('request');
|
|
||||||
const JSDOM = require('jsdom').JSDOM;
|
|
||||||
const { Readability } = require('readability');
|
|
||||||
|
|
||||||
app.use(express.urlencoded({ extended: true }));
|
|
||||||
|
|
||||||
app.get('/', (req, res) => {
|
|
||||||
res.send('<form method="POST" accept-charset="UTF-8"><input name="url"><button type="submit">SUBMIT</button></form>');
|
|
||||||
});
|
|
||||||
|
|
||||||
const requestCallback = (url, res) => (error, response, body) => {
|
|
||||||
if (!error && response.statusCode == 200) {
|
|
||||||
console.log('Response OK.');
|
|
||||||
|
|
||||||
const doc = new JSDOM(body, {url: url});
|
|
||||||
const reader = new Readability(doc.window.document);
|
|
||||||
const article = reader.parse();
|
|
||||||
|
|
||||||
if (article && article.content) {
|
|
||||||
res.send(article.content);
|
|
||||||
} else {
|
|
||||||
res.sendStatus(404);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
console.log('Response error:', error ? error.toString() : response.statusCode);
|
|
||||||
res.sendStatus(response ? response.statusCode : 404);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
app.post('/', (req, res) => {
|
|
||||||
const url = req.body.url;
|
|
||||||
const requestOptions = {
|
|
||||||
url: url,
|
|
||||||
//headers: {'User-Agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'},
|
|
||||||
//headers: {'User-Agent': 'Twitterbot/1.0'},
|
|
||||||
headers: {
|
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0',
|
|
||||||
'X-Forwarded-For': '66.249.66.1',
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
console.log('Parse request for:', url);
|
|
||||||
|
|
||||||
request(requestOptions, requestCallback(url, res));
|
|
||||||
});
|
|
||||||
|
|
||||||
app.listen(port, () => {
|
|
||||||
console.log(`Example app listening on port ${port}!`);
|
|
||||||
});
|
|
@@ -1,13 +0,0 @@
|
|||||||
{
|
|
||||||
"name": "readerserver",
|
|
||||||
"version": "1.0.0",
|
|
||||||
"main": "main.js",
|
|
||||||
"license": "MIT",
|
|
||||||
"dependencies": {
|
|
||||||
"dompurify": "^1.0.11",
|
|
||||||
"express": "^4.17.1",
|
|
||||||
"jsdom": "^15.1.1",
|
|
||||||
"readability": "https://github.com/mozilla/readability",
|
|
||||||
"request": "^2.88.0"
|
|
||||||
}
|
|
||||||
}
|
|
File diff suppressed because it is too large
Load Diff
@@ -5,13 +5,14 @@ import './Style-light.css';
|
|||||||
import './Style-dark.css';
|
import './Style-dark.css';
|
||||||
import './fonts/Fonts.css';
|
import './fonts/Fonts.css';
|
||||||
import { ForwardDot } from './utils.js';
|
import { ForwardDot } from './utils.js';
|
||||||
import Feed from './Feed.js';
|
|
||||||
import Article from './Article.js';
|
|
||||||
import Comments from './Comments.js';
|
|
||||||
import Search from './Search.js';
|
import Search from './Search.js';
|
||||||
import Submit from './Submit.js';
|
import Submit from './Submit.js';
|
||||||
import Results from './Results.js';
|
|
||||||
import ScrollToTop from './ScrollToTop.js';
|
import ScrollToTop from './ScrollToTop.js';
|
||||||
|
import Feed from './pages/Feed.js';
|
||||||
|
import Article from './pages/Article.js';
|
||||||
|
import Comments from './pages/Comments.js';
|
||||||
|
import Results from './pages/Results.js';
|
||||||
|
|
||||||
|
|
||||||
class App extends React.Component {
|
class App extends React.Component {
|
||||||
constructor(props) {
|
constructor(props) {
|
||||||
@@ -70,7 +71,7 @@ class App extends React.Component {
|
|||||||
<Route path='/search' component={Results} />
|
<Route path='/search' component={Results} />
|
||||||
<Route path='/:id' exact render={(props) => <Article {...props} cache={this.cache} />} />
|
<Route path='/:id' exact render={(props) => <Article {...props} cache={this.cache} />} />
|
||||||
</Switch>
|
</Switch>
|
||||||
<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} />} />
|
<Route path='/:id/c' exact render={(props) => <Comments {...props} cache={this.cache} key={props.match.params.id} />} />
|
||||||
|
|
||||||
<ForwardDot />
|
<ForwardDot />
|
||||||
|
|
||||||
|
@@ -1,225 +1,231 @@
|
|||||||
body {
|
body {
|
||||||
text-rendering: optimizeLegibility;
|
text-rendering: optimizeLegibility;
|
||||||
font: 1rem/1.3 sans-serif;
|
font: 1rem/1.3 sans-serif;
|
||||||
color: #000000;
|
color: #000000;
|
||||||
margin-bottom: 100vh;
|
margin-bottom: 100vh;
|
||||||
word-break: break-word;
|
word-break: break-word;
|
||||||
font-kerning: normal;
|
font-kerning: normal;
|
||||||
}
|
}
|
||||||
|
|
||||||
a {
|
a {
|
||||||
color: #000000;
|
color: #000000;
|
||||||
text-decoration: none;
|
text-decoration: none;
|
||||||
outline: none;
|
outline: none;
|
||||||
}
|
}
|
||||||
|
|
||||||
input {
|
input {
|
||||||
font-size: 1.05rem;
|
font-size: 1.05rem;
|
||||||
background-color: transparent;
|
background-color: transparent;
|
||||||
border: 1px solid #828282;
|
border: 1px solid #828282;
|
||||||
margin: 0.25rem;
|
margin: 0.25rem;
|
||||||
padding: 6px;
|
padding: 6px;
|
||||||
border-radius: 4px;
|
border-radius: 4px;
|
||||||
}
|
}
|
||||||
|
|
||||||
pre {
|
pre {
|
||||||
overflow: auto;
|
overflow: auto;
|
||||||
}
|
}
|
||||||
|
|
||||||
.container {
|
.container {
|
||||||
margin: 1rem auto;
|
margin: 1rem auto;
|
||||||
max-width: 64rem;
|
max-width: 64rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.menu {
|
.menu {
|
||||||
font-size: 1.1rem;
|
font-size: 1.1rem;
|
||||||
padding: 0 1rem;
|
padding: 0 1rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.slogan {
|
.slogan {
|
||||||
color: #828282;
|
color: #828282;
|
||||||
}
|
}
|
||||||
|
|
||||||
.theme {
|
.theme {
|
||||||
float: right;
|
float: right;
|
||||||
}
|
}
|
||||||
|
|
||||||
.item {
|
.item {
|
||||||
display: table;
|
display: table;
|
||||||
color: #828282;
|
color: #828282;
|
||||||
margin-bottom: 0.7rem;
|
margin-bottom: 0.7rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.item .source-logo {
|
.item .source-logo {
|
||||||
width: 0.9rem;
|
width: 0.9rem;
|
||||||
height: 0.9rem;
|
height: 0.9rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.item a {
|
.item a {
|
||||||
color: #828282;
|
color: #828282;
|
||||||
}
|
}
|
||||||
.item a:hover {
|
.item a:hover {
|
||||||
text-decoration: underline;
|
text-decoration: underline;
|
||||||
}
|
}
|
||||||
|
|
||||||
.item a.link {
|
.item a.link {
|
||||||
font-size: 1.1rem;
|
font-size: 1.1rem;
|
||||||
color: #000000;
|
color: #000000;
|
||||||
}
|
}
|
||||||
.item a.link:visited {
|
.item a.link:visited {
|
||||||
color: #828282;
|
color: #828282;
|
||||||
}
|
}
|
||||||
.item a.link:hover {
|
.item a.link:hover {
|
||||||
text-decoration: none;
|
text-decoration: none;
|
||||||
}
|
}
|
||||||
|
|
||||||
span.source {
|
span.source {
|
||||||
margin-left: 0.4rem;
|
margin-left: 0.4rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.item .info a.hot {
|
.item .info a.hot {
|
||||||
color: #444444;
|
color: #444444;
|
||||||
}
|
}
|
||||||
|
|
||||||
.article {
|
.article {
|
||||||
padding-bottom: 3rem;
|
padding-bottom: 3rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.article-container {
|
.article-container {
|
||||||
margin: 1rem auto;
|
margin: 1rem auto;
|
||||||
max-width: 38rem;
|
max-width: 38rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.article a {
|
.article a {
|
||||||
border-bottom: 1px solid #222222;
|
border-bottom: 1px solid #222222;
|
||||||
}
|
}
|
||||||
|
|
||||||
.article h1 {
|
.article h1 {
|
||||||
font-size: 1.6rem;
|
font-size: 1.6rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.article h2 {
|
.article h2 {
|
||||||
font-size: 1.4rem;
|
font-size: 1.4rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.article h3, .article h4 {
|
.article h3,
|
||||||
font-size: 1.3rem;
|
.article h4 {
|
||||||
|
font-size: 1.3rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.article img {
|
.article img {
|
||||||
max-width: 100%;
|
max-width: 100%;
|
||||||
height: auto;
|
height: auto;
|
||||||
}
|
}
|
||||||
|
|
||||||
.article figure, .article video {
|
.article figure,
|
||||||
width: 100%;
|
.article video {
|
||||||
height: auto;
|
width: 100%;
|
||||||
margin: 0;
|
height: auto;
|
||||||
|
margin: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
.article table {
|
.article table {
|
||||||
width: 100%;
|
width: 100%;
|
||||||
table-layout: fixed;
|
table-layout: fixed;
|
||||||
}
|
}
|
||||||
|
|
||||||
.article iframe {
|
.article iframe {
|
||||||
display: none;
|
display: none;
|
||||||
}
|
}
|
||||||
|
|
||||||
.article u {
|
.article u {
|
||||||
border-bottom: 1px solid #222;
|
border-bottom: 1px solid #222;
|
||||||
text-decoration: none;
|
text-decoration: none;
|
||||||
}
|
}
|
||||||
|
|
||||||
.article .info {
|
.article .info {
|
||||||
color: #828282;
|
color: #828282;
|
||||||
}
|
}
|
||||||
|
|
||||||
.article .info a {
|
.article .info a {
|
||||||
border-bottom: none;
|
border-bottom: none;
|
||||||
color: #828282;
|
color: #828282;
|
||||||
}
|
}
|
||||||
.article .info a:hover {
|
.article .info a:hover {
|
||||||
text-decoration: underline;
|
text-decoration: underline;
|
||||||
}
|
}
|
||||||
|
|
||||||
.story-text {
|
.story-text {
|
||||||
font: 1.2rem/1.5 'Apparatus SIL', sans-serif;
|
font: 1.2rem/1.5 "Apparatus SIL", sans-serif;
|
||||||
margin-top: 1em;
|
margin-top: 1em;
|
||||||
}
|
}
|
||||||
|
|
||||||
.comments {
|
.comments {
|
||||||
margin-left: -1.25rem;
|
margin-left: -1.25rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.comment {
|
.comment {
|
||||||
padding-left: 1.25rem;
|
padding-left: 1.25rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.comment.lined {
|
.comment.lined {
|
||||||
border-left: 1px solid #cccccc;
|
border-left: 1px solid #cccccc;
|
||||||
}
|
}
|
||||||
|
|
||||||
.comment .text {
|
.comment .text {
|
||||||
margin-top: -0.5rem;
|
margin-top: -0.5rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.comment .text.hidden > p {
|
.comment .text.hidden > p {
|
||||||
white-space: nowrap;
|
white-space: nowrap;
|
||||||
overflow: hidden;
|
overflow: hidden;
|
||||||
text-overflow: ellipsis;
|
text-overflow: ellipsis;
|
||||||
display: none;
|
display: none;
|
||||||
color: #828282;
|
color: #828282;
|
||||||
}
|
}
|
||||||
|
|
||||||
.comment .text.hidden > p:first-child {
|
.comment .text.hidden > p:first-child {
|
||||||
display: block;
|
display: block;
|
||||||
}
|
}
|
||||||
|
|
||||||
.comment .collapser {
|
.comment .collapser {
|
||||||
padding-left: 0.5rem;
|
padding-left: 0.5rem;
|
||||||
padding-right: 1.5rem;
|
padding-right: 1.5rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.comment .pointer {
|
.comment .pointer {
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
}
|
}
|
||||||
|
|
||||||
.toggleDot {
|
.toggleDot {
|
||||||
position: fixed;
|
position: fixed;
|
||||||
bottom: 1rem;
|
bottom: 1rem;
|
||||||
left: 1rem;
|
left: 1rem;
|
||||||
height: 3rem;
|
height: 3rem;
|
||||||
width: 3rem;
|
width: 3rem;
|
||||||
background-color: #828282;
|
background-color: #828282;
|
||||||
border-radius: 50%;
|
border-radius: 50%;
|
||||||
}
|
}
|
||||||
|
|
||||||
.toggleDot .button {
|
.toggleDot .button {
|
||||||
font: 2rem/1 'icomoon';
|
font: 2rem/1 "icomoon";
|
||||||
position: relative;
|
position: relative;
|
||||||
top: 0.5rem;
|
top: 0.5rem;
|
||||||
left: 0.55rem;
|
left: 0.55rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.forwardDot {
|
.forwardDot {
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
position: fixed;
|
position: fixed;
|
||||||
bottom: 1rem;
|
bottom: 1rem;
|
||||||
right: 1rem;
|
right: 1rem;
|
||||||
height: 3rem;
|
height: 3rem;
|
||||||
width: 3rem;
|
width: 3rem;
|
||||||
background-color: #828282;
|
background-color: #828282;
|
||||||
border-radius: 50%;
|
border-radius: 50%;
|
||||||
}
|
}
|
||||||
|
|
||||||
.forwardDot .button {
|
.forwardDot .button {
|
||||||
font: 2.5rem/1 'icomoon';
|
font: 2.5rem/1 "icomoon";
|
||||||
position: relative;
|
position: relative;
|
||||||
top: 0.25rem;
|
top: 0.25rem;
|
||||||
left: 0.3rem;
|
left: 0.3rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.search form {
|
.search form {
|
||||||
display: inline;
|
display: inline;
|
||||||
|
}
|
||||||
|
|
||||||
|
.indented {
|
||||||
|
padding: 0 0 0 1rem;
|
||||||
}
|
}
|
||||||
|
34
webclient/src/components/StoryItem.js
Normal file
34
webclient/src/components/StoryItem.js
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import React from "react";
|
||||||
|
import { Link } from "react-router-dom";
|
||||||
|
import { sourceLink, infoLine, getLogoUrl } from "../utils.js";
|
||||||
|
|
||||||
|
export class StoryItem extends React.Component {
|
||||||
|
constructor(props) {
|
||||||
|
super(props);
|
||||||
|
}
|
||||||
|
|
||||||
|
render() {
|
||||||
|
const story = this.props.story;
|
||||||
|
const { id, title } = story;
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="item" key={id}>
|
||||||
|
<div className="title">
|
||||||
|
<Link className="link" to={"/" + id}>
|
||||||
|
<img
|
||||||
|
className="source-logo"
|
||||||
|
src={getLogoUrl(story)}
|
||||||
|
alt="source logo"
|
||||||
|
/>
|
||||||
|
{" "}
|
||||||
|
{title}
|
||||||
|
</Link>
|
||||||
|
|
||||||
|
<span className="source">({sourceLink(story)})</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{infoLine(story)}
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
@@ -1,7 +1,7 @@
|
|||||||
import React from 'react';
|
import React from 'react';
|
||||||
import { Helmet } from 'react-helmet';
|
import { Helmet } from 'react-helmet';
|
||||||
import localForage from 'localforage';
|
import localForage from 'localforage';
|
||||||
import { sourceLink, infoLine, ToggleDot } from './utils.js';
|
import { sourceLink, infoLine, otherDiscussions, ToggleDot } from '../utils.js';
|
||||||
|
|
||||||
class Article extends React.Component {
|
class Article extends React.Component {
|
||||||
constructor(props) {
|
constructor(props) {
|
||||||
@@ -14,29 +14,25 @@ class Article extends React.Component {
|
|||||||
|
|
||||||
this.state = {
|
this.state = {
|
||||||
story: cache[id] || false,
|
story: cache[id] || false,
|
||||||
|
related: [],
|
||||||
error: false,
|
error: false,
|
||||||
pConv: [],
|
pConv: [],
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
componentDidMount() {
|
componentDidMount() {
|
||||||
const id = this.props.match ? this.props.match.params.id : 'CLOL';
|
const id = this.props.match ? this.props.match.params.id : 'CLOL';
|
||||||
|
|
||||||
localForage.getItem(id)
|
localForage.getItem(id).then((value) => value ? this.setState({ story: value }) : null);
|
||||||
.then(
|
localForage.getItem(`related-${id}`).then((value) => value ? this.setState({ related: value }) : null);
|
||||||
(value) => {
|
|
||||||
if (value) {
|
|
||||||
this.setState({ story: value });
|
|
||||||
}
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
fetch('/api/' + id)
|
fetch('/api/' + id)
|
||||||
.then(res => res.json())
|
.then(res => res.json())
|
||||||
.then(
|
.then(
|
||||||
(result) => {
|
(result) => {
|
||||||
this.setState({ story: result.story });
|
this.setState({ story: result.story, related: result.related });
|
||||||
localForage.setItem(id, result.story);
|
localForage.setItem(id, result.story);
|
||||||
|
localForage.setItem(`related-${id}`, result.related);
|
||||||
},
|
},
|
||||||
(error) => {
|
(error) => {
|
||||||
this.setState({ error: true });
|
this.setState({ error: true });
|
||||||
@@ -45,12 +41,13 @@ class Article extends React.Component {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pConvert = (n) => {
|
pConvert = (n) => {
|
||||||
this.setState({ pConv: [...this.state.pConv, n]});
|
this.setState({ pConv: [...this.state.pConv, n] });
|
||||||
}
|
}
|
||||||
|
|
||||||
render() {
|
render() {
|
||||||
const id = this.props.match ? this.props.match.params.id : 'CLOL';
|
const id = this.props.match ? this.props.match.params.id : 'CLOL';
|
||||||
const story = this.state.story;
|
const story = this.state.story;
|
||||||
|
const related = this.state.related.filter(r => r.id != id);
|
||||||
const error = this.state.error;
|
const error = this.state.error;
|
||||||
const pConv = this.state.pConv;
|
const pConv = this.state.pConv;
|
||||||
let nodes = null;
|
let nodes = null;
|
||||||
@@ -77,6 +74,7 @@ class Article extends React.Component {
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
{infoLine(story)}
|
{infoLine(story)}
|
||||||
|
{otherDiscussions(related)}
|
||||||
|
|
||||||
{nodes ?
|
{nodes ?
|
||||||
<div className='story-text'>
|
<div className='story-text'>
|
||||||
@@ -85,10 +83,10 @@ class Article extends React.Component {
|
|||||||
v.innerHTML.split('\n\n').map(x =>
|
v.innerHTML.split('\n\n').map(x =>
|
||||||
<p dangerouslySetInnerHTML={{ __html: x }} />
|
<p dangerouslySetInnerHTML={{ __html: x }} />
|
||||||
)
|
)
|
||||||
:
|
:
|
||||||
(v.nodeName === '#text' ?
|
(v.nodeName === '#text' ?
|
||||||
<p>{v.data}</p>
|
<p>{v.data}</p>
|
||||||
:
|
:
|
||||||
<>
|
<>
|
||||||
<v.localName dangerouslySetInnerHTML={v.innerHTML ? { __html: v.innerHTML } : null} />
|
<v.localName dangerouslySetInnerHTML={v.innerHTML ? { __html: v.innerHTML } : null} />
|
||||||
{v.localName == 'pre' && <button onClick={() => this.pConvert(k)}>Convert Code to Paragraph</button>}
|
{v.localName == 'pre' && <button onClick={() => this.pConvert(k)}>Convert Code to Paragraph</button>}
|
||||||
@@ -96,11 +94,11 @@ class Article extends React.Component {
|
|||||||
)
|
)
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
:
|
:
|
||||||
<p>Problem getting article :(</p>
|
<p>Problem getting article :(</p>
|
||||||
}
|
}
|
||||||
</div>
|
</div>
|
||||||
:
|
:
|
||||||
<p>loading...</p>
|
<p>loading...</p>
|
||||||
}
|
}
|
||||||
<ToggleDot id={id} article={false} />
|
<ToggleDot id={id} article={false} />
|
@@ -4,9 +4,9 @@ import { HashLink } from 'react-router-hash-link';
|
|||||||
import { Helmet } from 'react-helmet';
|
import { Helmet } from 'react-helmet';
|
||||||
import moment from 'moment';
|
import moment from 'moment';
|
||||||
import localForage from 'localforage';
|
import localForage from 'localforage';
|
||||||
import { infoLine, ToggleDot } from './utils.js';
|
import { infoLine, otherDiscussions, ToggleDot } from '../utils.js';
|
||||||
|
|
||||||
class Article extends React.Component {
|
class Comments extends React.Component {
|
||||||
constructor(props) {
|
constructor(props) {
|
||||||
super(props);
|
super(props);
|
||||||
|
|
||||||
@@ -17,6 +17,7 @@ class Article extends React.Component {
|
|||||||
|
|
||||||
this.state = {
|
this.state = {
|
||||||
story: cache[id] || false,
|
story: cache[id] || false,
|
||||||
|
related: [],
|
||||||
error: false,
|
error: false,
|
||||||
collapsed: [],
|
collapsed: [],
|
||||||
expanded: [],
|
expanded: [],
|
||||||
@@ -26,24 +27,21 @@ class Article extends React.Component {
|
|||||||
componentDidMount() {
|
componentDidMount() {
|
||||||
const id = this.props.match.params.id;
|
const id = this.props.match.params.id;
|
||||||
|
|
||||||
localForage.getItem(id)
|
localForage.getItem(id).then((value) => this.setState({ story: value }));
|
||||||
.then(
|
localForage.getItem(`related-${id}`).then((value) => value ? this.setState({ related: value }) : null);
|
||||||
(value) => {
|
|
||||||
this.setState({ story: value });
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
fetch('/api/' + id)
|
fetch('/api/' + id)
|
||||||
.then(res => res.json())
|
.then(res => res.json())
|
||||||
.then(
|
.then(
|
||||||
(result) => {
|
(result) => {
|
||||||
this.setState({ story: result.story }, () => {
|
this.setState({ story: result.story, related: result.related }, () => {
|
||||||
const hash = window.location.hash.substring(1);
|
const hash = window.location.hash.substring(1);
|
||||||
if (hash) {
|
if (hash) {
|
||||||
document.getElementById(hash).scrollIntoView();
|
document.getElementById(hash).scrollIntoView();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
localForage.setItem(id, result.story);
|
localForage.setItem(id, result.story);
|
||||||
|
localForage.setItem(`related-${id}`, result.related);
|
||||||
},
|
},
|
||||||
(error) => {
|
(error) => {
|
||||||
this.setState({ error: true });
|
this.setState({ error: true });
|
||||||
@@ -72,7 +70,7 @@ class Article extends React.Component {
|
|||||||
}
|
}
|
||||||
|
|
||||||
displayComment(story, c, level) {
|
displayComment(story, c, level) {
|
||||||
const cid = c.author+c.date;
|
const cid = c.author + c.date;
|
||||||
|
|
||||||
const collapsed = this.state.collapsed.includes(cid);
|
const collapsed = this.state.collapsed.includes(cid);
|
||||||
const expanded = this.state.expanded.includes(cid);
|
const expanded = this.state.expanded.includes(cid);
|
||||||
@@ -85,19 +83,22 @@ class Article extends React.Component {
|
|||||||
<div className='info'>
|
<div className='info'>
|
||||||
<p>
|
<p>
|
||||||
{c.author === story.author ? '[OP]' : ''} {c.author || '[Deleted]'}
|
{c.author === story.author ? '[OP]' : ''} {c.author || '[Deleted]'}
|
||||||
{' '} | <HashLink to={'#'+cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
|
{' '} | <HashLink to={'#' + cid} id={cid}>{moment.unix(c.date).fromNow()}</HashLink>
|
||||||
|
|
||||||
{hidden || hasChildren &&
|
{hasChildren && (
|
||||||
<span className='collapser pointer' onClick={() => this.collapseComment(cid)}>–</span>
|
hidden ?
|
||||||
}
|
<span className='collapser expander pointer' onClick={() => this.expandComment(cid)}>+</span>
|
||||||
|
:
|
||||||
|
<span className='collapser pointer' onClick={() => this.collapseComment(cid)}>–</span>
|
||||||
|
)}
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className={collapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text }} />
|
<div className={collapsed ? 'text hidden' : 'text'} dangerouslySetInnerHTML={{ __html: c.text }} />
|
||||||
|
|
||||||
{hidden && hasChildren ?
|
{hidden && hasChildren ?
|
||||||
<div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c)-1} more]</div>
|
<div className='comment lined info pointer' onClick={() => this.expandComment(cid)}>[show {this.countComments(c) - 1} more]</div>
|
||||||
:
|
:
|
||||||
c.comments.map(i => this.displayComment(story, i, level + 1))
|
c.comments.map(i => this.displayComment(story, i, level + 1))
|
||||||
}
|
}
|
||||||
</div>
|
</div>
|
||||||
@@ -107,6 +108,7 @@ class Article extends React.Component {
|
|||||||
render() {
|
render() {
|
||||||
const id = this.props.match.params.id;
|
const id = this.props.match.params.id;
|
||||||
const story = this.state.story;
|
const story = this.state.story;
|
||||||
|
const related = this.state.related.filter(r => r.id != id);
|
||||||
const error = this.state.error;
|
const error = this.state.error;
|
||||||
|
|
||||||
return (
|
return (
|
||||||
@@ -125,12 +127,13 @@ class Article extends React.Component {
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
{infoLine(story)}
|
{infoLine(story)}
|
||||||
|
{otherDiscussions(related)}
|
||||||
|
|
||||||
<div className='comments'>
|
<div className='comments'>
|
||||||
{story.comments.map(c => this.displayComment(story, c, 0))}
|
{story.comments.map(c => this.displayComment(story, c, 0))}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
:
|
:
|
||||||
<p>loading...</p>
|
<p>loading...</p>
|
||||||
}
|
}
|
||||||
<ToggleDot id={id} article={true} />
|
<ToggleDot id={id} article={true} />
|
||||||
@@ -139,4 +142,4 @@ class Article extends React.Component {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export default Article;
|
export default Comments;
|
@@ -1,8 +1,7 @@
|
|||||||
import React from 'react';
|
import React from 'react';
|
||||||
import { Link } from 'react-router-dom';
|
|
||||||
import { Helmet } from 'react-helmet';
|
import { Helmet } from 'react-helmet';
|
||||||
import localForage from 'localforage';
|
import localForage from 'localforage';
|
||||||
import { sourceLink, infoLine, logos } from './utils.js';
|
import { StoryItem } from '../components/StoryItem.js';
|
||||||
|
|
||||||
class Feed extends React.Component {
|
class Feed extends React.Component {
|
||||||
constructor(props) {
|
constructor(props) {
|
||||||
@@ -22,20 +21,24 @@ class Feed extends React.Component {
|
|||||||
const updated = !this.state.stories || this.state.stories[0].id !== result.stories[0].id;
|
const updated = !this.state.stories || this.state.stories[0].id !== result.stories[0].id;
|
||||||
console.log('updated:', updated);
|
console.log('updated:', updated);
|
||||||
|
|
||||||
this.setState({ stories: result.stories });
|
const { stories } = result;
|
||||||
localStorage.setItem('stories', JSON.stringify(result.stories));
|
this.setState({ stories });
|
||||||
|
localStorage.setItem('stories', JSON.stringify(stories));
|
||||||
|
|
||||||
if (updated) {
|
if (updated) {
|
||||||
localForage.clear();
|
localForage.clear();
|
||||||
result.stories.forEach((x, i) => {
|
stories.forEach((x, i) => {
|
||||||
fetch('/api/' + x.id)
|
fetch('/api/' + x.id)
|
||||||
.then(res => res.json())
|
.then(res => res.json())
|
||||||
.then(result => {
|
.then(({ story, related }) => {
|
||||||
localForage.setItem(x.id, result.story)
|
Promise.all([
|
||||||
.then(console.log('preloaded', x.id, x.title));
|
localForage.setItem(x.id, story),
|
||||||
this.props.updateCache(x.id, result.story);
|
localForage.setItem(`related-${x.id}`, related)
|
||||||
}, error => {}
|
]).then(console.log('preloaded', x.id, x.title));
|
||||||
);
|
this.props.updateCache(x.id, story);
|
||||||
|
this.props.updateCache(`related-${x.id}`, related);
|
||||||
|
}, error => { }
|
||||||
|
);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -55,27 +58,7 @@ class Feed extends React.Component {
|
|||||||
<title>Feed - QotNews</title>
|
<title>Feed - QotNews</title>
|
||||||
</Helmet>
|
</Helmet>
|
||||||
{error && <p>Connection error?</p>}
|
{error && <p>Connection error?</p>}
|
||||||
{stories ?
|
{stories ? stories.map(story => <StoryItem story={story}></StoryItem>) : <p>loading...</p>}
|
||||||
<div>
|
|
||||||
{stories.map(x =>
|
|
||||||
<div className='item' key={x.id}>
|
|
||||||
<div className='title'>
|
|
||||||
<Link className='link' to={'/' + x.id}>
|
|
||||||
<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
|
|
||||||
</Link>
|
|
||||||
|
|
||||||
<span className='source'>
|
|
||||||
({sourceLink(x)})
|
|
||||||
</span>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{infoLine(x)}
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
:
|
|
||||||
<p>loading...</p>
|
|
||||||
}
|
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
}
|
}
|
@@ -1,8 +1,7 @@
|
|||||||
import React from 'react';
|
import React from 'react';
|
||||||
import { Link } from 'react-router-dom';
|
|
||||||
import { Helmet } from 'react-helmet';
|
import { Helmet } from 'react-helmet';
|
||||||
import { sourceLink, infoLine, logos } from './utils.js';
|
|
||||||
import AbortController from 'abort-controller';
|
import AbortController from 'abort-controller';
|
||||||
|
import { StoryItem } from '../components/StoryItem.js';
|
||||||
|
|
||||||
class Results extends React.Component {
|
class Results extends React.Component {
|
||||||
constructor(props) {
|
constructor(props) {
|
||||||
@@ -63,28 +62,10 @@ class Results extends React.Component {
|
|||||||
<>
|
<>
|
||||||
<p>Search results:</p>
|
<p>Search results:</p>
|
||||||
<div className='comment lined'>
|
<div className='comment lined'>
|
||||||
{stories.length ?
|
{stories ? stories.map(story => <StoryItem story={story}></StoryItem>) : <p>loading...</p>}
|
||||||
stories.map(x =>
|
|
||||||
<div className='item' key={x.id}>
|
|
||||||
<div className='title'>
|
|
||||||
<Link className='link' to={'/' + x.id}>
|
|
||||||
<img className='source-logo' src={logos[x.source]} alt='source logo' /> {x.title}
|
|
||||||
</Link>
|
|
||||||
|
|
||||||
<span className='source'>
|
|
||||||
({sourceLink(x)})
|
|
||||||
</span>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{infoLine(x)}
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
:
|
|
||||||
<p>none</p>
|
|
||||||
}
|
|
||||||
</div>
|
</div>
|
||||||
</>
|
</>
|
||||||
:
|
:
|
||||||
<p>loading...</p>
|
<p>loading...</p>
|
||||||
}
|
}
|
||||||
</div>
|
</div>
|
File diff suppressed because one or more lines are too long
5321
webclient/yarn.lock
5321
webclient/yarn.lock
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user