forked from tanner/qotnews
Compare commits
24 Commits
2f730c1f52
...
2439c113b3
Author | SHA1 | Date |
---|---|---|
Jason Schwarzenberger | 2439c113b3 | 4 years ago |
Jason Schwarzenberger | 0f5e28136d | 4 years ago |
Jason Schwarzenberger | bb1413b586 | 4 years ago |
Jason Schwarzenberger | 0a27c0da1f | 4 years ago |
Jason Schwarzenberger | fe01ea52e5 | 4 years ago |
Jason Schwarzenberger | 3daae5fa1b | 4 years ago |
Jason Schwarzenberger | 25caee17d6 | 4 years ago |
Jason Schwarzenberger | c1b6349771 | 4 years ago |
Jason | 54a4c7e55a | 4 years ago |
Jason | b12a3570b0 | 4 years ago |
Jason Schwarzenberger | 0bfa920654 | 4 years ago |
Jason Schwarzenberger | 9341b4d966 | 4 years ago |
Jason Schwarzenberger | a2e5faa3b5 | 4 years ago |
Jason Schwarzenberger | a86eb98c1a | 4 years ago |
Jason Schwarzenberger | abf7f0a802 | 4 years ago |
Jason Schwarzenberger | d288546d6f | 4 years ago |
Jason Schwarzenberger | cc130942ca | 4 years ago |
Jason Schwarzenberger | f0b14408d4 | 4 years ago |
Jason Schwarzenberger | e1830a589b | 4 years ago |
Jason Schwarzenberger | 32bc3b906b | 4 years ago |
Jason Schwarzenberger | f5e65632b8 | 4 years ago |
Jason Schwarzenberger | 1fe524207e | 4 years ago |
Jason Schwarzenberger | dc3d17b171 | 4 years ago |
Jason Schwarzenberger | 539350a83d | 4 years ago |
25 changed files with 3389 additions and 2612 deletions
@ -0,0 +1,14 @@ |
||||
from bs4 import BeautifulSoup |
||||
|
||||
def get_icons(markup, urlref=''):
    """Collect the icon URLs declared by ``<link>`` tags in an HTML document.

    Links are gathered in priority order: 32x32 icons, 16x16 icons,
    "shortcut icon" links, then any remaining ``rel="icon"`` link.
    Duplicates are dropped while keeping that order.

    Args:
        markup: HTML source of the page.
        urlref: URL the markup was fetched from; relative hrefs are resolved
            against it. With the default empty string, hrefs are returned
            as written.

    Returns:
        A list of icon URL strings, highest-priority first.
    """
    # Local import so this block does not depend on a file-level import.
    from urllib.parse import urljoin

    soup = BeautifulSoup(markup, features='html.parser')
    icon32 = soup.find_all('link', rel="icon", href=True, sizes="32x32")
    icon16 = soup.find_all('link', rel="icon", href=True, sizes="16x16")
    favicon = soup.find_all('link', rel="shortcut icon", href=True)
    others = soup.find_all('link', rel="icon", href=True)
    candidates = icon32 + icon16 + favicon + others

    # dict.fromkeys de-duplicates while preserving the priority order above;
    # a plain set() would shuffle it.
    hrefs = list(dict.fromkeys(link.get('href') for link in candidates))

    # urljoin leaves absolute URLs untouched and correctly resolves
    # root-relative, path-relative and protocol-relative hrefs.
    return [urljoin(urlref, href) for href in hrefs]
@ -0,0 +1,64 @@ |
||||
import re |
||||
from bs4 import BeautifulSoup |
||||
|
||||
if __name__ == '__main__': |
||||
import sys |
||||
sys.path.insert(0,'.') |
||||
|
||||
from misc.time import unix |
||||
from misc.api import xml |
||||
|
||||
def _soup_get_text(soup): |
||||
if not soup: return None |
||||
if soup.text: return soup.text |
||||
|
||||
s = soup.find(text=lambda tag: isinstance(tag, bs4.CData)) |
||||
if s and s.string: return s.string.strip() |
||||
return None |
||||
|
||||
def _parse_comment(soup):
    """Convert one parsed feed ``<item>`` element into a comment dict.

    Args:
        soup: The parsed ``<item>`` element.

    Returns:
        A dict with keys ``author``, ``authorLink``, ``score``, ``date``,
        ``text`` and ``comments``. ``score`` and ``comments`` are always left
        at their defaults (0 and an empty list) by this function.
    """
    c = {
        'author': '',
        'authorLink': '',
        'score': 0,
        'date': 0,
        'text': '',
        'comments': [],
    }

    link = soup.find('link')
    link_text = _soup_get_text(link) if link else None
    if link_text and 'By:' in link_text:
        # Take what follows the "By:" marker. str.strip('By:') removes any of
        # the characters 'B', 'y', ':' from both ends, which mangles names
        # such as "Barry" -> "Barr".
        c['author'] = link_text.split('By:', 1)[1].strip()

    # An explicit dc:creator element takes precedence over the link text.
    creator = soup.find('dc:creator')
    if creator:
        c['author'] = _soup_get_text(creator)

    if link:
        c['authorLink'] = link_text

    description = soup.find('description')
    if description:
        c['text'] = _soup_get_text(description)

    # Try the lower-cased tag name first, then the original-case spelling.
    pubdate = soup.find('pubdate') or soup.find('pubDate')
    if pubdate:
        c['date'] = unix(pubdate.text)

    return c
||||
|
||||
def get_comments(url):
    """Fetch and parse the comments for a stuff.co.nz article.

    The article path (section(s) plus numeric id) is extracted from ``url``
    and used to build the comments RSS feed URL, which is then fetched via
    ``xml`` and parsed item by item.

    Args:
        url: Article URL of the form
            ``https://www.stuff.co.nz/<section...>/<id>/<slug>``.

    Returns:
        A list of comment dicts as produced by ``_parse_comment``; an empty
        list when the URL does not match the expected form, the feed could
        not be fetched, or it contains no items.
    """
    regex = r"https:\/\/www\.stuff\.co\.nz\/(.*\/\d+)/[^\/]+"
    matched = re.match(regex, url)
    if not matched:
        # Not a recognised article URL: there is no feed to look up.
        return []

    path = matched.group(1)
    comment_url = f'https://comments.us1.gigya.com/comments/rss/6201101/Stuff/stuff/{path}'

    # `xml` is handed a callable that yields the URL to fetch.
    markup = xml(lambda x: comment_url)
    if not markup:
        return []

    soup = BeautifulSoup(markup, features='html.parser')
    items = soup.find_all('item')
    if not items:
        return []
    return [_parse_comment(item) for item in items]
||||
|
||||
|
||||
# Manual smoke test for quick parser development: fetch one known article's
# comments and show how many were parsed plus the first few entries.
if __name__ == '__main__':
    sample_url = 'https://www.stuff.co.nz/life-style/homed/houses/123418468/dear-jacinda-we-need-to-talk-about-housing'
    fetched = get_comments(sample_url)
    print(len(fetched))
    print(fetched[:5])
@ -0,0 +1,48 @@ |
||||
import logging |
||||
logging.basicConfig( |
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', |
||||
level=logging.INFO) |
||||
|
||||
import sys |
||||
import json |
||||
import requests |
||||
|
||||
import database |
||||
import feed |
||||
import search |
||||
|
||||
# Initialise the database and search modules at import time, before any
# story is read or written below.
database.init()
search.init()
||||
|
||||
def _update_current_story(story, item):
    """Refresh a stored story and persist the result, or drop its ref.

    Args:
        story: Story dict; must contain ``ref`` and is mutated in place.
        item: Ref record providing ``urlref`` and ``ref``.

    The story's ``text`` is cleared first when it has a non-empty ``url``.
    If ``feed.update_story`` reports the story as valid it is written to both
    the database and the search index; otherwise the ref is deleted.
    """
    ref_label = str(story['ref'])
    logging.info('Updating story: {}'.format(ref_label))

    has_url = story.get('url', '')
    if has_url:
        story['text'] = ''

    still_valid = feed.update_story(story, urlref=item['urlref'])
    if not still_valid:
        database.del_ref(item['ref'])
        logging.info('Removed ref {}'.format(item['ref']))
        return

    database.put_story(story)
    search.put_story(story)
||||
|
||||
# Command-line entry point: re-fetch and re-store a single story by its id.
if __name__ == '__main__':
    if len(sys.argv) != 2:
        # Report the script's actual name rather than a hard-coded one.
        print('Usage: python {} [story id]'.format(sys.argv[0]))
        # sys.exit is always available; the bare `exit` helper is only
        # injected by the `site` module.
        sys.exit(1)
    sid = sys.argv[1]

    item = database.get_ref_by_sid(sid)

    # Guard each lookup so a missing ref or missing story record falls
    # through to the "not found" message instead of raising on `.data`.
    record = database.get_story(item['sid']) if item else None
    story = record.data if record else None

    if story:
        print('Updating story:')
        _update_current_story(story, item)
    else:
        print('Story not found. Exiting.')
@ -1 +1 @@ |
||||
Subproject commit 50a94df7283e31680c5d94dd666bab58aea2e475 |
||||
Subproject commit d3d5fc74acf0be8a49e2772b42ab59278d1a3e81 |
@ -1,225 +1,231 @@ |
||||
body { |
||||
text-rendering: optimizeLegibility; |
||||
font: 1rem/1.3 sans-serif; |
||||
color: #000000; |
||||
margin-bottom: 100vh; |
||||
word-break: break-word; |
||||
font-kerning: normal; |
||||
text-rendering: optimizeLegibility; |
||||
font: 1rem/1.3 sans-serif; |
||||
color: #000000; |
||||
margin-bottom: 100vh; |
||||
word-break: break-word; |
||||
font-kerning: normal; |
||||
} |
||||
|
||||
a { |
||||
color: #000000; |
||||
text-decoration: none; |
||||
outline: none; |
||||
color: #000000; |
||||
text-decoration: none; |
||||
outline: none; |
||||
} |
||||
|
||||
input { |
||||
font-size: 1.05rem; |
||||
background-color: transparent; |
||||
border: 1px solid #828282; |
||||
margin: 0.25rem; |
||||
padding: 6px; |
||||
border-radius: 4px; |
||||
font-size: 1.05rem; |
||||
background-color: transparent; |
||||
border: 1px solid #828282; |
||||
margin: 0.25rem; |
||||
padding: 6px; |
||||
border-radius: 4px; |
||||
} |
||||
|
||||
pre { |
||||
overflow: auto; |
||||
overflow: auto; |
||||
} |
||||
|
||||
.container { |
||||
margin: 1rem auto; |
||||
max-width: 64rem; |
||||
margin: 1rem auto; |
||||
max-width: 64rem; |
||||
} |
||||
|
||||
.menu { |
||||
font-size: 1.1rem; |
||||
padding: 0 1rem; |
||||
font-size: 1.1rem; |
||||
padding: 0 1rem; |
||||
} |
||||
|
||||
.slogan { |
||||
color: #828282; |
||||
color: #828282; |
||||
} |
||||
|
||||
.theme { |
||||
float: right; |
||||
float: right; |
||||
} |
||||
|
||||
.item { |
||||
display: table; |
||||
color: #828282; |
||||
margin-bottom: 0.7rem; |
||||
display: table; |
||||
color: #828282; |
||||
margin-bottom: 0.7rem; |
||||
} |
||||
|
||||
.item .source-logo { |
||||
width: 0.9rem; |
||||
height: 0.9rem; |
||||
width: 0.9rem; |
||||
height: 0.9rem; |
||||
} |
||||
|
||||
.item a { |
||||
color: #828282; |
||||
color: #828282; |
||||
} |
||||
.item a:hover { |
||||
text-decoration: underline; |
||||
text-decoration: underline; |
||||
} |
||||
|
||||
.item a.link { |
||||
font-size: 1.1rem; |
||||
color: #000000; |
||||
font-size: 1.1rem; |
||||
color: #000000; |
||||
} |
||||
.item a.link:visited { |
||||
color: #828282; |
||||
color: #828282; |
||||
} |
||||
.item a.link:hover { |
||||
text-decoration: none; |
||||
text-decoration: none; |
||||
} |
||||
|
||||
span.source { |
||||
margin-left: 0.4rem; |
||||
margin-left: 0.4rem; |
||||
} |
||||
|
||||
.item .info a.hot { |
||||
color: #444444; |
||||
color: #444444; |
||||
} |
||||
|
||||
.article { |
||||
padding-bottom: 3rem; |
||||
padding-bottom: 3rem; |
||||
} |
||||
|
||||
.article-container { |
||||
margin: 1rem auto; |
||||
max-width: 38rem; |
||||
margin: 1rem auto; |
||||
max-width: 38rem; |
||||
} |
||||
|
||||
.article a { |
||||
border-bottom: 1px solid #222222; |
||||
border-bottom: 1px solid #222222; |
||||
} |
||||
|
||||
.article h1 { |
||||
font-size: 1.6rem; |
||||
font-size: 1.6rem; |
||||
} |
||||
|
||||
.article h2 { |
||||
font-size: 1.4rem; |
||||
font-size: 1.4rem; |
||||
} |
||||
|
||||
.article h3, .article h4 { |
||||
font-size: 1.3rem; |
||||
.article h3, |
||||
.article h4 { |
||||
font-size: 1.3rem; |
||||
} |
||||
|
||||
.article img { |
||||
max-width: 100%; |
||||
height: auto; |
||||
max-width: 100%; |
||||
height: auto; |
||||
} |
||||
|
||||
.article figure, .article video { |
||||
width: 100%; |
||||
height: auto; |
||||
margin: 0; |
||||
.article figure, |
||||
.article video { |
||||
width: 100%; |
||||
height: auto; |
||||
margin: 0; |
||||
} |
||||
|
||||
.article table { |
||||
width: 100%; |
||||
table-layout: fixed; |
||||
width: 100%; |
||||
table-layout: fixed; |
||||
} |
||||
|
||||
.article iframe { |
||||
display: none; |
||||
display: none; |
||||
} |
||||
|
||||
.article u { |
||||
border-bottom: 1px solid #222; |
||||
text-decoration: none; |
||||
border-bottom: 1px solid #222; |
||||
text-decoration: none; |
||||
} |
||||
|
||||
.article .info { |
||||
color: #828282; |
||||
color: #828282; |
||||
} |
||||
|
||||
.article .info a { |
||||
border-bottom: none; |
||||
color: #828282; |
||||
border-bottom: none; |
||||
color: #828282; |
||||
} |
||||
.article .info a:hover { |
||||
text-decoration: underline; |
||||
text-decoration: underline; |
||||
} |
||||
|
||||
.story-text { |
||||
font: 1.2rem/1.5 'Apparatus SIL', sans-serif; |
||||
margin-top: 1em; |
||||
font: 1.2rem/1.5 "Apparatus SIL", sans-serif; |
||||
margin-top: 1em; |
||||
} |
||||
|
||||
.comments { |
||||
margin-left: -1.25rem; |
||||
margin-left: -1.25rem; |
||||
} |
||||
|
||||
.comment { |
||||
padding-left: 1.25rem; |
||||
padding-left: 1.25rem; |
||||
} |
||||
|
||||
.comment.lined { |
||||
border-left: 1px solid #cccccc; |
||||
border-left: 1px solid #cccccc; |
||||
} |
||||
|
||||
.comment .text { |
||||
margin-top: -0.5rem; |
||||
margin-top: -0.5rem; |
||||
} |
||||
|
||||
.comment .text.hidden > p { |
||||
white-space: nowrap; |
||||
overflow: hidden; |
||||
text-overflow: ellipsis; |
||||
display: none; |
||||
color: #828282; |
||||
white-space: nowrap; |
||||
overflow: hidden; |
||||
text-overflow: ellipsis; |
||||
display: none; |
||||
color: #828282; |
||||
} |
||||
|
||||
.comment .text.hidden > p:first-child { |
||||
display: block; |
||||
display: block; |
||||
} |
||||
|
||||
.comment .collapser { |
||||
padding-left: 0.5rem; |
||||
padding-right: 1.5rem; |
||||
padding-left: 0.5rem; |
||||
padding-right: 1.5rem; |
||||
} |
||||
|
||||
.comment .pointer { |
||||
cursor: pointer; |
||||
cursor: pointer; |
||||
} |
||||
|
||||
.toggleDot { |
||||
position: fixed; |
||||
bottom: 1rem; |
||||
left: 1rem; |
||||
height: 3rem; |
||||
width: 3rem; |
||||
background-color: #828282; |
||||
border-radius: 50%; |
||||
position: fixed; |
||||
bottom: 1rem; |
||||
left: 1rem; |
||||
height: 3rem; |
||||
width: 3rem; |
||||
background-color: #828282; |
||||
border-radius: 50%; |
||||
} |
||||
|
||||
.toggleDot .button { |
||||
font: 2rem/1 'icomoon'; |
||||
position: relative; |
||||
top: 0.5rem; |
||||
left: 0.55rem; |
||||
font: 2rem/1 "icomoon"; |
||||
position: relative; |
||||
top: 0.5rem; |
||||
left: 0.55rem; |
||||
} |
||||
|
||||
.forwardDot { |
||||
cursor: pointer; |
||||
position: fixed; |
||||
bottom: 1rem; |
||||
right: 1rem; |
||||
height: 3rem; |
||||
width: 3rem; |
||||
background-color: #828282; |
||||
border-radius: 50%; |
||||
cursor: pointer; |
||||
position: fixed; |
||||
bottom: 1rem; |
||||
right: 1rem; |
||||
height: 3rem; |
||||
width: 3rem; |
||||
background-color: #828282; |
||||
border-radius: 50%; |
||||
} |
||||
|
||||
.forwardDot .button { |
||||
font: 2.5rem/1 'icomoon'; |
||||
position: relative; |
||||
top: 0.25rem; |
||||
left: 0.3rem; |
||||
font: 2.5rem/1 "icomoon"; |
||||
position: relative; |
||||
top: 0.25rem; |
||||
left: 0.3rem; |
||||
} |
||||
|
||||
.search form { |
||||
display: inline; |
||||
display: inline; |
||||
} |
||||
|
||||
.indented { |
||||
padding: 0 0 0 1rem; |
||||
} |
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue