Fix sitemap: check that 'author' is present in microdata and JSON-LD before reading it
This commit is contained in:
parent
b759f46582
commit
abf8589e02
|
@@ -108,20 +108,20 @@ class Sitemap:
|
||||||
if md['type'] == 'https://schema.org/NewsArticle':
|
if md['type'] == 'https://schema.org/NewsArticle':
|
||||||
props = md['properties']
|
props = md['properties']
|
||||||
s['title'] = props['headline']
|
s['title'] = props['headline']
|
||||||
if props['author']:
|
if 'author' in props and props['author']:
|
||||||
s['author'] = props['author']['properties']['name']
|
s['author'] = props['author']['properties']['name']
|
||||||
|
|
||||||
for ld in data['json-ld']:
|
for ld in data['json-ld']:
|
||||||
if ld['@type'] == 'Article':
|
if ld['@type'] == 'Article':
|
||||||
s['title'] = ld['headline']
|
s['title'] = ld['headline']
|
||||||
if ld['author']:
|
if 'author' in ld and ld['author']:
|
||||||
s['author'] = ld['author']['name']
|
s['author'] = ld['author']['name']
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
# scratchpad so I can quickly develop the parser
|
# scratchpad so I can quickly develop the parser
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# site = Sitemap("https://www.stuff.co.nz/sitemap.xml")
|
#site = Sitemap("https://www.stuff.co.nz/sitemap.xml")
|
||||||
site = Sitemap("https://www.nzherald.co.nz/arcio/news-sitemap/")
|
site = Sitemap("https://www.nzherald.co.nz/arcio/news-sitemap/")
|
||||||
posts = site.feed()
|
posts = site.feed()
|
||||||
print(posts[:1])
|
print(posts[:1])
|
||||||
|
|
Loading…
Reference in New Issue
Block a user