Fix sitemap: check that the 'author' key exists before reading it (microdata and JSON-LD)
This commit is contained in:
parent
b759f46582
commit
abf8589e02
|
@@ -108,20 +108,20 @@ class Sitemap:
|
|||
if md['type'] == 'https://schema.org/NewsArticle':
|
||||
props = md['properties']
|
||||
s['title'] = props['headline']
|
||||
if props['author']:
|
||||
if 'author' in props and props['author']:
|
||||
s['author'] = props['author']['properties']['name']
|
||||
|
||||
for ld in data['json-ld']:
|
||||
if ld['@type'] == 'Article':
|
||||
s['title'] = ld['headline']
|
||||
if ld['author']:
|
||||
if 'author' in ld and ld['author']:
|
||||
s['author'] = ld['author']['name']
|
||||
return s
|
||||
|
||||
|
||||
# scratchpad so I can quickly develop the parser
|
||||
if __name__ == '__main__':
|
||||
# site = Sitemap("https://www.stuff.co.nz/sitemap.xml")
|
||||
#site = Sitemap("https://www.stuff.co.nz/sitemap.xml")
|
||||
site = Sitemap("https://www.nzherald.co.nz/arcio/news-sitemap/")
|
||||
posts = site.feed()
|
||||
print(posts[:1])
|
||||
|
|
Loading…
Reference in New Issue
Block a user