fix sitemap

master
Jason 4 years ago
parent b759f46582
commit abf8589e02
  1. 6
      apiserver/feeds/sitemap.py

@@ -108,20 +108,20 @@ class Sitemap:
if md['type'] == 'https://schema.org/NewsArticle': if md['type'] == 'https://schema.org/NewsArticle':
props = md['properties'] props = md['properties']
s['title'] = props['headline'] s['title'] = props['headline']
if props['author']: if 'author' in props and props['author']:
s['author'] = props['author']['properties']['name'] s['author'] = props['author']['properties']['name']
for ld in data['json-ld']: for ld in data['json-ld']:
if ld['@type'] == 'Article': if ld['@type'] == 'Article':
s['title'] = ld['headline'] s['title'] = ld['headline']
if ld['author']: if 'author' in ld and ld['author']:
s['author'] = ld['author']['name'] s['author'] = ld['author']['name']
return s return s
# scratchpad so I can quickly develop the parser # scratchpad so I can quickly develop the parser
if __name__ == '__main__': if __name__ == '__main__':
# site = Sitemap("https://www.stuff.co.nz/sitemap.xml") #site = Sitemap("https://www.stuff.co.nz/sitemap.xml")
site = Sitemap("https://www.nzherald.co.nz/arcio/news-sitemap/") site = Sitemap("https://www.nzherald.co.nz/arcio/news-sitemap/")
posts = site.feed() posts = site.feed()
print(posts[:1]) print(posts[:1])

Loading…
Cancel
Save