Fix date parsing for Newsroom.

This commit is contained in:
Jason Schwarzenberger 2020-11-04 11:53:16 +13:00
parent d1795eb1b8
commit d718d05a04

View File

@@ -21,7 +21,8 @@ def unix(date_str):
date_tzfix = date_str date_tzfix = date_str
if ":" == date_tzfix[-3]: if ":" == date_tzfix[-3]:
date_tzfix = date_tzfix[:-3]+date_tzfix[-2:] date_tzfix = date_tzfix[:-3]+date_tzfix[-2:]
formats = ['%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S%z', '%Y-%m-%dT%H:%M:%S.%fZ', '%Y-%m-%dT%H:%M:%S.%f%z'] formats = ['%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S%z', '%Y-%m-%dT%H:%M:%S.%fZ', '%Y-%m-%dT%H:%M:%S.%f%z', '%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%S.%f']
formats = formats + [f.replace("T%H", " %H") for f in formats]
for f in formats: for f in formats:
try: try:
return int(datetime.strptime(date_str, f).timestamp()) return int(datetime.strptime(date_str, f).timestamp())
@@ -54,11 +55,9 @@ def parse_extruct(s, data):
s['title'] = values['@value'] s['title'] = values['@value']
if 'http://ogp.me/ns/article#modified_time' in props: if 'http://ogp.me/ns/article#modified_time' in props:
for values in props['http://ogp.me/ns/article#modified_time']: for values in props['http://ogp.me/ns/article#modified_time']:
print(f"modified_time: {values['@value']}")
s['date'] = unix(values['@value']) s['date'] = unix(values['@value'])
if 'http://ogp.me/ns/article#published_time' in props: if 'http://ogp.me/ns/article#published_time' in props:
for values in props['http://ogp.me/ns/article#published_time']: for values in props['http://ogp.me/ns/article#published_time']:
print(f"published_time: {values['@value']}")
s['date'] = unix(values['@value']) s['date'] = unix(values['@value'])
for og in data['opengraph']: for og in data['opengraph']:
@@ -177,4 +176,9 @@ if __name__ == '__main__':
site = Category("https://www.rnz.co.nz/news/te-manu-korihi/") site = Category("https://www.rnz.co.nz/news/te-manu-korihi/")
posts = site.feed() posts = site.feed()
print(posts[:1]) print(posts[:1])
print(site.story(posts[0]))
print("Category: Newsroom Business")
site = Category("https://www.newsroom.co.nz/business/")
posts = site.feed()
print(posts[:1])
print(site.story(posts[0])) print(site.story(posts[0]))