Add a regex to derive a unique ref from each sitemap/category-based article URL.

This commit is contained in:
Jason Schwarzenberger
2020-11-17 12:38:28 +13:00
parent f5c7a658ba
commit b771b52501
7 changed files with 86 additions and 38 deletions

View File

@@ -34,9 +34,10 @@ def _get_category(category_url, excludes=None):
return links
class Category(Base):
def __init__(self, url, tz=None):
self.tz = tz
self.category_url = url
def __init__(self, config):
self.config = config
self.category_url = config.get('url')
self.tz = config.get('tz')
def feed(self, excludes=None):
links = []
@@ -45,7 +46,8 @@ class Category(Base):
elif isinstance(self.category_url, list):
for url in self.category_url:
links += _get_category(url, excludes)
return list(set(links))
links = list(set(links))
return [(self.get_id(link), link) for link in links]
# scratchpad so I can quickly develop the parser

View File

@@ -58,9 +58,10 @@ def _get_sitemap(feed_url, excludes=None):
return list(set(links))
class Sitemap(Base):
def __init__(self, url, tz=None):
self.tz = tz
self.sitemap_url = url
def __init__(self, config):
self.config = config
self.sitemap_url = config.get('url')
self.tz = config.get('tz')
def feed(self, excludes=None):
links = []
@@ -69,7 +70,8 @@ class Sitemap(Base):
elif isinstance(self.sitemap_url, list):
for url in self.sitemap_url:
links += _get_sitemap(url, excludes)
return list(set(links))
links = list(set(links))
return [(self.get_id(link), link) for link in links]
# scratchpad so I can quickly develop the parser
if __name__ == '__main__':