forked from tanner/qotnews
Add a regex to get a unique ref from each sitemap/category-based article URL.
This commit is contained in:
@@ -34,9 +34,10 @@ def _get_category(category_url, excludes=None):
|
||||
return links
|
||||
|
||||
class Category(Base):
|
||||
def __init__(self, url, tz=None):
|
||||
self.tz = tz
|
||||
self.category_url = url
|
||||
def __init__(self, config):
|
||||
self.config = config
|
||||
self.category_url = config.get('url')
|
||||
self.tz = config.get('tz')
|
||||
|
||||
def feed(self, excludes=None):
|
||||
links = []
|
||||
@@ -45,7 +46,8 @@ class Category(Base):
|
||||
elif isinstance(self.category_url, list):
|
||||
for url in self.category_url:
|
||||
links += _get_category(url, excludes)
|
||||
return list(set(links))
|
||||
links = list(set(links))
|
||||
return [(self.get_id(link), link) for link in links]
|
||||
|
||||
|
||||
# scratchpad so I can quickly develop the parser
|
||||
|
@@ -58,9 +58,10 @@ def _get_sitemap(feed_url, excludes=None):
|
||||
return list(set(links))
|
||||
|
||||
class Sitemap(Base):
|
||||
def __init__(self, url, tz=None):
|
||||
self.tz = tz
|
||||
self.sitemap_url = url
|
||||
def __init__(self, config):
|
||||
self.config = config
|
||||
self.sitemap_url = config.get('url')
|
||||
self.tz = config.get('tz')
|
||||
|
||||
def feed(self, excludes=None):
|
||||
links = []
|
||||
@@ -69,7 +70,8 @@ class Sitemap(Base):
|
||||
elif isinstance(self.sitemap_url, list):
|
||||
for url in self.sitemap_url:
|
||||
links += _get_sitemap(url, excludes)
|
||||
return list(set(links))
|
||||
links = list(set(links))
|
||||
return [(self.get_id(link), link) for link in links]
|
||||
|
||||
# scratchpad so I can quickly develop the parser
|
||||
if __name__ == '__main__':
|
||||
|
Reference in New Issue
Block a user