forked from tanner/qotnews
Add an excludes list of URL substrings to the settings for sitemap/category.
This commit is contained in:
parent
afda5b635c
commit
4488e2c292
|
@ -43,12 +43,14 @@ def list():
|
|||
feed += [(x, key) for x in publication.feed()[:count]]
|
||||
|
||||
for key, sites in categories.items():
|
||||
count = settings.CATEGORY[key]['count']
|
||||
feed += [(x, key) for x in sites.feed()[:count]]
|
||||
count = settings.CATEGORY[key].get('count') or 0
|
||||
excludes = settings.CATEGORY[key].get('excludes')
|
||||
feed += [(x, key) for x in sites.feed(excludes)[:count]]
|
||||
|
||||
for key, sites in sitemaps.items():
|
||||
count = settings.SITEMAP[key]['count']
|
||||
feed += [(x, key) for x in sites.feed()[:count]]
|
||||
count = settings.SITEMAP[key].get('count') or 0
|
||||
excludes = settings.SITEMAP[key].get('excludes')
|
||||
feed += [(x, key) for x in sites.feed(excludes)[:count]]
|
||||
|
||||
|
||||
return feed
|
||||
|
|
|
@ -124,6 +124,9 @@ def comment_count(i):
|
|||
return sum([comment_count(c) for c in i['comments']]) + alive
|
||||
|
||||
class _Base:
|
||||
def feed(self, excludes=[]):
|
||||
return []
|
||||
|
||||
def story(self, ref):
|
||||
markup = xml(lambda x: ref)
|
||||
if not markup:
|
||||
|
@ -159,7 +162,7 @@ class Sitemap(_Base):
|
|||
def __init__(self, url):
|
||||
self.sitemap_url = url
|
||||
|
||||
def feed(self):
|
||||
def feed(self, excludes=[]):
|
||||
markup = xml(lambda x: self.sitemap_url)
|
||||
if not markup: return []
|
||||
soup = BeautifulSoup(markup, features='lxml')
|
||||
|
@ -167,6 +170,7 @@ class Sitemap(_Base):
|
|||
articles = list(filter(None, [a if a.find('lastmod') is not None else None for a in articles]))
|
||||
links = [x.find('loc').text for x in articles] or []
|
||||
links = list(set(links))
|
||||
links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
|
||||
return links
|
||||
|
||||
|
||||
|
@ -175,7 +179,7 @@ class Category(_Base):
|
|||
self.category_url = url
|
||||
self.base_url = '/'.join(url.split('/')[:3])
|
||||
|
||||
def feed(self):
|
||||
def feed(self, excludes=[]):
|
||||
markup = xml(lambda x: self.category_url)
|
||||
if not markup: return []
|
||||
soup = BeautifulSoup(markup, features='html.parser')
|
||||
|
|
Loading…
Reference in New Issue
Block a user