Add an excludes list of URL substrings to the sitemap/category settings.

This commit is contained in:
Jason Schwarzenberger 2020-11-05 15:51:59 +13:00
parent afda5b635c
commit 4488e2c292
2 changed files with 12 additions and 6 deletions

View File

@@ -43,12 +43,14 @@ def list():
feed += [(x, key) for x in publication.feed()[:count]] feed += [(x, key) for x in publication.feed()[:count]]
for key, sites in categories.items(): for key, sites in categories.items():
count = settings.CATEGORY[key]['count'] count = settings.CATEGORY[key].get('count') or 0
feed += [(x, key) for x in sites.feed()[:count]] excludes = settings.CATEGORY[key].get('excludes')
feed += [(x, key) for x in sites.feed(excludes)[:count]]
for key, sites in sitemaps.items(): for key, sites in sitemaps.items():
count = settings.SITEMAP[key]['count'] count = settings.SITEMAP[key].get('count') or 0
feed += [(x, key) for x in sites.feed()[:count]] excludes = settings.SITEMAP[key].get('excludes')
feed += [(x, key) for x in sites.feed(excludes)[:count]]
return feed return feed

View File

@@ -124,6 +124,9 @@ def comment_count(i):
return sum([comment_count(c) for c in i['comments']]) + alive return sum([comment_count(c) for c in i['comments']]) + alive
class _Base: class _Base:
def feed(self, excludes=[]):
return []
def story(self, ref): def story(self, ref):
markup = xml(lambda x: ref) markup = xml(lambda x: ref)
if not markup: if not markup:
@@ -159,7 +162,7 @@ class Sitemap(_Base):
def __init__(self, url): def __init__(self, url):
self.sitemap_url = url self.sitemap_url = url
def feed(self): def feed(self, excludes=[]):
markup = xml(lambda x: self.sitemap_url) markup = xml(lambda x: self.sitemap_url)
if not markup: return [] if not markup: return []
soup = BeautifulSoup(markup, features='lxml') soup = BeautifulSoup(markup, features='lxml')
@@ -167,6 +170,7 @@ class Sitemap(_Base):
articles = list(filter(None, [a if a.find('lastmod') is not None else None for a in articles])) articles = list(filter(None, [a if a.find('lastmod') is not None else None for a in articles]))
links = [x.find('loc').text for x in articles] or [] links = [x.find('loc').text for x in articles] or []
links = list(set(links)) links = list(set(links))
links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
return links return links
@@ -175,7 +179,7 @@ class Category(_Base):
self.category_url = url self.category_url = url
self.base_url = '/'.join(url.split('/')[:3]) self.base_url = '/'.join(url.split('/')[:3])
def feed(self): def feed(self, excludes=[]):
markup = xml(lambda x: self.category_url) markup = xml(lambda x: self.category_url)
if not markup: return [] if not markup: return []
soup = BeautifulSoup(markup, features='html.parser') soup = BeautifulSoup(markup, features='html.parser')