From 4488e2c292f187f0ab1901467f29d5f0bc84e1b1 Mon Sep 17 00:00:00 2001
From: Jason Schwarzenberger
Date: Thu, 5 Nov 2020 15:51:59 +1300
Subject: [PATCH] add an `excludes` list of substrings for urls in the settings for sitemap/category.

---
 apiserver/feed.py       | 10 ++++++----
 apiserver/feeds/news.py |  8 ++++++--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/apiserver/feed.py b/apiserver/feed.py
index 08bfe4f..02cf38e 100644
--- a/apiserver/feed.py
+++ b/apiserver/feed.py
@@ -43,12 +43,14 @@ def list():
         feed += [(x, key) for x in publication.feed()[:count]]
 
     for key, sites in categories.items():
-        count = settings.CATEGORY[key]['count']
-        feed += [(x, key) for x in sites.feed()[:count]]
+        count = settings.CATEGORY[key].get('count') or 0
+        excludes = settings.CATEGORY[key].get('excludes')
+        feed += [(x, key) for x in sites.feed(excludes)[:count]]
 
     for key, sites in sitemaps.items():
-        count = settings.SITEMAP[key]['count']
-        feed += [(x, key) for x in sites.feed()[:count]]
+        count = settings.SITEMAP[key].get('count') or 0
+        excludes = settings.SITEMAP[key].get('excludes')
+        feed += [(x, key) for x in sites.feed(excludes)[:count]]
 
     return feed
 
diff --git a/apiserver/feeds/news.py b/apiserver/feeds/news.py
index e1f2242..33226f8 100644
--- a/apiserver/feeds/news.py
+++ b/apiserver/feeds/news.py
@@ -124,6 +124,9 @@ def comment_count(i):
     return sum([comment_count(c) for c in i['comments']]) + alive
 
 class _Base:
+    def feed(self, excludes=[]):
+        return []
+
     def story(self, ref):
         markup = xml(lambda x: ref)
         if not markup:
@@ -159,7 +162,7 @@ class Sitemap(_Base):
     def __init__(self, url):
         self.sitemap_url = url
 
-    def feed(self):
+    def feed(self, excludes=[]):
         markup = xml(lambda x: self.sitemap_url)
         if not markup: return []
         soup = BeautifulSoup(markup, features='lxml')
@@ -167,6 +170,7 @@ class Sitemap(_Base):
         articles = list(filter(None, [a if a.find('lastmod') is not None else None for a in articles]))
         links = [x.find('loc').text for x in articles] or []
         links = list(set(links))
+        links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
         return links
 
 
@@ -175,7 +179,7 @@ class Category(_Base):
     def __init__(self, url):
         self.category_url = url
         self.base_url = '/'.join(url.split('/')[:3])
 
-    def feed(self):
+    def feed(self, excludes=[]):
         markup = xml(lambda x: self.category_url)
         if not markup: return []
         soup = BeautifulSoup(markup, features='html.parser')
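
For reference, the new `excludes` key is read from each SITEMAP/CATEGORY entry with `.get('excludes')`, so it is optional per entry. A minimal sketch of a settings entry that exercises it follows; only the 'count' and 'excludes' keys come from the patch, while the 'nzherald' name, the 'url' key and the example values are assumptions for illustration.

    # settings.py -- hypothetical entry, not part of this patch
    SITEMAP = {
        'nzherald': {                                         # assumed feed name
            'url': 'https://www.nzherald.co.nz/sitemap.xml',  # assumed key and value
            'count': 20,                                      # read via .get('count') or 0 in feed.py
            'excludes': [                                     # substrings; matching URLs are dropped in feed()
                '/sport/',
                '/video/',
            ],
        },
    }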
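
Note on entries without an 'excludes' key: `settings.SITEMAP[key].get('excludes')` returns None in that case, and passing None into feed() overrides the `[]` default, so `any(e in link for e in excludes)` would raise a TypeError. A defensive variant of the caller, as a sketch only (the patch itself passes the value through unchanged):

    # apiserver/feed.py -- defensive sketch, not what the patch applies
    for key, sites in sitemaps.items():
        count = settings.SITEMAP[key].get('count') or 0
        # fall back to an empty list so feed() never iterates over None
        excludes = settings.SITEMAP[key].get('excludes') or []
        feed += [(x, key) for x in sites.feed(excludes)[:count]]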