forked from tanner/qotnews
Add an excludes list of substrings for URLs in the settings for sitemap/category.
parent afda5b635c
commit 4488e2c292
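For context, the changed code reads two optional keys from each CATEGORY/SITEMAP entry: 'count' and 'excludes'. A minimal sketch of what such an entry might look like in settings.py follows; the 'url' field and the example names and substrings are assumptions for illustration, not taken from this commit — only 'count' and 'excludes' are read by the diff below.

# settings.py -- hypothetical entries, shape assumed beyond 'count'/'excludes'
CATEGORY = {
    'mynews': {
        'url': 'https://example.com/news',         # assumed key, for illustration
        'count': 10,                               # read via .get('count') or 0
        'excludes': ['/sport/', '/video/'],        # skip links containing these substrings
    },
}

SITEMAP = {
    'mysite': {
        'url': 'https://example.com/sitemap.xml',  # assumed key, for illustration
        'count': 15,
        'excludes': ['/tag/', '?page='],
    },
}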
@@ -43,12 +43,14 @@ def list():
         feed += [(x, key) for x in publication.feed()[:count]]
 
     for key, sites in categories.items():
-        count = settings.CATEGORY[key]['count']
-        feed += [(x, key) for x in sites.feed()[:count]]
+        count = settings.CATEGORY[key].get('count') or 0
+        excludes = settings.CATEGORY[key].get('excludes')
+        feed += [(x, key) for x in sites.feed(excludes)[:count]]
 
     for key, sites in sitemaps.items():
-        count = settings.SITEMAP[key]['count']
-        feed += [(x, key) for x in sites.feed()[:count]]
+        count = settings.SITEMAP[key].get('count') or 0
+        excludes = settings.SITEMAP[key].get('excludes')
+        feed += [(x, key) for x in sites.feed(excludes)[:count]]
 
     return feed
 
@@ -124,6 +124,9 @@ def comment_count(i):
     return sum([comment_count(c) for c in i['comments']]) + alive
 
 class _Base:
+    def feed(self, excludes=[]):
+        return []
+
     def story(self, ref):
         markup = xml(lambda x: ref)
         if not markup:
@@ -159,7 +162,7 @@ class Sitemap(_Base):
     def __init__(self, url):
         self.sitemap_url = url
 
-    def feed(self):
+    def feed(self, excludes=[]):
         markup = xml(lambda x: self.sitemap_url)
         if not markup: return []
         soup = BeautifulSoup(markup, features='lxml')
@@ -167,6 +170,7 @@ class Sitemap(_Base):
         articles = list(filter(None, [a if a.find('lastmod') is not None else None for a in articles]))
         links = [x.find('loc').text for x in articles] or []
         links = list(set(links))
+        links = list(filter(None, [None if any(e in link for e in excludes) else link for link in links]))
         return links
 
 
@@ -175,7 +179,7 @@ class Category(_Base):
         self.category_url = url
         self.base_url = '/'.join(url.split('/')[:3])
 
-    def feed(self):
+    def feed(self, excludes=[]):
         markup = xml(lambda x: self.category_url)
         if not markup: return []
         soup = BeautifulSoup(markup, features='html.parser')
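The new filter in Sitemap.feed() and Category.feed() is a plain substring match against each link. Below is a standalone sketch of the same check with illustrative names; the `or []` guard is an addition in this sketch, since .get('excludes') returns None when the key is absent from a settings entry.

def filter_excluded(links, excludes=None):
    # Drop any link whose URL contains one of the excluded substrings,
    # mirroring the list comprehension added to Sitemap.feed()/Category.feed().
    excludes = excludes or []  # guard: settings .get('excludes') may return None
    return [link for link in links if not any(e in link for e in excludes)]

links = [
    'https://example.com/news/budget-2021',
    'https://example.com/sport/match-report',
]
print(filter_excluded(links, excludes=['/sport/']))
# -> ['https://example.com/news/budget-2021']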