Fix import error: import Base from misc.news instead of _news.

This commit is contained in:
Jason Schwarzenberger
2020-11-16 15:41:09 +13:00
parent 6a91b9402f
commit f5ccd844da
3 changed files with 2 additions and 6 deletions

View File

@@ -1,90 +0,0 @@
import logging
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG)
if __name__ == '__main__':
import sys
sys.path.insert(0,'.')
import requests
from bs4 import BeautifulSoup
from scrapers import declutter
import extruct
import settings
from utils import clean
from misc.metadata import parse_extruct
from misc.time import unix
from misc.api import xml
def comment(i):
    """Convert a raw comment item into the normalised comment dict.

    Returns ``False`` when ``i`` has no ``'author'`` key. Otherwise returns
    a dict with the keys ``author``, ``score``, ``date``, ``text`` and
    ``comments``; ``comments`` holds the recursively converted entries of
    ``i['children']`` with falsy results dropped.
    """
    if 'author' not in i:
        return False

    # Evaluate the fields in the same order they end up in the result.
    author = i.get('author', '')
    score = i.get('points', 0)
    date = unix(i.get('date', 0))
    # A present-but-None text is coerced to '' before cleaning.
    text = clean(i.get('text', '') or '')
    replies = [reply for reply in map(comment, i['children']) if reply]

    return {
        'author': author,
        'score': score,
        'date': date,
        'text': text,
        'comments': replies,
    }
def comment_count(i):
    """Count the nodes with a truthy ``'author'`` in the comment tree at ``i``.

    ``i`` is a dict with an ``'author'`` value and a ``'comments'`` iterable
    of child dicts of the same form. The node itself is counted when its
    author is truthy, and every descendant is counted the same way.
    """
    total = 1 if i['author'] else 0
    for child in i['comments']:
        total += comment_count(child)
    return total
class Base:
    """Common base for news-site scrapers.

    Holds the site ``url`` and an optional ``tz`` value, provides a default
    empty ``feed`` and a generic ``story`` extractor built from the page
    markup.
    """

    def __init__(self, url, tz=None):
        """Store the site ``url`` and the optional ``tz`` used for dates."""
        self.url = url
        self.tz = tz

    def feed(self, excludes=None):
        """Return the list of story refs; the base implementation has none."""
        return []

    def story(self, ref):
        """Build a story dict for the page at ``ref``.

        Returns ``False`` when no markup could be obtained or when no date
        was found for the story; otherwise returns the populated dict.
        Comment retrieval is best-effort: a failure there is logged and the
        story is returned with empty comments.
        """
        # Local import keeps this block self-contained.
        from urllib.parse import urljoin

        # NOTE(review): presumably `xml` calls the given function to obtain
        # the URL to fetch - confirm against misc.api.xml.
        markup = xml(lambda x: ref)
        if not markup:
            return False

        s = {}
        s['author_link'] = ''
        s['score'] = 0
        s['comments'] = []
        s['num_comments'] = 0
        s['link'] = ref
        s['url'] = ref
        s['date'] = 0

        soup = BeautifulSoup(markup, features='html.parser')
        # Candidates in priority order: 32x32, 16x16, shortcut icon, any icon.
        icon32 = soup.find_all('link', rel="icon", href=True, sizes="32x32")
        icon16 = soup.find_all('link', rel="icon", href=True, sizes="16x16")
        favicon = soup.find_all('link', rel="shortcut icon", href=True)
        others = soup.find_all('link', rel="icon", href=True)
        icons = icon32 + icon16 + favicon + others
        # De-duplicate while keeping the priority order (a set would make
        # the choice of icons[0] arbitrary), and skip empty hrefs.
        hrefs = [h for h in dict.fromkeys(i.get('href') for i in icons) if h]
        # urljoin leaves absolute URLs untouched and resolves root-relative,
        # relative and protocol-relative hrefs against the story URL.
        icons = [urljoin(ref, h) for h in hrefs]
        if icons:
            s['icon'] = icons[0]

        data = extruct.extract(markup)
        s = parse_extruct(s, data)
        if s['date']:
            s['date'] = unix(s['date'], tz=self.tz)

        if 'disqus' in markup:
            try:
                comments = list(filter(bool, declutter.get_comments(ref)))
                # comment_count expects a single comment node, so sum it
                # over the top-level comments.
                num_comments = sum(comment_count(c) for c in comments)
            except Exception:
                # Best-effort: keep the story, but do not hide the failure.
                # KeyboardInterrupt is not an Exception and still propagates.
                logging.exception('failed to load comments for %s', ref)
            else:
                s['comments'] = comments
                s['num_comments'] = num_comments

        if not s['date']:
            return False
        return s

View File

@@ -12,7 +12,7 @@ from bs4 import BeautifulSoup
import settings
from utils import clean
from misc.api import xml
from _news import Base
from misc.news import Base
def _filter_links(links, category_url, excludes=None):
links = list(filter(None, [link if link.startswith(category_url) else None for link in links]))

View File

@@ -14,7 +14,7 @@ import settings
from utils import clean
from misc.time import unix
from misc.api import xml
from _news import Base
from misc.news import Base
def _get_sitemap_date(a):
if a.find('lastmod'):