try to make feed only determined by the max age.

This commit is contained in:
Jason Schwarzenberger 2020-11-09 17:50:58 +13:00
parent bfa4108a8e
commit 7b8cbfc9b9
4 changed files with 30 additions and 19 deletions

View File

@ -1,4 +1,5 @@
import json import json
from datetime import datetime, timedelta
from sqlalchemy import create_engine, Column, String, ForeignKey, Integer from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
@ -66,16 +67,18 @@ def get_story_by_ref(ref):
session = Session() session = Session()
return session.query(Story).filter(Story.ref==ref).first() return session.query(Story).filter(Story.ref==ref).first()
def get_reflist(amount): def get_reflist():
session = Session() session = Session()
q = session.query(Reflist).order_by(Reflist.rid.desc()).limit(amount) q = session.query(Reflist).order_by(Reflist.rid.desc())
return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()] return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]
def get_stories(amount): def get_stories(maxage=60*60*24*2):
time = datetime.now().timestamp() - maxage
session = Session() session = Session()
q = session.query(Reflist, Story.meta).\ q = session.query(Reflist, Story.meta).\
join(Story).\ join(Story).\
filter(Story.title != None).\ filter(Story.title != None).\
filter(Story.meta['date'] > time).\
order_by(Story.meta['date'].desc()).\ order_by(Story.meta['date'].desc()).\
limit(amount) limit(amount)
return [x[1] for x in q] return [x[1] for x in q]

View File

@ -6,6 +6,7 @@ logging.basicConfig(
import requests import requests
import time import time
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import itertools
import settings import settings
from feeds import hackernews, reddit, tildes, substack, manual, news from feeds import hackernews, reddit, tildes, substack, manual, news
@ -27,36 +28,39 @@ sitemaps = {}
for key, value in settings.SITEMAP.items(): for key, value in settings.SITEMAP.items():
sitemaps[key] = news.Sitemap(value['url'], value.get('tz')) sitemaps[key] = news.Sitemap(value['url'], value.get('tz'))
def list(): def get_list():
feed = [] feeds = {}
if settings.NUM_HACKERNEWS: if settings.NUM_HACKERNEWS:
feed += [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]] feeds['hackernews'] = [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
if settings.NUM_REDDIT: if settings.NUM_REDDIT:
feed += [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]] feeds['reddit'] = [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
if settings.NUM_TILDES: if settings.NUM_TILDES:
feed += [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]] feeds['tildes'] = [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
if settings.NUM_SUBSTACK: if settings.NUM_SUBSTACK:
feed += [(x, 'substack') for x in substack.top.feed()[:settings.NUM_SUBSTACK]] feeds['substack'] = [(x, 'substack') for x in substack.top.feed()[:settings.NUM_SUBSTACK]]
for key, publication in substacks.items(): for key, publication in substacks.items():
count = settings.SUBSTACK[key]['count'] count = settings.SUBSTACK[key]['count']
feed += [(x, key) for x in publication.feed()[:count]] feeds[key] = [(x, key) for x in publication.feed()[:count]]
for key, sites in categories.items(): for key, sites in categories.items():
count = settings.CATEGORY[key].get('count') or 0 count = settings.CATEGORY[key].get('count') or 0
excludes = settings.CATEGORY[key].get('excludes') excludes = settings.CATEGORY[key].get('excludes')
tz = settings.CATEGORY[key].get('tz') tz = settings.CATEGORY[key].get('tz')
feed += [(x, key) for x in sites.feed(excludes)[:count]] feeds[key] = [(x, key) for x in sites.feed(excludes)[:count]]
for key, sites in sitemaps.items(): for key, sites in sitemaps.items():
count = settings.SITEMAP[key].get('count') or 0 count = settings.SITEMAP[key].get('count') or 0
excludes = settings.SITEMAP[key].get('excludes') excludes = settings.SITEMAP[key].get('excludes')
feed += [(x, key) for x in sites.feed(excludes)[:count]] feeds[key] = [(x, key) for x in sites.feed(excludes)[:count]]
values = feeds.values()
feed = itertools.chain.from_iterable(itertools.zip_longest(*values, fillvalue=None))
feed = list(filter(None, feed))
return feed return feed
def get_article(url): def get_article(url):

View File

@ -15,6 +15,7 @@ import traceback
import time import time
from urllib.parse import urlparse, parse_qs from urllib.parse import urlparse, parse_qs
import settings
import database import database
import search import search
import feed import feed
@ -27,7 +28,7 @@ from flask_cors import CORS
database.init() database.init()
search.init() search.init()
FEED_LENGTH = 75 news_length = 0
news_index = 0 news_index = 0
def new_id(): def new_id():
@ -42,7 +43,7 @@ cors = CORS(flask_app)
@flask_app.route('/api') @flask_app.route('/api')
def api(): def api():
stories = database.get_stories(FEED_LENGTH) stories = database.get_stories(settings.MAX_STORY_AGE)
res = Response(json.dumps({"stories": stories})) res = Response(json.dumps({"stories": stories}))
res.headers['content-type'] = 'application/json' res.headers['content-type'] = 'application/json'
return res return res
@ -145,13 +146,13 @@ def static_story(sid):
http_server = WSGIServer(('', 33842), flask_app) http_server = WSGIServer(('', 33842), flask_app)
def feed_thread(): def feed_thread():
global news_index global news_index, news_length
try: try:
while True: while True:
# onboard new stories # onboard new stories
if news_index == 0: if news_index == 0:
for ref, source in feed.list(): for ref, source in feed.get_list():
if database.get_story_by_ref(ref): if database.get_story_by_ref(ref):
continue continue
try: try:
@ -161,7 +162,8 @@ def feed_thread():
except database.IntegrityError: except database.IntegrityError:
continue continue
ref_list = database.get_reflist(FEED_LENGTH) ref_list = database.get_reflist()
news_length = len(ref_list)
# update current stories # update current stories
if news_index < len(ref_list): if news_index < len(ref_list):
@ -187,7 +189,7 @@ def feed_thread():
gevent.sleep(6) gevent.sleep(6)
news_index += 1 news_index += 1
if news_index == FEED_LENGTH: news_index = 0 if news_index >= news_length: news_index = 0
except KeyboardInterrupt: except KeyboardInterrupt:
logging.info('Ending feed thread...') logging.info('Ending feed thread...')

View File

@ -1,6 +1,8 @@
# QotNews settings # QotNews settings
# edit this file and save it as settings.py # edit this file and save it as settings.py
MAX_STORY_AGE = 3*24*60*60
# Feed Lengths # Feed Lengths
# Number of top items from each site to pull # Number of top items from each site to pull
# set to 0 to disable that site # set to 0 to disable that site