Try to make the feed determined only by the max story age.
This commit is contained in:
parent
bfa4108a8e
commit
7b8cbfc9b9
|
@ -1,4 +1,5 @@
|
||||||
import json
|
import json
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
|
from sqlalchemy import create_engine, Column, String, ForeignKey, Integer
|
||||||
from sqlalchemy.ext.declarative import declarative_base
|
from sqlalchemy.ext.declarative import declarative_base
|
||||||
|
@ -66,16 +67,18 @@ def get_story_by_ref(ref):
|
||||||
session = Session()
|
session = Session()
|
||||||
return session.query(Story).filter(Story.ref==ref).first()
|
return session.query(Story).filter(Story.ref==ref).first()
|
||||||
|
|
||||||
def get_reflist(amount):
|
def get_reflist():
|
||||||
session = Session()
|
session = Session()
|
||||||
q = session.query(Reflist).order_by(Reflist.rid.desc()).limit(amount)
|
q = session.query(Reflist).order_by(Reflist.rid.desc())
|
||||||
return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]
|
return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()]
|
||||||
|
|
||||||
def get_stories(amount):
|
def get_stories(maxage=60*60*24*2):
|
||||||
|
time = datetime.now().timestamp() - maxage
|
||||||
session = Session()
|
session = Session()
|
||||||
q = session.query(Reflist, Story.meta).\
|
q = session.query(Reflist, Story.meta).\
|
||||||
join(Story).\
|
join(Story).\
|
||||||
filter(Story.title != None).\
|
filter(Story.title != None).\
|
||||||
|
filter(Story.meta['date'] > time).\
|
||||||
order_by(Story.meta['date'].desc()).\
|
order_by(Story.meta['date'].desc()).\
|
||||||
limit(amount)
|
limit(amount)
|
||||||
return [x[1] for x in q]
|
return [x[1] for x in q]
|
||||||
|
|
|
@ -6,6 +6,7 @@ logging.basicConfig(
|
||||||
import requests
|
import requests
|
||||||
import time
|
import time
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
import itertools
|
||||||
|
|
||||||
import settings
|
import settings
|
||||||
from feeds import hackernews, reddit, tildes, substack, manual, news
|
from feeds import hackernews, reddit, tildes, substack, manual, news
|
||||||
|
@ -27,36 +28,39 @@ sitemaps = {}
|
||||||
for key, value in settings.SITEMAP.items():
|
for key, value in settings.SITEMAP.items():
|
||||||
sitemaps[key] = news.Sitemap(value['url'], value.get('tz'))
|
sitemaps[key] = news.Sitemap(value['url'], value.get('tz'))
|
||||||
|
|
||||||
def list():
|
def get_list():
|
||||||
feed = []
|
feeds = {}
|
||||||
|
|
||||||
if settings.NUM_HACKERNEWS:
|
if settings.NUM_HACKERNEWS:
|
||||||
feed += [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
|
feeds['hackernews'] = [(x, 'hackernews') for x in hackernews.feed()[:settings.NUM_HACKERNEWS]]
|
||||||
|
|
||||||
if settings.NUM_REDDIT:
|
if settings.NUM_REDDIT:
|
||||||
feed += [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
|
feeds['reddit'] = [(x, 'reddit') for x in reddit.feed()[:settings.NUM_REDDIT]]
|
||||||
|
|
||||||
if settings.NUM_TILDES:
|
if settings.NUM_TILDES:
|
||||||
feed += [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
|
feeds['tildes'] = [(x, 'tildes') for x in tildes.feed()[:settings.NUM_TILDES]]
|
||||||
|
|
||||||
if settings.NUM_SUBSTACK:
|
if settings.NUM_SUBSTACK:
|
||||||
feed += [(x, 'substack') for x in substack.top.feed()[:settings.NUM_SUBSTACK]]
|
feeds['substack'] = [(x, 'substack') for x in substack.top.feed()[:settings.NUM_SUBSTACK]]
|
||||||
|
|
||||||
for key, publication in substacks.items():
|
for key, publication in substacks.items():
|
||||||
count = settings.SUBSTACK[key]['count']
|
count = settings.SUBSTACK[key]['count']
|
||||||
feed += [(x, key) for x in publication.feed()[:count]]
|
feeds[key] = [(x, key) for x in publication.feed()[:count]]
|
||||||
|
|
||||||
for key, sites in categories.items():
|
for key, sites in categories.items():
|
||||||
count = settings.CATEGORY[key].get('count') or 0
|
count = settings.CATEGORY[key].get('count') or 0
|
||||||
excludes = settings.CATEGORY[key].get('excludes')
|
excludes = settings.CATEGORY[key].get('excludes')
|
||||||
tz = settings.CATEGORY[key].get('tz')
|
tz = settings.CATEGORY[key].get('tz')
|
||||||
feed += [(x, key) for x in sites.feed(excludes)[:count]]
|
feeds[key] = [(x, key) for x in sites.feed(excludes)[:count]]
|
||||||
|
|
||||||
for key, sites in sitemaps.items():
|
for key, sites in sitemaps.items():
|
||||||
count = settings.SITEMAP[key].get('count') or 0
|
count = settings.SITEMAP[key].get('count') or 0
|
||||||
excludes = settings.SITEMAP[key].get('excludes')
|
excludes = settings.SITEMAP[key].get('excludes')
|
||||||
feed += [(x, key) for x in sites.feed(excludes)[:count]]
|
feeds[key] = [(x, key) for x in sites.feed(excludes)[:count]]
|
||||||
|
|
||||||
|
|
||||||
|
values = feeds.values()
|
||||||
|
feed = itertools.chain.from_iterable(itertools.zip_longest(*values, fillvalue=None))
|
||||||
|
feed = list(filter(None, feed))
|
||||||
return feed
|
return feed
|
||||||
|
|
||||||
def get_article(url):
|
def get_article(url):
|
||||||
|
|
|
@ -15,6 +15,7 @@ import traceback
|
||||||
import time
|
import time
|
||||||
from urllib.parse import urlparse, parse_qs
|
from urllib.parse import urlparse, parse_qs
|
||||||
|
|
||||||
|
import settings
|
||||||
import database
|
import database
|
||||||
import search
|
import search
|
||||||
import feed
|
import feed
|
||||||
|
@ -27,7 +28,7 @@ from flask_cors import CORS
|
||||||
database.init()
|
database.init()
|
||||||
search.init()
|
search.init()
|
||||||
|
|
||||||
FEED_LENGTH = 75
|
news_length = 0
|
||||||
news_index = 0
|
news_index = 0
|
||||||
|
|
||||||
def new_id():
|
def new_id():
|
||||||
|
@ -42,7 +43,7 @@ cors = CORS(flask_app)
|
||||||
|
|
||||||
@flask_app.route('/api')
|
@flask_app.route('/api')
|
||||||
def api():
|
def api():
|
||||||
stories = database.get_stories(FEED_LENGTH)
|
stories = database.get_stories(settings.MAX_STORY_AGE)
|
||||||
res = Response(json.dumps({"stories": stories}))
|
res = Response(json.dumps({"stories": stories}))
|
||||||
res.headers['content-type'] = 'application/json'
|
res.headers['content-type'] = 'application/json'
|
||||||
return res
|
return res
|
||||||
|
@ -145,13 +146,13 @@ def static_story(sid):
|
||||||
http_server = WSGIServer(('', 33842), flask_app)
|
http_server = WSGIServer(('', 33842), flask_app)
|
||||||
|
|
||||||
def feed_thread():
|
def feed_thread():
|
||||||
global news_index
|
global news_index, news_length
|
||||||
|
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
# onboard new stories
|
# onboard new stories
|
||||||
if news_index == 0:
|
if news_index == 0:
|
||||||
for ref, source in feed.list():
|
for ref, source in feed.get_list():
|
||||||
if database.get_story_by_ref(ref):
|
if database.get_story_by_ref(ref):
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
|
@ -161,7 +162,8 @@ def feed_thread():
|
||||||
except database.IntegrityError:
|
except database.IntegrityError:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ref_list = database.get_reflist(FEED_LENGTH)
|
ref_list = database.get_reflist()
|
||||||
|
news_length = len(ref_list)
|
||||||
|
|
||||||
# update current stories
|
# update current stories
|
||||||
if news_index < len(ref_list):
|
if news_index < len(ref_list):
|
||||||
|
@ -187,7 +189,7 @@ def feed_thread():
|
||||||
gevent.sleep(6)
|
gevent.sleep(6)
|
||||||
|
|
||||||
news_index += 1
|
news_index += 1
|
||||||
if news_index == FEED_LENGTH: news_index = 0
|
if news_index >= news_length: news_index = 0
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
logging.info('Ending feed thread...')
|
logging.info('Ending feed thread...')
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
# QotNews settings
|
# QotNews settings
|
||||||
# edit this file and save it as settings.py
|
# edit this file and save it as settings.py
|
||||||
|
|
||||||
|
MAX_STORY_AGE = 3*24*60*60
|
||||||
|
|
||||||
# Feed Lengths
|
# Feed Lengths
|
||||||
# Number of top items from each site to pull
|
# Number of top items from each site to pull
|
||||||
# set to 0 to disable that site
|
# set to 0 to disable that site
|
||||||
|
|
Loading…
Reference in New Issue
Block a user