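"""Copy every story from the archive's full-text index (a Whoosh-style
searcher) into the database and the MeiliSearch index. On a ref collision
the version with more comments wins; r/technology stories are skipped.
"""
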
import archive
import database
import search

import json
import logging
import requests
database.init()
archive.init()
search.init()

count = 0
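
# Delete any existing story row with this ref from the database.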
def database_del_story_by_ref(ref):
    session = database.Session()
    try:
        session.query(database.Story).filter(database.Story.ref == ref).delete()
        session.commit()
    except:
        # Roll back on any failure, then re-raise for the caller to handle.
        session.rollback()
        raise
    finally:
        session.close()
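
# Delete a story document from the MeiliSearch index by its id.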
def search_del_story(sid):
    try:
        r = requests.delete(search.MEILI_URL + 'indexes/qotnews/documents/' + sid, timeout=2)
        # MeiliSearch acknowledges document deletes asynchronously with 202 Accepted.
        if r.status_code != 202:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem deleting MeiliSearch story: {}'.format(str(e)))
        return False
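
# Walk every document in the archive index and re-insert it into the
# database and the search index.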
with archive.ix.searcher() as searcher:
    print('count all', searcher.doc_count_all())
    print('count', searcher.doc_count())

    for doc in searcher.documents():
        try:
            print('num', count, 'id', doc['id'])
            count += 1

            story = doc['story']
            story.pop('img', None)  # drop the stored image field, if any

            if 'reddit.com/r/technology' in story['link']:
                print('skipping r/technology')
                continue
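
            # Put the story in the database; on a ref collision, keep
            # whichever version has more comments.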
            try:
                database.put_story(story)
            except database.IntegrityError:
                print('collision!')
                old_story = database.get_story_by_ref(story['ref'])
                old_story = json.loads(old_story.full_json)

                if story['num_comments'] > old_story['num_comments']:
                    print('more comments, replacing')
                    database_del_story_by_ref(story['ref'])
                    database.put_story(story)
                    search_del_story(old_story['id'])
                else:
                    print('fewer comments, skipping')
                    continue
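
            # Finally, index the story in MeiliSearch.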
            search.put_story(story)
            print()

        except KeyboardInterrupt:
            break
        except BaseException as e:
            print('skipping', doc['id'])
            print('reason:', e)