# qotnews/apiserver/migrate-whoosh-to-sqlite.py

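"""One-off migration script: walk every document in the legacy Whoosh
archive and re-insert each story into the SQLite database and the
MeiliSearch index. On a duplicate ref, the copy with more comments wins.
(This summary is inferred from the script's own logic below.)"""
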
import archive
import database
import search

import json
import logging
import requests

database.init()
archive.init()
search.init()

count = 0
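
# Remove any existing story row with this ref from the SQLite database so
# a fresher copy can take its place.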
def database_del_story_by_ref(ref):
    session = database.Session()
    try:
        session.query(database.Story).filter(database.Story.ref == ref).delete()
        session.commit()
    except:
        session.rollback()
        raise
    finally:
        session.close()
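
# Delete a story from the MeiliSearch index by its sid. MeiliSearch queues
# document deletes asynchronously and answers 202 Accepted, which is why
# that is the status code checked for here.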
def search_del_story(sid):
    try:
        r = requests.delete(search.MEILI_URL + 'indexes/qotnews/documents/' + sid, timeout=2)
        if r.status_code != 202:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem deleting MeiliSearch story: {}'.format(str(e)))
        return False
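
# Walk every document in the Whoosh archive and re-insert it. Ctrl-C stops
# the migration; any other per-document failure is printed and skipped.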
with archive.ix.searcher() as searcher:
    print('count all', searcher.doc_count_all())
    print('count', searcher.doc_count())
    for doc in searcher.documents():
        try:
            print('num', count, 'id', doc['id'])
            count += 1

            story = doc['story']
            story.pop('img', None)

            if 'reddit.com/r/technology' in story['link']:
                print('skipping r/technology')
                continue

            try:
                database.put_story(story)
            except database.IntegrityError:
                # Two archive documents share a ref: keep whichever copy
                # has more comments.
                print('collision!')
                old_story = database.get_story_by_ref(story['ref'])
                old_story = json.loads(old_story.full_json)

                if story['num_comments'] > old_story['num_comments']:
                    print('more comments, replacing')
                    database_del_story_by_ref(story['ref'])
                    database.put_story(story)
                    search_del_story(old_story['id'])
                else:
                    print('fewer comments, skipping')
                    continue

            search.put_story(story)
            print()
        except KeyboardInterrupt:
            break
        except BaseException as e:
            print('skipping', doc['id'])
            print('reason:', e)
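
# Presumably run once by hand from the apiserver directory, e.g.:
#
#   python migrate-whoosh-to-sqlite.py
#
# Note there is no __main__ guard, so importing this module also runs the
# migration.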