qotnews/apiserver/reindex.py

77 lines
1.8 KiB
Python
Raw Normal View History

import logging
# Configure the root logger: INFO level, with timestamp, logger name,
# level name and message on every record.
logging.basicConfig(
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.INFO)
import database
from sqlalchemy import select
import search
import sys
2022-03-05 21:33:07 +00:00
import time
import json
import requests
# Initialise the project's database and search modules at import time,
# before any of the helpers below are used.
database.init()
search.init()
2022-03-05 21:33:07 +00:00
# Maximum number of stories collected into one put_stories() request.
BATCH_SIZE = 5000
def put_stories(stories):
    """Send a batch of story documents to the ``qotnews`` search index.

    Issues a POST to the ``indexes/qotnews/documents`` route through
    ``search.meili_api`` and returns whatever that helper returns.
    """
    route = 'indexes/qotnews/documents'
    response = search.meili_api(requests.post, route, stories)
    return response
def get_update(update_id):
    """Fetch the task record identified by ``update_id``.

    Performs a GET on ``tasks/<update_id>`` through ``search.meili_api`` and
    returns that helper's result unchanged.
    """
    route = 'tasks/{}'.format(update_id)
    task = search.meili_api(requests.get, route)
    return task
2022-03-05 21:33:07 +00:00
def count_stories():
    """Return the number of ``database.Story`` rows.

    The session is created *before* the ``try`` so that a failure inside
    ``database.Session()`` propagates as-is. Previously the assignment lived
    inside the ``try``, so such a failure reached the ``finally`` clause with
    ``session`` unbound and was masked by an ``UnboundLocalError``.
    """
    session = database.Session()
    try:
        return session.query(database.Story).count()
    finally:
        # Always release the session, whether or not the query succeeded.
        session.close()
def get_story_list():
    """Return the result of querying only the ``database.Story.sid`` column.

    The caller treats the result as a list of rows and reads the story id
    from index 0 of each row.

    The session is created *before* the ``try`` so that a failure inside
    ``database.Session()`` propagates as-is instead of being masked by an
    ``UnboundLocalError`` raised from the ``finally`` clause.
    """
    session = database.Session()
    try:
        return session.query(database.Story.sid).all()
    finally:
        # Always release the session, whether or not the query succeeded.
        session.close()
if __name__ == '__main__':
    # Interactive reindex: confirm with the operator, then push every story's
    # stored metadata to the search index in batches of BATCH_SIZE, waiting
    # for each batch's indexing task to finish before sending the next one.
    num_stories = count_stories()

    print('Reindex {} stories?'.format(num_stories))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()

    story_list = get_story_list()

    # Task statuses that will never turn into 'succeeded'; polling on them
    # would otherwise loop forever.
    failed_statuses = ('failed', 'canceled')

    count = 1
    while story_list:
        stories = []

        # Take up to BATCH_SIZE rows off the end of the list for this batch.
        for _ in range(min(BATCH_SIZE, len(story_list))):
            sid = story_list.pop()

            story = database.get_story(sid)
            print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
            story_obj = json.loads(story.meta_json)
            stories.append(story_obj)
            count += 1

        res = put_stories(stories)
        update_id = res['uid']

        print('Waiting for processing', end='')
        while True:
            status = get_update(update_id)['status']
            if status == 'succeeded':
                break
            if status in failed_statuses:
                # Finish the progress line, then abort with a non-zero exit
                # code instead of polling a task that can no longer succeed.
                print()
                sys.exit('Indexing task {} ended with status {!r}; aborting.'.format(update_id, status))
            time.sleep(0.5)
            print('.', end='', flush=True)

        print()

    print('Done.')