From 55c282ee696bd40cdda7389ca3179cda548935f2 Mon Sep 17 00:00:00 2001 From: Tanner Collin Date: Sat, 5 Mar 2022 21:33:07 +0000 Subject: [PATCH] Fix search to work with low-RAM server --- apiserver/reindex.py | 46 +++++++++++++++++++++++++++++++++++++------- apiserver/search.py | 15 ++++++++------- apiserver/server.py | 6 +++++- 3 files changed, 52 insertions(+), 15 deletions(-) diff --git a/apiserver/reindex.py b/apiserver/reindex.py index e6bc3d5..e908861 100644 --- a/apiserver/reindex.py +++ b/apiserver/reindex.py @@ -8,12 +8,21 @@ from sqlalchemy import select import search import sys +import time import json import requests database.init() search.init() +BATCH_SIZE = 5000 + +def put_stories(stories): + return search.meili_api(requests.post, 'indexes/qotnews/documents', stories) + +def get_update(update_id): + return search.meili_api(requests.get, 'indexes/qotnews/updates/{}'.format(update_id)) + def count_stories(): try: session = database.Session() @@ -35,11 +44,34 @@ if __name__ == '__main__': print('Press ENTER to continue, ctrl-c to cancel') input() - count = 1 - for sid in get_story_list(): - story = database.get_story(sid) - print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title)) - story_obj = json.loads(story.meta_json) - search.put_story(story_obj) - count += 1 + story_list = get_story_list() + + count = 1 + while len(story_list): + stories = [] + + for _ in range(BATCH_SIZE): + try: + sid = story_list.pop() + except IndexError: + break + + story = database.get_story(sid) + print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title)) + story_obj = json.loads(story.meta_json) + to_add = dict(title=story_obj['title'], id=story_obj['id'], date=story_obj['date']) + stories.append(to_add) + count += 1 + + res = put_stories(stories) + update_id = res['updateId'] + + print('Waiting for processing', end='') + while get_update(update_id)['status'] != 'processed': + time.sleep(0.5) + print('.', end='', flush=True) + + print() + + print('Done.') diff --git a/apiserver/search.py b/apiserver/search.py index 77d714d..4f009a0 100644 --- a/apiserver/search.py +++ b/apiserver/search.py @@ -24,13 +24,14 @@ def create_index(): return meili_api(requests.post, 'indexes', json=json) def update_rankings(): - json = ['typo', 'words', 'proximity', 'attribute', 'desc(date)', 'wordsPosition', 'exactness'] + json = ['typo', 'words', 'proximity', 'date:desc', 'exactness'] return meili_api(requests.post, 'indexes/qotnews/settings/ranking-rules', json=json) def update_attributes(): - json = ['title', 'url', 'author', 'link', 'id'] + json = ['title'] r = meili_api(requests.post, 'indexes/qotnews/settings/searchable-attributes', json=json) - meili_api(requests.delete, 'indexes/qotnews/settings/displayed-attributes', json=json) + json = ['id'] + r = meili_api(requests.post, 'indexes/qotnews/settings/displayed-attributes', json=json) return r def init(): @@ -39,10 +40,8 @@ def init(): update_attributes() def put_story(story): - story = story.copy() - story.pop('text', None) - story.pop('comments', None) - return meili_api(requests.post, 'indexes/qotnews/documents', [story]) + to_add = dict(title=story['title'], id=story['id'], date=story['date']) + return meili_api(requests.post, 'indexes/qotnews/documents', [to_add]) def search(q): params = dict(q=q, limit=250) @@ -52,4 +51,6 @@ def search(q): if __name__ == '__main__': init() + print(update_rankings()) + print(search('qot')) diff --git a/apiserver/server.py b/apiserver/server.py index 6b1d6ea..4e2116a 100644 --- a/apiserver/server.py +++ b/apiserver/server.py @@ -57,7 +57,11 @@ def apisearch(): results = search.search(q) else: results = [] - return dict(results=results) + story_metas = [database.get_story(x['id']).meta_json for x in results] + # hacky nested json + res = Response('{"results":[' + ','.join(story_metas) + ']}') + res.headers['content-type'] = 'application/json' + return res @flask_app.route('/api/submit', methods=['POST'], strict_slashes=False) def submit():