From dcedd4caa1bb0bb3cfa8dd9ee1cc4984f4bec18c Mon Sep 17 00:00:00 2001 From: Tanner Collin Date: Mon, 6 Sep 2021 00:20:21 +0000 Subject: [PATCH] Add script to reindex search, abstract search API --- apiserver/reindex.py | 45 +++++++++++++++++++++++++ apiserver/search.py | 76 ++++++++++++------------------------------ searchserver/README.md | 2 +- 3 files changed, 67 insertions(+), 56 deletions(-) create mode 100644 apiserver/reindex.py diff --git a/apiserver/reindex.py b/apiserver/reindex.py new file mode 100644 index 0000000..e6bc3d5 --- /dev/null +++ b/apiserver/reindex.py @@ -0,0 +1,45 @@ +import logging +logging.basicConfig( + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + level=logging.INFO) + +import database +from sqlalchemy import select +import search +import sys + +import json +import requests + +database.init() +search.init() + +def count_stories(): + try: + session = database.Session() + return session.query(database.Story).count() + finally: + session.close() + +def get_story_list(): + try: + session = database.Session() + return session.query(database.Story.sid).all() + finally: + session.close() + +if __name__ == '__main__': + num_stories = count_stories() + + print('Reindex {} stories?'.format(num_stories)) + print('Press ENTER to continue, ctrl-c to cancel') + input() + + count = 1 + for sid in get_story_list(): + story = database.get_story(sid) + print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title)) + story_obj = json.loads(story.meta_json) + search.put_story(story_obj) + count += 1 + diff --git a/apiserver/search.py b/apiserver/search.py index 79e2193..77d714d 100644 --- a/apiserver/search.py +++ b/apiserver/search.py @@ -7,51 +7,34 @@ import requests MEILI_URL = 'http://127.0.0.1:7700/' -def create_index(): +def meili_api(method, route, json=None, params=None): try: - json = dict(name='qotnews', uid='qotnews') - r = requests.post(MEILI_URL + 'indexes', json=json, timeout=2) - if r.status_code != 201: + r = method(MEILI_URL + route, json=json, params=params, timeout=4) + if r.status_code > 299: raise Exception('Bad response code ' + str(r.status_code)) return r.json() except KeyboardInterrupt: raise except BaseException as e: - logging.error('Problem creating MeiliSearch index: {}'.format(str(e))) + logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e)) return False +def create_index(): + json = dict(uid='qotnews', primaryKey='id') + return meili_api(requests.post, 'indexes', json=json) + def update_rankings(): - try: - json = ['typo', 'words', 'proximity', 'attribute', 'desc(date)', 'wordsPosition', 'exactness'] - r = requests.post(MEILI_URL + 'indexes/qotnews/settings/ranking-rules', json=json, timeout=2) - if r.status_code != 202: - raise Exception('Bad response code ' + str(r.status_code)) - return r.json() - except KeyboardInterrupt: - raise - except BaseException as e: - logging.error('Problem setting MeiliSearch ranking rules: {}'.format(str(e))) - return False + json = ['typo', 'words', 'proximity', 'attribute', 'desc(date)', 'wordsPosition', 'exactness'] + return meili_api(requests.post, 'indexes/qotnews/settings/ranking-rules', json=json) def update_attributes(): - try: - json = ['title', 'url', 'author', 'link', 'id'] - r = requests.post(MEILI_URL + 'indexes/qotnews/settings/searchable-attributes', json=json, timeout=2) - if r.status_code != 202: - raise Exception('Bad response code ' + str(r.status_code)) - return r.json() - r = requests.delete(MEILI_URL + 'indexes/qotnews/settings/displayed-attributes', timeout=2) - if r.status_code != 202: - raise Exception('Bad response code ' + str(r.status_code)) - return r.json() - except KeyboardInterrupt: - raise - except BaseException as e: - logging.error('Problem setting MeiliSearch searchable attributes: {}'.format(str(e))) - return False + json = ['title', 'url', 'author', 'link', 'id'] + r = meili_api(requests.post, 'indexes/qotnews/settings/searchable-attributes', json=json) + meili_api(requests.delete, 'indexes/qotnews/settings/displayed-attributes', json=json) + return r def init(): - create_index() + print(create_index()) update_rankings() update_attributes() @@ -59,31 +42,14 @@ def put_story(story): story = story.copy() story.pop('text', None) story.pop('comments', None) - try: - r = requests.post(MEILI_URL + 'indexes/qotnews/documents', json=[story], timeout=2) - if r.status_code != 202: - raise Exception('Bad response code ' + str(r.status_code)) - return r.json() - except KeyboardInterrupt: - raise - except BaseException as e: - logging.error('Problem putting MeiliSearch story: {}'.format(str(e))) - return False + return meili_api(requests.post, 'indexes/qotnews/documents', [story]) def search(q): - try: - params = dict(q=q, limit=250) - r = requests.get(MEILI_URL + 'indexes/qotnews/search', params=params, timeout=2) - if r.status_code != 200: - raise Exception('Bad response code ' + str(r.status_code)) - return r.json()['hits'] - except KeyboardInterrupt: - raise - except BaseException as e: - logging.error('Problem searching MeiliSearch: {}'.format(str(e))) - return False + params = dict(q=q, limit=250) + r = meili_api(requests.get, 'indexes/qotnews/search', params=params) + return r['hits'] if __name__ == '__main__': - create_index() + init() - print(search('the')) + print(search('qot')) diff --git a/searchserver/README.md b/searchserver/README.md index 684037c..ca64762 100644 --- a/searchserver/README.md +++ b/searchserver/README.md @@ -3,7 +3,7 @@ Download MeiliSearch with: ``` -wget https://github.com/meilisearch/MeiliSearch/releases/download/v0.11.1/meilisearch-linux-amd64 +wget https://github.com/meilisearch/MeiliSearch/releases/download/v0.21.1/meilisearch-linux-amd64 chmod +x meilisearch-linux-amd64 ```