diff --git a/apiserver/reindex.py b/apiserver/reindex.py
new file mode 100644
index 0000000..78db056
--- /dev/null
+++ b/apiserver/reindex.py
@@ -0,0 +1,90 @@
+import logging
+logging.basicConfig(
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+        level=logging.INFO)
+
+import database
+from sqlalchemy import select
+import search
+import sys
+
+import time
+import json
+import requests
+from bs4 import BeautifulSoup
+
+database.init()
+search.init()
+
+# Number of stories sent to MeiliSearch in a single documents request.
+BATCH_SIZE = 1000
+
+# Task statuses after which MeiliSearch will not change the task again.
+FINAL_STATUSES = ('succeeded', 'failed', 'canceled')
+
+def put_stories(stories):
+    # Queue a batch of documents; meili_api returns False if the request failed.
+    return search.meili_api(requests.post, 'indexes/qotnews/documents', stories)
+
+def get_update(update_id):
+    # Fetch the task record for update_id, or False if the request failed.
+    return search.meili_api(requests.get, 'tasks/{}'.format(update_id))
+
+def wait_for_update(update_id):
+    # Poll until the task reaches a final status and return that status.
+    # Returns None when the task can't be fetched, so the caller can abort
+    # instead of crashing on meili_api's False return value.
+    while True:
+        update = get_update(update_id)
+        if not update:
+            return None
+        if update['status'] in FINAL_STATUSES:
+            return update['status']
+        time.sleep(0.5)
+        print('.', end='', flush=True)
+
+if __name__ == '__main__':
+    num_stories = database.count_stories()
+
+    print('Reindex {} stories?'.format(num_stories))
+    print('Press ENTER to continue, ctrl-c to cancel')
+    input()
+
+    story_list = database.get_story_list()
+
+    count = 1
+    while len(story_list):
+        stories = []
+
+        for _ in range(BATCH_SIZE):
+            try:
+                sid = story_list.pop()
+            except IndexError:
+                break
+
+            story = database.get_story(sid)
+            print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid[0], story.title))
+            story_obj = json.loads(story.full_json)
+            # Comments are dropped from the document before indexing.
+            story_obj.pop('comments', False)
+            if 'text' in story_obj and story_obj['text']:
+                # Index the article as plain text rather than HTML markup.
+                soup = BeautifulSoup(story_obj['text'], 'html.parser')
+                story_obj['text'] = soup.get_text()
+            stories.append(story_obj)
+            count += 1
+
+        res = put_stories(stories)
+        if not res:
+            sys.exit('Failed to submit batch to MeiliSearch, aborting.')
+        update_id = res['taskUid']
+
+        print('Waiting for processing', end='')
+        status = wait_for_update(update_id)
+        print()
+
+        if status != 'succeeded':
+            sys.exit('Task {} did not succeed (status: {}), aborting.'.format(update_id, status))
+
+    print('Done.')
+
diff --git a/apiserver/search.py b/apiserver/search.py
index 622cd97..3881e7c 100644
--- a/apiserver/search.py
+++ b/apiserver/search.py
@@ -11,7 +11,7 @@ SEARCH_ENABLED = bool(settings.MEILI_URL)
 def meili_api(method, route, json=None, params=None, parse_json=True):
     try:
         headers = {'Authorization': 'Bearer ' + settings.MEILI_API_KEY}
-        r = method(settings.MEILI_URL + route, json=json, params=params, timeout=4)
+        r = method(settings.MEILI_URL + route, json=json, params=params, headers=headers, timeout=4)
         if r.status_code > 299:
             raise Exception('Bad response code ' + str(r.status_code))
         if parse_json:
@@ -25,32 +25,38 @@ def meili_api(method, route, json=None, params=None, parse_json=True):
         logging.error('Problem with MeiliSearch api route: %s: %s', route, str(e))
         return False
 
-def create_index():
-    json = dict(uid='qotnews', primaryKey='id')
-    return meili_api(requests.post, 'indexes', json=json)
-
 def update_settings():
     json = {
-        'rankingRules': ['typo', 'words', 'proximity', 'date:desc', 'exactness'],
-        'searchableAttributes': ['title', 'url', 'author'],
-        'displayedAttributes': ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments'],
+        'rankingRules': ['words', 'typo', 'proximity', 'attribute', 'date:desc', 'exactness'],
+        'searchableAttributes': ['title', 'url', 'author', 'text'],
+        'displayedAttributes': ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments', 'text'],
+        'stopWords': ['a', 'an', 'the', 'and', 'or', 'but', 'if', 'in', 'on', 'at', 'by', 'for', 'with', 'to', 'from', 'of', 'is', 'it', 'that', 'this'],
     }
-    return meili_api(requests.post, 'indexes/qotnews/settings', json=json)
+    return meili_api(requests.patch, 'indexes/qotnews/settings', json=json)
 
 def init():
     if not SEARCH_ENABLED:
         logging.info('Search is not enabled, skipping init.')
         return
-    print(create_index())
     update_settings()
 
 def put_story(story):
     if not SEARCH_ENABLED: return
     return meili_api(requests.post, 'indexes/qotnews/documents', [story])
 
-def search(q):
+def search(q, in_article=False):
     if not SEARCH_ENABLED: return []
+
     json = dict(q=q, limit=settings.FEED_LENGTH)
+
+    # Only restrict matching to the article body (and return a cropped
+    # excerpt of it) when the caller asks for an in-article search.
+    if in_article:
+        json['attributesToSearchOn'] = ['text']
+        json['attributesToCrop'] = ['text']
+        json['attributesToRetrieve'] = ['id', 'ref', 'source', 'author', 'author_link', 'score', 'date', 'title', 'link', 'url', 'num_comments']
+        json['cropLength'] = 80
+
     r = meili_api(requests.post, 'indexes/qotnews/search', json=json, parse_json=False)
     return r
 
diff --git a/apiserver/server.py b/apiserver/server.py
index efacb1e..b7710c5 100644
--- a/apiserver/server.py
+++ b/apiserver/server.py
@@ -131,8 +131,10 @@ def apistats():
 @flask_app.route('/api/search', strict_slashes=False)
 def apisearch():
     q = request.args.get('q', '')
+    # Query values are strings, so 'false' and '0' must be rejected explicitly.
+    in_article = request.args.get('article', '').lower() not in ('', '0', 'false', 'no', 'off')
     if len(q) >= 3:
-        results = search.search(q)
+        results = search.search(q, in_article)
     else:
         results = '[]'
     res = Response(results)
@@ -262,7 +264,7 @@ def static_story(sid):
         show_comments=request.path.endswith('/c'),
     )
 
-http_server = WSGIServer(('', 33842), flask_app)
+http_server = WSGIServer(('0.0.0.0', 33842), flask_app)
 
 def feed_thread():
     global news_index, ref_list, current_item
diff --git a/webclient/src/Results.js b/webclient/src/Results.js
index b0703d9..241649e 100644
--- a/webclient/src/Results.js
+++ b/webclient/src/Results.js
@@ -78,6 +78,10 @@ function Results() {
 {infoLine(x)}
+
+{!!x?._formatted &&
+
{x._formatted.text.replace(/\n/g, ' ')}
+ } ) :