parent
f46cafdc90
commit
d614ad0743
4 changed files with 155 additions and 40 deletions
@ -1,21 +1,67 @@ |
|||||||
import archive |
import archive |
||||||
import database |
import database |
||||||
|
import search |
||||||
|
|
||||||
import json |
import json |
||||||
|
import requests |
||||||
|
|
||||||
# Initialise the three backends in the same order the script always has:
# database first, then the archive index, then the search client.
for backend in (database, archive, search):
    backend.init()

# Running tally of archive documents visited by the migration loop.
count = 0
||||||
|
|
||||||
|
def database_del_story_by_ref(ref):
    """Delete every ``database.Story`` row whose ``ref`` equals *ref*.

    The delete is committed on success. On any error the transaction is
    rolled back and the error is re-raised to the caller. The session is
    closed in every case.
    """
    # Open the session before entering the try block: if Session() itself
    # fails there is nothing to roll back or close, and the original error
    # must not be masked by a NameError on an unbound ``session``.
    session = database.Session()
    try:
        session.query(database.Story).filter(database.Story.ref == ref).delete()
        session.commit()
    except BaseException:
        # Same reach as the previous bare ``except:`` -- roll back on
        # anything (including Ctrl-C), then let it propagate.
        session.rollback()
        raise
    finally:
        session.close()
||||||
|
|
||||||
|
def search_del_story(sid):
    """Delete the document with id *sid* from the MeiliSearch ``qotnews`` index.

    Returns the decoded JSON response body when the server answers 202.
    For any other status code, or any error while talking to the server,
    the problem is logged and ``False`` is returned. ``KeyboardInterrupt``
    is always re-raised.
    """
    # This script does not import ``logging`` at module level, so the error
    # path below used to die with a NameError. Import it locally to keep the
    # function self-contained.
    import logging

    try:
        # str() so a non-string id builds a valid URL instead of a TypeError.
        url = search.MEILI_URL + 'indexes/qotnews/documents/' + str(sid)
        r = requests.delete(url, timeout=2)
        if r.status_code != 202:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        # Deliberately best-effort: a failed search delete must not abort
        # the caller's migration run.
        logging.error('Problem deleting MeiliSearch story: {}'.format(str(e)))
        return False
||||||
|
|
||||||
# Walk every document held in the archive index and copy its story into the
# database and the MeiliSearch index.
with archive.ix.searcher() as searcher:
    print('count all', searcher.doc_count_all())
    print('count', searcher.doc_count())

    for doc in searcher.documents():
        try:
            print('num', count, 'id', doc['id'])
            count += 1

            incoming = doc['story']

            try:
                database.put_story(incoming)
            except database.IntegrityError:
                # A row already exists for this story; keep whichever copy
                # has more comments.
                print('collision!')
                existing_row = database.get_story_by_ref(incoming['ref'])
                existing = json.loads(existing_row.full_json)

                if not (incoming['num_comments'] > existing['num_comments']):
                    print('fewer comments, skipping')
                    continue

                print('more comments, replacing')
                database_del_story_by_ref(incoming['ref'])
                database.put_story(incoming)
                # Drop the replaced story's search document as well.
                search_del_story(existing['id'])

            search.put_story(incoming)
            print()
        except KeyboardInterrupt:
            # Ctrl-C stops the run cleanly.
            break
        except BaseException as err:
            # Any other failure skips just this document.
            print('skipping', doc['id'])
            print('reason:', err)
||||||
|
@ -0,0 +1,57 @@ |
|||||||
|
import logging |
||||||
|
logging.basicConfig( |
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', |
||||||
|
level=logging.DEBUG) |
||||||
|
|
||||||
|
import requests |
||||||
|
|
||||||
|
MEILI_URL = 'http://127.0.0.1:7700/' |
||||||
|
|
||||||
|
def create_index():
    """Ask the MeiliSearch server to create the ``qotnews`` index.

    Returns the decoded JSON response body when the server answers 201.
    For any other status code, or any error during the request, the problem
    is logged and ``False`` is returned. ``KeyboardInterrupt`` is re-raised.
    """
    try:
        payload = {'name': 'qotnews', 'uid': 'qotnews'}
        response = requests.post(MEILI_URL + 'indexes', json=payload, timeout=2)
        if response.status_code == 201:
            return response.json()
        raise Exception('Bad response code ' + str(response.status_code))
    except KeyboardInterrupt:
        raise
    except BaseException as err:
        logging.error('Problem creating MeiliSearch index: {}'.format(str(err)))
        return False
||||||
|
|
||||||
|
def init():
    """Set up the search backend; at present this only calls create_index()."""
    # The result of create_index() is intentionally not inspected here.
    create_index()
||||||
|
|
||||||
|
def put_story(story):
    """Send *story* to the MeiliSearch ``qotnews`` index.

    A copy of *story* without its ``text`` and ``comments`` entries is
    posted, so the caller's object is left untouched. Returns the decoded
    JSON response body when the server answers 202; otherwise the problem is
    logged and ``False`` is returned. ``KeyboardInterrupt`` is re-raised.
    """
    document = story.copy()
    for dropped_key in ('text', 'comments'):
        document.pop(dropped_key, None)

    try:
        response = requests.post(
            MEILI_URL + 'indexes/qotnews/documents',
            json=[document],
            timeout=2,
        )
        if response.status_code == 202:
            return response.json()
        raise Exception('Bad response code ' + str(response.status_code))
    except KeyboardInterrupt:
        raise
    except BaseException as err:
        logging.error('Problem putting MeiliSearch story: {}'.format(str(err)))
        return False
||||||
|
|
||||||
|
def search(q):
    """Query the MeiliSearch ``qotnews`` index for *q*.

    Requests up to 250 results and returns the ``hits`` entry of the decoded
    response when the server answers 200. For any other status code, or any
    error during the request, the problem is logged and ``False`` is
    returned. ``KeyboardInterrupt`` is re-raised.
    """
    try:
        query = {'q': q, 'limit': 250}
        response = requests.get(
            MEILI_URL + 'indexes/qotnews/search',
            params=query,
            timeout=2,
        )
        if response.status_code == 200:
            body = response.json()
            return body['hits']
        raise Exception('Bad response code ' + str(response.status_code))
    except KeyboardInterrupt:
        raise
    except BaseException as err:
        logging.error('Problem searching MeiliSearch: {}'.format(str(err)))
        return False
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Manual smoke test: try to create the index, then print whatever a
    # search for 'the' returns.
    create_index()
    hits = search('the')
    print(hits)
Loading…
Reference in new issue