Move scripts into own folder
This commit is contained in:
50
apiserver/scripts/delete-story.py
Normal file
50
apiserver/scripts/delete-story.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import database
|
||||
import search
|
||||
import sys
|
||||
|
||||
import json
|
||||
import requests
|
||||
|
||||
database.init()
|
||||
search.init()
|
||||
|
||||
def database_del_story(sid):
    """Delete the story row(s) whose ``sid`` column equals *sid*.

    The delete runs in its own session: it is committed on success; on any
    error the session is rolled back and the exception is re-raised. The
    session is closed in every case.
    """
    # Create the session before entering the try block. If Session() itself
    # fails, the handlers below would otherwise touch an unbound name and
    # hide the real error behind an UnboundLocalError.
    session = database.Session()
    try:
        session.query(database.Story).filter(database.Story.sid == sid).delete()
        session.commit()
    except BaseException:
        # Same reach as the previous bare except: roll back on anything,
        # including KeyboardInterrupt, then let it propagate.
        session.rollback()
        raise
    finally:
        session.close()
|
||||
|
||||
def search_del_story(sid):
    """Ask MeiliSearch to delete document *sid* from the ``qotnews`` index.

    Returns the decoded JSON body when the server answers with HTTP 202.
    Any other failure is logged and ``False`` is returned instead.
    ``KeyboardInterrupt`` is re-raised so the operator can still abort.
    """
    # Imported locally: this script's import block does not bring in
    # logging, so the error path below used to die with NameError instead
    # of reporting the problem and returning False.
    import logging

    try:
        r = requests.delete(search.MEILI_URL + 'indexes/qotnews/documents/' + sid, timeout=2)
        if r.status_code != 202:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem deleting MeiliSearch story: {}'.format(str(e)))
        return False
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: exactly one argument, the story id.
    if len(sys.argv) == 2:
        sid = sys.argv[1]
    else:
        print('Usage: python delete-story.py [story id]')
        # sys.exit() instead of exit(): the latter is injected by the site
        # module for interactive use and is not guaranteed to exist.
        sys.exit(1)

    story = database.get_story(sid)

    if story:
        print('Deleting story:')
        print(story.title)
        # Remove the database row, then the search-index document, then
        # hand the story's ref to database.del_ref.
        database_del_story(sid)
        search_del_story(sid)
        database.del_ref(story.ref)
    else:
        print('Story not found. Exiting.')
|
58
apiserver/scripts/fix-stories.py
Normal file
58
apiserver/scripts/fix-stories.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import time
|
||||
import json
|
||||
import logging
|
||||
|
||||
import feed
|
||||
import database
|
||||
import search
|
||||
|
||||
database.init()
|
||||
|
||||
def fix_gzip_bug(story_list):
    """Re-fetch stories whose stored text is full of replacement characters.

    For every row in *story_list* (the story id is read from index 0) the
    stored story is loaded and the U+FFFD characters in its ``text`` field
    are counted. Stories with none are skipped silently. Stories below
    ``FIX_THRESHOLD`` (counted per 1000 characters of text) are reported
    but left alone. The rest are passed to ``feed.update_story`` and, when
    that reports the story as valid, written back through
    ``database.put_story`` and ``search.put_story``.

    ``KeyboardInterrupt`` propagates. Any other error is logged with its
    traceback and drops into the debugger via ``breakpoint()``.
    """
    FIX_THRESHOLD = 150  # replacement characters per 1000 characters of text

    for row in story_list:
        try:
            sid = row[0]
            story = database.get_story(sid)
            full_json = json.loads(story.full_json)
            meta_json = json.loads(story.meta_json)
            text = full_json.get('text', '')

            # U+FFFD REPLACEMENT CHARACTER (UTF-8 bytes EF BF BD).
            # NOTE(review): the literal arrived here rendered as its hex
            # byte sequence; confirm it matches the character in the repo.
            bad_chars = text.count('\ufffd')
            if not bad_chars:
                continue

            # bad_chars > 0 implies text is non-empty, so no division by zero.
            ratio = bad_chars / len(text) * 1000
            print('Bad story:', sid, 'Num ?:', bad_chars, 'Ratio:', ratio)
            if ratio < FIX_THRESHOLD:
                continue

            print('Attempting to fix...')

            valid = feed.update_story(meta_json, is_manual=True)
            if valid:
                database.put_story(meta_json)
                search.put_story(meta_json)
                print('Success')
            else:
                print('Story was not valid')

            # Pause between update attempts.
            time.sleep(3)

        except KeyboardInterrupt:
            raise
        except BaseException as e:
            logging.exception(e)
            # Kept from the original: stop in the debugger so the operator
            # can inspect the failing story before moving on.
            breakpoint()
|
||||
|
||||
if __name__ == '__main__':
    # Show how many stories are stored and wait for the operator to confirm
    # before scanning all of them.
    print('Fix {} stories?'.format(database.count_stories()))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()

    fix_gzip_bug(database.get_story_list())
|
||||
|
62
apiserver/scripts/reindex.py
Normal file
62
apiserver/scripts/reindex.py
Normal file
@@ -0,0 +1,62 @@
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
level=logging.INFO)
|
||||
|
||||
import database
|
||||
from sqlalchemy import select
|
||||
import search
|
||||
import sys
|
||||
|
||||
import time
|
||||
import json
|
||||
import requests
|
||||
|
||||
database.init()
|
||||
search.init()
|
||||
|
||||
BATCH_SIZE = 5000
|
||||
|
||||
def put_stories(stories):
    """POST *stories* to the qotnews documents endpoint via ``search.meili_api``.

    Returns whatever ``search.meili_api`` returns.
    """
    endpoint = 'indexes/qotnews/documents'
    return search.meili_api(requests.post, endpoint, stories)
|
||||
|
||||
def get_update(update_id):
    """GET the task record for *update_id* via ``search.meili_api``.

    Returns whatever ``search.meili_api`` returns.
    """
    task_path = 'tasks/{}'.format(update_id)
    return search.meili_api(requests.get, task_path)
|
||||
|
||||
if __name__ == '__main__':
    # Rebuild the search index from the database, BATCH_SIZE stories at a
    # time, waiting for each upload task to finish before sending the next.
    num_stories = database.count_stories()

    print('Reindex {} stories?'.format(num_stories))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()

    story_list = database.get_story_list()

    count = 1
    while story_list:
        stories = []

        # Drain up to BATCH_SIZE rows from the end of the list.
        for _ in range(BATCH_SIZE):
            try:
                row = story_list.pop()
            except IndexError:
                break

            # The story id sits at index 0 of each row. Pass the scalar id
            # to get_story, as the other scripts do, not the whole row.
            sid = row[0]
            story = database.get_story(sid)
            print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid, story.title))
            story_obj = json.loads(story.meta_json)
            stories.append(story_obj)
            count += 1

        res = put_stories(stories)
        update_id = res['uid']

        print('Waiting for processing', end='')
        while True:
            status = get_update(update_id)['status']
            if status == 'succeeded':
                break
            # A task that has failed or been canceled never reaches
            # 'succeeded'; stop instead of polling forever.
            if status in ('failed', 'canceled'):
                print()
                sys.exit('Indexing task {} ended with status {}'.format(update_id, status))
            time.sleep(0.5)
            print('.', end='', flush=True)

        print()

    print('Done.')
|
||||
|
23
apiserver/scripts/tests.py
Normal file
23
apiserver/scripts/tests.py
Normal file
@@ -0,0 +1,23 @@
|
||||
import time
|
||||
import requests
|
||||
|
||||
def test_search_api():
    """Time repeated search requests against the local API and print the mean.

    Sends ``num_tests`` sequential GET requests to the search endpoint on
    127.0.0.1:33842 and prints the average round-trip time in seconds.
    Raises if a request times out or returns an HTTP error status.
    """
    num_tests = 100
    total_time = 0

    for _ in range(num_tests):
        # perf_counter is the monotonic, high-resolution clock intended for
        # measuring short intervals; time.time() can jump with clock changes.
        start = time.perf_counter()

        # Without a timeout, requests waits indefinitely on a hung server.
        res = requests.get('http://127.0.0.1:33842/api/search?q=iphone', timeout=10)
        res.raise_for_status()

        total_time += time.perf_counter() - start

    avg_time = total_time / num_tests

    print('Average search time:', avg_time)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_search_api()
|
Reference in New Issue
Block a user