Move scripts into own folder
This commit is contained in:
		
							
								
								
									
										50
									
								
								apiserver/scripts/delete-story.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								apiserver/scripts/delete-story.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | ||||
| import database | ||||
| import search | ||||
| import sys | ||||
|  | ||||
| import json | ||||
| import requests | ||||
|  | ||||
# Initialize the project's database and search modules at import time,
# before the helpers below use them.
database.init()
search.init()
|  | ||||
def database_del_story(sid):
    """Delete the story row(s) whose ``sid`` matches from the database.

    The delete is committed on success. On any error the transaction is
    rolled back and the exception is re-raised. The session is closed in
    both cases.
    """
    # Create the session outside the ``try`` so that a failure here is not
    # masked by a NameError on ``session`` in the handlers below.
    session = database.Session()
    try:
        session.query(database.Story).filter(database.Story.sid == sid).delete()
        session.commit()
    except BaseException:
        # Same scope as the previous bare ``except:``: roll back on anything,
        # including KeyboardInterrupt, then let the error propagate.
        session.rollback()
        raise
    finally:
        session.close()
|  | ||||
def search_del_story(sid):
    """Delete document ``sid`` from the ``qotnews`` MeiliSearch index.

    Returns the decoded JSON response when the server answers 202. Returns
    False when the request fails or answers with any other status code; the
    problem is logged. KeyboardInterrupt is always propagated.
    """
    # This script does not import logging at module level, so the error path
    # below used to die with a NameError instead of returning False.
    import logging

    try:
        r = requests.delete(search.MEILI_URL + 'indexes/qotnews/documents/' + sid, timeout=2)
        if r.status_code != 202:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem deleting MeiliSearch story: {}'.format(str(e)))
        return False
|  | ||||
if __name__ == '__main__':
    # Expect exactly one command-line argument: the id of the story to delete.
    if len(sys.argv) != 2:
        print('Usage: python delete-story.py [story id]')
        # Use sys.exit rather than the bare exit(): the latter is injected by
        # the site module for interactive use and is not guaranteed to exist.
        sys.exit(1)

    sid = sys.argv[1]
    story = database.get_story(sid)

    if story:
        print('Deleting story:')
        print(story.title)
        # Remove the story from the database, then from the search index,
        # and finally hand its ref to database.del_ref.
        database_del_story(sid)
        search_del_story(sid)
        database.del_ref(story.ref)
    else:
        print('Story not found. Exiting.')
							
								
								
									
										58
									
								
								apiserver/scripts/fix-stories.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								apiserver/scripts/fix-stories.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,58 @@ | ||||
| import time | ||||
| import json | ||||
| import logging | ||||
|  | ||||
| import feed | ||||
| import database | ||||
| import search | ||||
|  | ||||
# Initialize the database module before any stories are read.
# NOTE(review): search.put_story() is used below, but search.init() is never
# called in this script (the other scripts call it) - confirm whether it is needed.
database.init()
|  | ||||
def fix_gzip_bug(story_list):
    """Update stories whose text is dominated by Unicode replacement characters.

    ``story_list`` is an iterable of rows whose first element is a story id.
    For each story, the U+FFFD characters in the ``text`` field of its
    ``full_json`` are counted. When there are at least FIX_THRESHOLD of them
    per 1000 characters, the story's ``meta_json`` is passed to
    ``feed.update_story``. If that reports the story valid, it is saved with
    ``database.put_story`` and ``search.put_story``.

    KeyboardInterrupt propagates. Any other error is logged and then drops
    into the debugger via ``breakpoint()`` so the failing story can be
    inspected.
    """
    FIX_THRESHOLD = 150  # replacement characters per 1000 characters of text
    # U+FFFD REPLACEMENT CHARACTER (bytes EF BF BD in UTF-8). The literal had
    # been garbled into the eight-character text "<EFBFBD>", which can never
    # reach the threshold above.
    REPLACEMENT_CHAR = '\ufffd'

    for row in story_list:
        try:
            sid = row[0]
            story = database.get_story(sid)
            full_json = json.loads(story.full_json)
            meta_json = json.loads(story.meta_json)
            text = full_json.get('text', '')

            count = text.count(REPLACEMENT_CHAR)
            if not count:
                continue

            ratio = count / len(text) * 1000
            print('Bad story:', sid, 'Num ?:', count, 'Ratio:', ratio)
            if ratio < FIX_THRESHOLD:
                continue

            print('Attempting to fix...')

            valid = feed.update_story(meta_json, is_manual=True)
            if valid:
                database.put_story(meta_json)
                search.put_story(meta_json)
                print('Success')
            else:
                print('Story was not valid')

            # Pause before moving on to the next story.
            time.sleep(3)

        except KeyboardInterrupt:
            raise
        except BaseException as e:
            logging.exception(e)
            # NOTE(review): looks like a deliberate debugging stop for this
            # manually-run script - confirm before removing.
            breakpoint()
|  | ||||
if __name__ == '__main__':
    # Report how many stories are stored and wait for the operator to confirm.
    total = database.count_stories()
    prompt_lines = (
        'Fix {} stories?'.format(total),
        'Press ENTER to continue, ctrl-c to cancel',
    )
    for line in prompt_lines:
        print(line)
    input()

    # Scan every row in the story list returned by the database module.
    fix_gzip_bug(database.get_story_list())
|  | ||||
							
								
								
									
										62
									
								
								apiserver/scripts/reindex.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										62
									
								
								apiserver/scripts/reindex.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,62 @@ | ||||
| import logging | ||||
# Configure the root logger: timestamp, logger name, level and message, at INFO level.
# NOTE(review): this runs before the project imports below - presumably so that
# records they log at import time use this format; confirm.
logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO)
|  | ||||
| import database | ||||
| from sqlalchemy import select | ||||
| import search | ||||
| import sys | ||||
|  | ||||
| import time | ||||
| import json | ||||
| import requests | ||||
|  | ||||
# Initialize the database and search modules before they are used below.
database.init()
search.init()

# Maximum number of stories sent to MeiliSearch per request in the main loop.
BATCH_SIZE = 5000
|  | ||||
def put_stories(stories):
    """POST ``stories`` to the qotnews documents route and return the API result."""
    route = 'indexes/qotnews/documents'
    return search.meili_api(requests.post, route, stories)
|  | ||||
def get_update(update_id):
    """GET the MeiliSearch task identified by ``update_id`` and return the API result."""
    route = 'tasks/{}'.format(update_id)
    return search.meili_api(requests.get, route)
|  | ||||
if __name__ == '__main__':
    num_stories = database.count_stories()

    print('Reindex {} stories?'.format(num_stories))
    print('Press ENTER to continue, ctrl-c to cancel')
    input()

    story_list = database.get_story_list()

    count = 1
    while story_list:
        stories = []

        # Take up to BATCH_SIZE rows off the end of the list.
        while story_list and len(stories) < BATCH_SIZE:
            # Each row carries the story id as its first element; pass the
            # bare id to get_story, as fix-stories.py does.
            sid = story_list.pop()[0]
            story = database.get_story(sid)
            print('Indexing {}/{} id: {} title: {}'.format(count, num_stories, sid, story.title))
            stories.append(json.loads(story.meta_json))
            count += 1

        res = put_stories(stories)
        update_id = res['uid']

        # Poll the task until it reaches a terminal state. Previously only
        # 'succeeded' ended the loop, so a failed task made the script spin
        # forever.
        print('Waiting for processing', end='', flush=True)
        while True:
            status = get_update(update_id)['status']
            if status == 'succeeded':
                break
            if status in ('failed', 'canceled'):
                print()
                sys.exit('MeiliSearch task {} ended with status {}'.format(update_id, status))
            time.sleep(0.5)
            print('.', end='', flush=True)

        print()

    print('Done.')
|  | ||||
							
								
								
									
										23
									
								
								apiserver/scripts/tests.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								apiserver/scripts/tests.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,23 @@ | ||||
| import time | ||||
| import requests | ||||
|  | ||||
def test_search_api(num_tests=100, url='http://127.0.0.1:33842/api/search?q=iphone', timeout=10):
    """Time repeated GET requests to the search API and print the average.

    Args:
        num_tests: number of requests to issue; must be positive.
        url: endpoint to request.
        timeout: per-request timeout in seconds, so an unresponsive server
            fails the run instead of hanging it.

    Raises:
        ValueError: if ``num_tests`` is not positive.
        requests.RequestException: on connection errors, timeouts, or HTTP
            error statuses (raised by ``raise_for_status``).
    """
    if num_tests <= 0:
        raise ValueError('num_tests must be positive')

    total_time = 0

    for _ in range(num_tests):
        # perf_counter is a monotonic, high-resolution clock meant for
        # measuring intervals; time.time() can jump if the system clock changes.
        start = time.perf_counter()

        res = requests.get(url, timeout=timeout)
        res.raise_for_status()

        total_time += time.perf_counter() - start

    avg_time = total_time / num_tests

    print('Average search time:', avg_time)
|  | ||||
|  | ||||
# Run the search benchmark when this file is executed as a script.
if __name__ == '__main__':
    test_search_api()
		Reference in New Issue
	
	Block a user