Integrate with external MeiliSearch server
This commit is contained in:
		| @@ -14,10 +14,10 @@ class Story(Base): | ||||
|     __tablename__ = 'stories' | ||||
|  | ||||
|     sid = Column(String(16), primary_key=True) | ||||
|     ref = Column(String(16), unique=True) | ||||
|     meta_json = Column(String) | ||||
|     full_json = Column(String) | ||||
|     title = Column(String) | ||||
|     date = Column(Integer) | ||||
|  | ||||
| class Reflist(Base): | ||||
|     __tablename__ = 'reflist' | ||||
| @@ -25,6 +25,7 @@ class Reflist(Base): | ||||
|     rid = Column(Integer, primary_key=True) | ||||
|     ref = Column(String(16), unique=True) | ||||
|     sid = Column(String, ForeignKey('stories.sid'), unique=True) | ||||
|     source = Column(String(16)) | ||||
|  | ||||
| def init(): | ||||
|     Base.metadata.create_all(engine) | ||||
| @@ -34,6 +35,7 @@ def get_story(sid): | ||||
|     return session.query(Story).get(sid) | ||||
|  | ||||
| def put_story(story): | ||||
|     story = story.copy() | ||||
|     full_json = json.dumps(story) | ||||
|  | ||||
|     story.pop('text', None) | ||||
| @@ -44,10 +46,10 @@ def put_story(story): | ||||
|         session = Session() | ||||
|         s = Story( | ||||
|             sid=story['id'], | ||||
|             ref=story['ref'], | ||||
|             full_json=full_json, | ||||
|             meta_json=meta_json, | ||||
|             title=story.get('title', None), | ||||
|             date=story.get('date', None), | ||||
|         ) | ||||
|         session.merge(s) | ||||
|         session.commit() | ||||
| @@ -57,14 +59,14 @@ def put_story(story): | ||||
|     finally: | ||||
|         session.close()  | ||||
|  | ||||
| def search(q): | ||||
| def get_story_by_ref(ref): | ||||
|     session = Session() | ||||
|     return session.query(Story).filter(Story.title.contains(q)) | ||||
|     return session.query(Story).filter(Story.ref==ref).first() | ||||
|  | ||||
| def get_reflist(amount): | ||||
|     session = Session() | ||||
|     q = session.query(Reflist).order_by(Reflist.rid.desc()).limit(amount) | ||||
|     return [dict(ref=x.ref, sid=x.sid) for x in q.all()] | ||||
|     return [dict(ref=x.ref, sid=x.sid, source=x.source) for x in q.all()] | ||||
|  | ||||
| def get_stories(amount): | ||||
|     session = Session() | ||||
| @@ -75,10 +77,10 @@ def get_stories(amount): | ||||
|             limit(amount) | ||||
|     return [x[1] for x in q] | ||||
|  | ||||
| def put_ref(ref, sid): | ||||
| def put_ref(ref, sid, source): | ||||
|     try: | ||||
|         session = Session() | ||||
|         r = Reflist(ref=ref, sid=sid) | ||||
|         r = Reflist(ref=ref, sid=sid, source=source) | ||||
|         session.add(r) | ||||
|         session.commit() | ||||
|     except: | ||||
| @@ -101,4 +103,4 @@ def del_ref(ref): | ||||
| if __name__ == '__main__': | ||||
|     init() | ||||
|  | ||||
|     print(get_stories(5)) | ||||
|     print(get_story_by_ref('hgi3sy')) | ||||
|   | ||||
| @@ -1,21 +1,67 @@ | ||||
| import archive | ||||
| import database | ||||
| import search | ||||
|  | ||||
| import json | ||||
| import requests | ||||
|  | ||||
| database.init() | ||||
| archive.init() | ||||
| search.init() | ||||
|  | ||||
| count = 0 | ||||
|  | ||||
def database_del_story_by_ref(ref):
    """Delete the Story row whose ``ref`` column equals *ref*.

    Commits on success; rolls back and re-raises on any failure so the
    caller sees the original error. The session is always closed.
    """
    # Create the session outside the try: if Session() itself fails we must
    # not reach the rollback/close handlers with `session` unbound.
    session = database.Session()
    try:
        session.query(database.Story).filter(database.Story.ref == ref).delete()
        session.commit()
    except BaseException:
        # Explicit spelling of the original bare `except:` (identical
        # semantics) — rollback, then propagate the error unchanged.
        session.rollback()
        raise
    finally:
        session.close()
|  | ||||
def search_del_story(sid):
    """Remove document *sid* from the MeiliSearch 'qotnews' index.

    Returns the parsed JSON response on success, or False on failure
    (the error is logged). KeyboardInterrupt always propagates so the
    script stays interruptible.
    """
    # Local import: the script's top-level imports do not include logging,
    # so the original error handler raised NameError instead of logging.
    import logging

    try:
        r = requests.delete(search.MEILI_URL + 'indexes/qotnews/documents/'+sid, timeout=2)
        # MeiliSearch queues document deletions asynchronously and replies
        # 202 Accepted; anything else is treated as a failure.
        if r.status_code != 202:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem deleting MeiliSearch story: {}'.format(str(e)))
        return False
|  | ||||
| with archive.ix.searcher() as searcher: | ||||
|     for docnum in searcher.document_numbers(): | ||||
|         try: | ||||
|             #if docnum > 500: | ||||
|             #    break | ||||
|     print('count all', searcher.doc_count_all()) | ||||
|     print('count', searcher.doc_count()) | ||||
|  | ||||
|             print('docnum', docnum) | ||||
|             res = searcher.stored_fields(docnum) | ||||
|             print('id', res['id']) | ||||
|             database.put_story(res['story']) | ||||
|     for doc in searcher.documents(): | ||||
|         try: | ||||
|             print('num', count, 'id', doc['id']) | ||||
|             count += 1 | ||||
|  | ||||
|             try: | ||||
|                 database.put_story(doc['story']) | ||||
|             except database.IntegrityError: | ||||
|                 print('collision!') | ||||
|                 old_story = database.get_story_by_ref(doc['story']['ref']) | ||||
|                 story = json.loads(old_story.full_json) | ||||
|                 if doc['story']['num_comments'] > story['num_comments']: | ||||
|                     print('more comments, replacing') | ||||
|                     database_del_story_by_ref(doc['story']['ref']) | ||||
|                     database.put_story(doc['story']) | ||||
|                     search_del_story(story['id']) | ||||
|                 else: | ||||
|                     print('fewer comments, skipping') | ||||
|                     continue | ||||
|  | ||||
|             search.put_story(doc['story']) | ||||
|             print() | ||||
|         except KeyboardInterrupt: | ||||
|             break | ||||
|         except BaseException as e: | ||||
|             print('skipping', docnum) | ||||
|             print('skipping', doc['id']) | ||||
|             print('reason:', e) | ||||
|   | ||||
							
								
								
									
										57
									
								
								apiserver/search.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								apiserver/search.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,57 @@ | ||||
| import logging | ||||
| logging.basicConfig( | ||||
|         format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', | ||||
|         level=logging.DEBUG) | ||||
|  | ||||
| import requests | ||||
|  | ||||
| MEILI_URL = 'http://127.0.0.1:7700/' | ||||
|  | ||||
def create_index():
    """Create the 'qotnews' index on the MeiliSearch server.

    Returns the server's JSON response on success, or False on any
    failure (which is logged). KeyboardInterrupt always propagates.
    """
    try:
        # `payload` instead of `json` so the stdlib module name is not shadowed.
        payload = dict(name='qotnews', uid='qotnews')
        resp = requests.post(MEILI_URL + 'indexes', json=payload, timeout=2)
        if resp.status_code != 201:
            raise Exception('Bad response code ' + str(resp.status_code))
        return resp.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem creating MeiliSearch index: {}'.format(str(e)))
        return False
|  | ||||
def init():
    """Initialise the search backend by ensuring the index exists."""
    create_index()
|  | ||||
def put_story(story):
    """Index *story* in MeiliSearch.

    A copy of the dict is posted, with the bulky 'text' and 'comments'
    entries stripped so only searchable metadata is stored. Returns the
    server's JSON response on success, or False on failure (logged).
    """
    doc = story.copy()
    for key in ('text', 'comments'):
        doc.pop(key, None)
    try:
        r = requests.post(MEILI_URL + 'indexes/qotnews/documents', json=[doc], timeout=2)
        # 202 Accepted: MeiliSearch enqueues document updates asynchronously.
        if r.status_code != 202:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem putting MeiliSearch story: {}'.format(str(e)))
        return False
|  | ||||
def search(q):
    """Run the full-text query *q* against the 'qotnews' index.

    Returns up to 250 hits as a list of story dicts, or False if the
    request failed (the error is logged).
    """
    try:
        resp = requests.get(MEILI_URL + 'indexes/qotnews/search',
                            params=dict(q=q, limit=250), timeout=2)
        if resp.status_code != 200:
            raise Exception('Bad response code ' + str(resp.status_code))
        return resp.json()['hits']
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem searching MeiliSearch: {}'.format(str(e)))
        return False
|      | ||||
if __name__ == '__main__':
    # Manual smoke test: make sure the index exists, then run a sample query.
    create_index()

    print(search('the'))
| @@ -11,6 +11,7 @@ import time | ||||
| from urllib.parse import urlparse, parse_qs | ||||
|  | ||||
| import database | ||||
| import search | ||||
| import feed | ||||
| from utils import gen_rand_id | ||||
|  | ||||
| @@ -25,6 +26,7 @@ from gevent.pywsgi import WSGIServer | ||||
| monkey.patch_all() | ||||
|  | ||||
| database.init() | ||||
| search.init() | ||||
|  | ||||
| FEED_LENGTH = 75 | ||||
| news_index = 0 | ||||
| @@ -48,16 +50,13 @@ def api(): | ||||
|     return res | ||||
|  | ||||
| @flask_app.route('/api/search', strict_slashes=False) | ||||
| def search(): | ||||
| def apisearch(): | ||||
|     q = request.args.get('q', '') | ||||
|     if len(q) >= 3: | ||||
|         results = [x.meta_json for x in database.search(q)] | ||||
|         results = search.search(q) | ||||
|     else: | ||||
|         results = [] | ||||
|     # hacky nested json | ||||
|     res = Response('{"results":[' + ','.join(results) + ']}') | ||||
|     res.headers['content-type'] = 'application/json' | ||||
|     return res | ||||
|     return dict(results=results) | ||||
|  | ||||
| @flask_app.route('/api/submit', methods=['POST'], strict_slashes=False) | ||||
| def submit(): | ||||
| @@ -75,19 +74,24 @@ def submit(): | ||||
|         elif 'reddit.com' in parse.hostname and 'comments' in url: | ||||
|             source = 'reddit' | ||||
|             ref = parse.path.split('/')[4] | ||||
|         elif 'news.t0.vc' in parse.hostname: | ||||
|             raise Exception('Invalid article') | ||||
|         else: | ||||
|             source = 'manual' | ||||
|             ref = url | ||||
|  | ||||
|         # TODO: return existing refs | ||||
|  | ||||
|         story = dict(id=nid, ref=ref, source=source) | ||||
|         valid = feed.update_story(story, is_manual=True) | ||||
|         if valid: | ||||
|             database.put_story(story) | ||||
|             return {'nid': nid} | ||||
|         existing = database.get_story_by_ref(ref) | ||||
|         if existing: | ||||
|             return {'nid': existing.sid} | ||||
|         else: | ||||
|             raise Exception('Invalid article') | ||||
|             story = dict(id=nid, ref=ref, source=source) | ||||
|             valid = feed.update_story(story, is_manual=True) | ||||
|             if valid: | ||||
|                 database.put_story(story) | ||||
|                 search.put_story(story) | ||||
|                 return {'nid': nid} | ||||
|             else: | ||||
|                 raise Exception('Invalid article') | ||||
|  | ||||
|     except BaseException as e: | ||||
|         logging.error('Problem with article submission: {} - {}'.format(e.__class__.__name__, str(e))) | ||||
| @@ -148,31 +152,37 @@ def feed_thread(): | ||||
|  | ||||
|     try: | ||||
|         while True: | ||||
|             ref_list = database.get_reflist(FEED_LENGTH) | ||||
|  | ||||
|             # onboard new stories | ||||
|             if news_index == 0: | ||||
|                 for ref, source in feed.list(): | ||||
|                     if database.get_story_by_ref(ref): | ||||
|                         continue | ||||
|                     try: | ||||
|                         nid = new_id() | ||||
|                         database.put_ref(ref, nid) | ||||
|                         database.put_story(dict(id=nid, ref=ref, source=source)) | ||||
|                         database.put_ref(ref, nid, source) | ||||
|                         logging.info('Added ref ' + ref) | ||||
|                     except database.IntegrityError: | ||||
|                         continue | ||||
|  | ||||
|             ref_list = database.get_reflist(FEED_LENGTH) | ||||
|  | ||||
|             # update current stories | ||||
|             if news_index < len(ref_list): | ||||
|                 update_ref = ref_list[news_index]['ref'] | ||||
|                 update_sid = ref_list[news_index]['sid'] | ||||
|                 story_json = database.get_story(update_sid).full_json | ||||
|                 story = json.loads(story_json) | ||||
|                 item = ref_list[news_index] | ||||
|  | ||||
|                 try: | ||||
|                     story_json = database.get_story(item['sid']).full_json | ||||
|                     story = json.loads(story_json) | ||||
|                 except AttributeError: | ||||
|                     story = dict(id=item['sid'], ref=item['ref'], source=item['source']) | ||||
|  | ||||
|                 valid = feed.update_story(story) | ||||
|                 if valid: | ||||
|                     database.put_story(story) | ||||
|                     search.put_story(story) | ||||
|                 else: | ||||
|                     database.del_ref(update_ref) | ||||
|                     logging.info('Removed ref {}'.format(update_ref)) | ||||
|                     database.del_ref(item['ref']) | ||||
|                     logging.info('Removed ref {}'.format(item['ref'])) | ||||
|  | ||||
|             gevent.sleep(6) | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user