parent
cbc802b7e9
commit
e59acefda9
5 changed files with 0 additions and 153 deletions
@ -1,52 +0,0 @@ |
||||
from whoosh.analysis import StemmingAnalyzer, CharsetFilter, NgramFilter |
||||
from whoosh.index import create_in, open_dir, exists_in |
||||
from whoosh.fields import * |
||||
from whoosh.qparser import QueryParser |
||||
from whoosh.support.charset import accent_map |
||||
|
||||
# Index titles with stemming, accent folding, and 3+-char n-grams so
# partial-word and accent-insensitive queries still match.
analyzer = StemmingAnalyzer() | CharsetFilter(accent_map) | NgramFilter(minsize=3)

# Title is searchable and stored; id is the unique document key;
# the full story dict is stored verbatim (not indexed).
title_field = TEXT(analyzer=analyzer, stored=True)
id_field = ID(unique=True, stored=True)

schema = Schema(
    id=id_field,
    title=title_field,
    story=STORED,
)

# Directory holding the on-disk Whoosh index files.
ARCHIVE_LOCATION = 'data/archive'

# Module-global index handle; populated by init() before any other call.
ix = None
||||
|
||||
def init():
    """Open the Whoosh index under ARCHIVE_LOCATION, creating it on first run.

    Must be called before update(), get_story(), or search().
    """
    global ix
    if not exists_in(ARCHIVE_LOCATION):
        ix = create_in(ARCHIVE_LOCATION, schema)
    else:
        ix = open_dir(ARCHIVE_LOCATION)
||||
|
||||
def update(story):
    """Insert *story* into the index, replacing any entry with the same id.

    Expects a dict with at least 'id' and 'title' keys; the whole dict is
    stored alongside the indexed fields.
    """
    writer = ix.writer()
    writer.update_document(id=story['id'], title=story['title'], story=story)
    writer.commit()
||||
|
||||
def get_story(sid):
    """Return the stored story dict for id *sid*, or None if not indexed."""
    with ix.searcher() as searcher:
        doc = searcher.document(id=sid)
    if doc:
        return doc['story']
    return None
||||
|
||||
def search(search):
    """Query story titles for *search* and return the matching story dicts.

    Bulky fields ('text', 'comments') are stripped from each result before
    it is returned, so this is suitable for listing pages.
    """
    parser = QueryParser('title', ix.schema)
    with ix.searcher() as searcher:
        hits = searcher.search(parser.parse(search))
        stories = []
        for hit in hits:
            story = hit['story']
            # Drop the heavy fields; callers fetch them via get_story().
            story.pop('text', '')
            story.pop('comments', '')
            stories.append(story)
        return stories
@ -1,26 +0,0 @@ |
||||
import shelve |
||||
|
||||
import archive |
||||
|
||||
archive.init()

# NOTE(review): the migrations below are one-shot jobs that appear to have
# already been run, hence commented out. Kept for reference.

# Pull every story with a title out of the legacy shelve DB into the
# archive, then delete the migrated entries from the shelve.
#with shelve.open('data/data') as db:
#    to_delete = []
#
#    for s in db.values():
#        if 'title' in s:
#            archive.update(s)
#        if 'id' in s:
#            to_delete.append(s['id'])
#
#    for id in to_delete:
#        del db[id]
#
#    for s in db['news_cache'].values():
#        if 'title' in s:
#            archive.update(s)

# Backfill: index any cached story that is not yet in the archive.
#with shelve.open('data/whoosh') as db:
#    for s in db['news_cache'].values():
#        if 'title' in s and not archive.get_story(s['id']):
#            archive.update(s)
@ -1,74 +0,0 @@ |
||||
import json
import logging

import requests

import archive
import database
import search
||||
|
||||
# Open handles to all three backing stores before migrating.
database.init()
archive.init()
search.init()

# Running tally of archive documents processed, for progress output.
count = 0
||||
|
||||
def database_del_story_by_ref(ref):
    """Delete every Story row whose ref equals *ref*, committing on success.

    Rolls back and re-raises on any failure; always closes the session.
    """
    # Create the session BEFORE the try: in the original, a failure inside
    # database.Session() left `session` unbound and the finally clause then
    # raised UnboundLocalError, masking the real error.
    session = database.Session()
    try:
        session.query(database.Story).filter(database.Story.ref == ref).delete()
        session.commit()
    except BaseException:
        # Explicit equivalent of the original bare `except:` — still covers
        # KeyboardInterrupt so the transaction is rolled back, then re-raised.
        session.rollback()
        raise
    finally:
        session.close()
||||
|
||||
def search_del_story(sid):
    """Delete story *sid* from the MeiliSearch index.

    Returns the parsed JSON response on success (HTTP 202), or False on any
    failure. KeyboardInterrupt is re-raised so the migration can be aborted.

    Note: relies on `import logging` at module level — the original file
    never imported it, so the error path raised NameError instead of logging.
    """
    try:
        r = requests.delete(search.MEILI_URL + 'indexes/qotnews/documents/' + sid, timeout=2)
        # MeiliSearch acknowledges async deletes with 202 Accepted.
        if r.status_code != 202:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem deleting MeiliSearch story: {}'.format(str(e)))
        return False
||||
|
||||
# One-off migration: walk every document in the Whoosh archive and copy it
# into the SQL database and the MeiliSearch index, de-duplicating on ref.
with archive.ix.searcher() as searcher:
    print('count all', searcher.doc_count_all())
    print('count', searcher.doc_count())

    for doc in searcher.documents():
        try:
            print('num', count, 'id', doc['id'])
            count += 1

            story = doc['story']
            # The cached image blob is not migrated.
            story.pop('img', None)

            if 'reddit.com/r/technology' in story['link']:
                print('skipping r/technology')
                continue

            try:
                database.put_story(story)
            except database.IntegrityError:
                # Two archive entries share a ref; keep whichever version
                # has the livelier comment thread.
                print('collision!')
                old_story = database.get_story_by_ref(story['ref'])
                old_story = json.loads(old_story.full_json)
                if story['num_comments'] > old_story['num_comments']:
                    print('more comments, replacing')
                    database_del_story_by_ref(story['ref'])
                    database.put_story(story)
                    search_del_story(old_story['id'])
                else:
                    print('fewer comments, skipping')
                    continue

            search.put_story(story)
            print()
        except KeyboardInterrupt:
            break
        except BaseException as e:
            # Best-effort migration: report the bad document and move on.
            print('skipping', doc['id'])
            print('reason:', e)
Loading…
Reference in new issue