parent
cbc802b7e9
commit
e59acefda9
5 changed files with 0 additions and 153 deletions
@ -1,52 +0,0 @@ |
|||||||
from whoosh.analysis import StemmingAnalyzer, CharsetFilter, NgramFilter |
|
||||||
from whoosh.index import create_in, open_dir, exists_in |
|
||||||
from whoosh.fields import * |
|
||||||
from whoosh.qparser import QueryParser |
|
||||||
from whoosh.support.charset import accent_map |
|
||||||
|
|
||||||
# Full-text index configuration for the story archive.
# Stemming + accent folding + 3-gram filtering gives fuzzy, typo-tolerant
# matching on titles.
analyzer = StemmingAnalyzer() | CharsetFilter(accent_map) | NgramFilter(minsize=3)

# Titles are analyzed and stored; ids are unique keys; the full story dict
# is stored verbatim alongside each document for later retrieval.
title_field = TEXT(analyzer=analyzer, stored=True)
id_field = ID(unique=True, stored=True)

schema = Schema(
    id=id_field,
    title=title_field,
    story=STORED,
)

# On-disk location of the whoosh index directory.
ARCHIVE_LOCATION = 'data/archive'

# Module-global index handle; populated by init(), used by every other
# function in this module.
ix = None
|
||||||
|
|
||||||
def init():
    """Open the on-disk whoosh index, creating it if it does not exist.

    Sets the module-global ``ix``; must be called before update(),
    get_story(), or search().
    """
    global ix

    if exists_in(ARCHIVE_LOCATION):
        ix = open_dir(ARCHIVE_LOCATION)
    else:
        # whoosh's create_in() requires the target directory to already
        # exist; create it on first run instead of crashing.
        import os
        os.makedirs(ARCHIVE_LOCATION, exist_ok=True)
        ix = create_in(ARCHIVE_LOCATION, schema)
|
||||||
|
|
||||||
def update(story):
    """Insert or replace a story document in the index.

    ``story`` must be a dict with at least 'id' and 'title'; the whole
    dict is stored under the 'story' field.
    """
    # Use the writer as a context manager: it commits on success and
    # cancels (releasing the index write lock) if update_document raises.
    # The previous bare writer()/commit() pair leaked the lock on error,
    # wedging all later writes.
    with ix.writer() as writer:
        writer.update_document(
            id=story['id'],
            title=story['title'],
            story=story,
        )
|
||||||
|
|
||||||
def get_story(sid):
    """Return the stored story dict whose id is ``sid``, or None if absent."""
    with ix.searcher() as searcher:
        doc = searcher.document(id=sid)
        if not doc:
            return None
        return doc['story']
|
||||||
|
|
||||||
def search(search):
    """Run a title query and return the matching story dicts.

    Heavy fields ('text', 'comments') are stripped from each result so
    the list stays cheap to serialize.
    """
    with ix.searcher() as searcher:
        parsed = QueryParser('title', ix.schema).parse(search)
        hits = searcher.search(parsed)

        stories = []
        for hit in hits:
            story = hit['story']
            story.pop('text', '')
            story.pop('comments', '')
            stories.append(story)

        return stories
|
@ -1,26 +0,0 @@ |
|||||||
import shelve

import archive

archive.init()

# One-off migration snippets, kept for reference; run by uncommenting.
#
# Move stories out of the old shelve database into the whoosh archive,
# deleting each entry from the shelve once archived:
#with shelve.open('data/data') as db:
#    to_delete = []
#
#    for s in db.values():
#        if 'title' in s:
#            archive.update(s)
#        if 'id' in s:
#            to_delete.append(s['id'])
#
#    for id in to_delete:
#        del db[id]
#
#    for s in db['news_cache'].values():
#        if 'title' in s:
#            archive.update(s)

# Backfill the archive from the cached news in the whoosh shelve,
# skipping stories that are already archived:
#with shelve.open('data/whoosh') as db:
#    for s in db['news_cache'].values():
#        if 'title' in s and not archive.get_story(s['id']):
#            archive.update(s)
@ -1,74 +0,0 @@ |
|||||||
import archive |
|
||||||
import database |
|
||||||
import search |
|
||||||
|
|
||||||
import json |
|
||||||
import requests |
|
||||||
|
|
||||||
# Open all three backing stores before touching any data.
database.init()
archive.init()
search.init()

# Running tally of archive documents processed, used for progress output.
count = 0
|
||||||
|
|
||||||
def database_del_story_by_ref(ref):
    """Delete the Story row matching ``ref``, committing on success.

    Rolls back and re-raises on any error so the caller sees the failure.
    """
    # Create the session outside the try: if Session() itself raised inside
    # it, the finally block would hit an unbound ``session`` (NameError)
    # masking the real error.
    session = database.Session()
    try:
        session.query(database.Story).filter(database.Story.ref == ref).delete()
        session.commit()
    except BaseException:
        # Explicit BaseException is equivalent to the bare except it
        # replaces; the exception is always re-raised after rollback.
        session.rollback()
        raise
    finally:
        session.close()
|
||||||
|
|
||||||
def search_del_story(sid):
    """Delete a story document from the MeiliSearch index.

    Returns the parsed JSON response on success, False on failure.
    KeyboardInterrupt is re-raised so Ctrl-C still aborts the run.
    """
    # This script never imports logging at top level, so the original
    # error path raised NameError on top of the real failure; import it
    # locally to keep the handler working.
    import logging

    try:
        r = requests.delete(search.MEILI_URL + 'indexes/qotnews/documents/' + sid, timeout=2)
        if r.status_code != 202:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem deleting MeiliSearch story: {}'.format(str(e)))
        return False
|
||||||
|
|
||||||
# Replay every archived whoosh document into the SQL database and
# MeiliSearch. Best-effort: individual failures are reported and skipped;
# Ctrl-C stops the whole run.
with archive.ix.searcher() as searcher:
    print('count all', searcher.doc_count_all())
    print('count', searcher.doc_count())

    for doc in searcher.documents():
        try:
            print('num', count, 'id', doc['id'])
            count += 1

            story = doc['story']
            story.pop('img', None)  # drop stale image references

            if 'reddit.com/r/technology' in story['link']:
                print('skipping r/technology')
                continue

            try:
                database.put_story(story)
            except database.IntegrityError:
                # A story with this ref already exists; keep whichever
                # snapshot has more comments.
                print('collision!')
                old_story = database.get_story_by_ref(story['ref'])
                old_story = json.loads(old_story.full_json)
                if story['num_comments'] > old_story['num_comments']:
                    print('more comments, replacing')
                    database_del_story_by_ref(story['ref'])
                    database.put_story(story)
                    search_del_story(old_story['id'])
                else:
                    print('fewer comments, skipping')
                    continue

            search.put_story(story)
            print()
        except KeyboardInterrupt:
            break
        except BaseException as e:
            print('skipping', doc['id'])
            print('reason:', e)
|
Loading…
Reference in new issue