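"""Copy every story from the archive's full-text index (a Whoosh-style
searcher) into the database and the MeiliSearch index. On a ref collision
the version with more comments wins; r/technology stories are skipped.
"""
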
import archive
import database
import search

import json
import logging
import requests
database.init()
archive.init()
search.init()

count = 0
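
# Delete any existing story row with this ref from the database.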
def database_del_story_by_ref(ref):
    session = database.Session()
    try:
        session.query(database.Story).filter(database.Story.ref == ref).delete()
        session.commit()
    except:
        # Roll back on any failure, then re-raise for the caller to handle.
        session.rollback()
        raise
    finally:
        session.close()
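
# Delete a story document from the MeiliSearch index by its id.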
def search_del_story(sid):
    try:
        r = requests.delete(search.MEILI_URL + 'indexes/qotnews/documents/' + sid, timeout=2)
        # MeiliSearch acknowledges document deletes asynchronously with 202 Accepted.
        if r.status_code != 202:
            raise Exception('Bad response code ' + str(r.status_code))
        return r.json()
    except KeyboardInterrupt:
        raise
    except BaseException as e:
        logging.error('Problem deleting MeiliSearch story: {}'.format(str(e)))
        return False
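
# Walk every document in the archive index and re-insert it into the
# database and the search index.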
with archive.ix.searcher() as searcher:
    print('count all', searcher.doc_count_all())
    print('count', searcher.doc_count())

    for doc in searcher.documents():
        try:
            print('num', count, 'id', doc['id'])
            count += 1

            story = doc['story']
            story.pop('img', None)  # drop the stored image field, if any

            if 'reddit.com/r/technology' in story['link']:
                print('skipping r/technology')
                continue
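
            # Put the story in the database; on a ref collision, keep
            # whichever version has more comments.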
            try:
                database.put_story(story)
            except database.IntegrityError:
                print('collision!')
                old_story = database.get_story_by_ref(story['ref'])
                old_story = json.loads(old_story.full_json)

                if story['num_comments'] > old_story['num_comments']:
                    print('more comments, replacing')
                    database_del_story_by_ref(story['ref'])
                    database.put_story(story)
                    search_del_story(old_story['id'])
                else:
                    print('fewer comments, skipping')
                    continue
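
            # Finally, index the story in MeiliSearch.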
            search.put_story(story)
            print()

        except KeyboardInterrupt:
            break
        except BaseException as e:
            print('skipping', doc['id'])
            print('reason:', e)