Merge branch 'develop' into feature/ocr

2022-11-02 14:43:13 +01:00
parent b8118957cb a6342a675f
commit 1d02c8617a
4 changed files with 66 additions and 18 deletions
@@ -14,17 +14,20 @@ Under the hood, it uses the excellent [MiniSearch](https://github.com/lucaong/mi

 ## Features

+- Find your notes faster than ever
+    - Workflow similar to the "Quick Switcher" core plugin
 - Automatic document scoring using the [BM25 algorithm](https://github.com/lucaong/minisearch/issues/129#issuecomment-1046257399)
  - The relevance of a document against a query depends on the number of times the query terms appear in the document, its filename, and its headings
- Can search other plaintext files and PDFs (configurable in settings)
- Workflow similar to "Quick Switcher" plugins
+- Can search other plaintext files and PDFs
+    - Opt-in in settings
+    - PDF indexing is disabled on iOS
 - Keyboard first: you never have to use your mouse
 - Resistance to typos
 - Switch between Vault and In-file search to quickly skim multiple results in a single note
 - Supports `"expressions in quotes"` and `-exclusions`
 - Directly Insert a `[[link]]` from the search results
 - Respects Obsidian's "Excluded Files" list - results are downranked, not hidden
- Optional support for Vim navigation keys (ctrl + j, k, n, p)
+- Supports Vim navigation keys (ctrl + j, k, n, p)

 **Note:** support of Chinese, Japanese, Korean, etc. depends on [this additional plugin](https://github.com/aidenlx/cm-chs-patch). Please read its documentation for more information.

@@ -121,17 +124,15 @@ See [styles.css](./assets/styles.css) for more information.

 **Omnisearch makes Obsidian sluggish at startup.**

- You may have _big_ documents. Huge notes (like novels) can freeze the interface for a short time when being indexed. Enabling the setting "_Persist cache on disk_" may help you in this case.
+- You may have _big_ documents. Huge notes (like novels) can freeze the interface for a short time when being indexed. While Omnisearch uses a cache between sessions, it's still rebuilt at startup to keep it up-to-date.

-**I have thousands of notes, and at startup I have to wait a few seconds before making a query, or else Omnisearch does not return all the expected results.**
+**I have thousands of notes, and at startup I have to wait a few seconds before Omnisearch gives me the context of a result.**

- Enabling the setting "_Persist cache on disk_" may help you in this case.
+- Omnisearch refreshes its index at startup. During this time, you can still find notes, but Omnisearch is not able to show you the excerpts.

 **Omnisearch gives inconsistent/invalid results, or there are errors in the developer console.**

- Go in Omnisearch settings.
- If applicable, disable and re-enable "*Persist cache on disk*".
- Restart Obsidian to clear the cache and force a reindex.
+- Restart Obsidian to force a reindex of Omnisearch.

 **A query should return a result that does not appear.**

@@ -1,6 +1,35 @@
 import Dexie from 'dexie'

-class OmnisearchCache extends Dexie {
+export class OmnisearchCache extends Dexie {
+  public static readonly dbVersion = 6
+  public static readonly dbPrefix = 'omnisearch/cache/'
+  public static readonly dbName = OmnisearchCache.dbPrefix + app.appId
+
+  private static instance: OmnisearchCache
+
+  /**
+   * Deletes Omnisearch databases that have an older version than the current one.
+   *
+   * Every IndexedDB database whose name starts with `dbPrefix` and whose
+   * version differs from the current `dbVersion` is removed.
+   *
+   * Deletions are awaited one by one, so that the returned promise only
+   * resolves once each delete request has settled. This is a best-effort
+   * cleanup: a failed or blocked deletion is logged, never thrown.
+   *
+   * @returns a promise resolved once all delete requests have settled
+   */
+  public static async clearOldDatabases(): Promise<void> {
+    const toDelete = (await indexedDB.databases()).filter(
+      db =>
+        db.name?.startsWith(OmnisearchCache.dbPrefix) &&
+        // version multiplied by 10 https://github.com/dexie/Dexie.js/issues/59
+        db.version !== OmnisearchCache.dbVersion * 10
+    )
+    if (toDelete.length) {
+      console.log('Omnisearch - Those IndexedDb databases will be deleted:')
+      for (const db of toDelete) {
+        const name = db.name
+        if (!name) {
+          continue
+        }
+        console.log(name + ' ' + db.version)
+        // deleteDatabase() returns a request, not a promise: wrap it so that
+        // the caller's `await` really waits for the deletion.
+        await new Promise<void>(resolve => {
+          const request = indexedDB.deleteDatabase(name)
+          request.onsuccess = () => resolve()
+          request.onerror = () => {
+            console.error(
+              'Omnisearch - Could not delete database ' + name,
+              request.error
+            )
+            resolve()
+          }
+          // A blocked deletion completes once the other connections close.
+          // Don't hold up the caller while waiting for that to happen.
+          request.onblocked = () => {
+            console.warn('Omnisearch - Deletion blocked for database ' + name)
+            resolve()
+          }
+        })
+      }
+    }
+  }
+
+  //#region Table declarations
+
  pdf!: Dexie.Table<
    { path: string; hash: string; size: number; text: string },
    string
@@ -8,9 +37,19 @@ class OmnisearchCache extends Dexie {
  searchHistory!: Dexie.Table<{ id?: number; query: string }, number>
  minisearch!: Dexie.Table<{ date: string; data: string }, string>

-  constructor() {
-    super('omnisearch/cache/' + app.appId)
-    this.version(5).stores({
+  //#endregion Table declarations
+
+  /**
+   * Returns the shared OmnisearchCache instance, creating it on first call.
+   */
+  public static getInstance() {
+    const existing = OmnisearchCache.instance
+    if (existing) {
+      return existing
+    }
+    const created = new OmnisearchCache()
+    OmnisearchCache.instance = created
+    return created
+  }
+
+  private constructor() {
+    super(OmnisearchCache.dbName)
+    // Database structure
+    this.version(OmnisearchCache.dbVersion).stores({
      pdf: 'path, hash, size',
      searchHistory: '++id',
      minisearch: 'date',
@@ -18,4 +57,4 @@ class OmnisearchCache extends Dexie {
  }
 }

-export const database = new OmnisearchCache()
+export const database = OmnisearchCache.getInstance()
@@ -11,12 +11,14 @@ import api from './tools/api'
 import { isFilePlaintext, wait } from './tools/utils'
 import * as NotesIndex from './notes-index'
 import * as FileLoader from './file-loader'
+import { OmnisearchCache } from './database'

 export default class OmnisearchPlugin extends Plugin {
  private ribbonButton?: HTMLElement

  async onload(): Promise<void> {
    await cleanOldCacheFiles()
+    await OmnisearchCache.clearOldDatabases()
    await loadSettings(this)

    // Initialize minisearch
@@ -105,6 +107,14 @@ export default class OmnisearchPlugin extends Plugin {
 * Read the files and feed them to Minisearch
 */
 async function populateIndex(): Promise<void> {
+  // We use a tmp minisearch instance to leave the main instance mostly untouched.
+  // Otherwise, we'd have to clear the main instance, and (asynchronously) load the notes.
+  // That would cause a "downtime" in Omnisearch while the index is being gradually rebuilt.
+  //
+  // With the tmp method, we still have access to the cache data while all the
+  // fresh indexing is done in the background.
+  // Once all notes are loaded in tmp, we (synchronously) export tmp and import it into main.
+  // That can cause a small freeze, but no downtime.
  const tmpEngine = SearchEngine.getTmpEngine()

  // Load plain text files
@@ -123,7 +133,7 @@ async function populateIndex(): Promise<void> {
    console.time('Omnisearch - Timing')
    const pdfs = await FileLoader.getPDFFiles()
    // Index them
-    await tmpEngine.addAllToMinisearch(pdfs)
+    await SearchEngine.getEngine().addAllToMinisearch(pdfs)
    console.log(`Omnisearch - Indexed ${pdfs.length} PDFs`)
    console.timeEnd('Omnisearch - Timing')
  }
@@ -141,7 +151,7 @@ async function populateIndex(): Promise<void> {
  // Load PDFs into the main search engine, and write cache
  SearchEngine.loadTmpDataIntoMain()
  SearchEngine.isIndexing.set(false)
-  await tmpEngine.writeToCache()
+  await SearchEngine.getEngine().writeToCache()

  // Clear memory
  SearchEngine.clearTmp()
@@ -317,8 +317,6 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
  weightH2: 1.3,
  weightH3: 1.1,

-  // persistCache: false,
-
  welcomeMessage: '',
 } as const