Squashed commit of the following:

commit 603b9bbde4c6efc90c81032e4e765c64d3075e75
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Tue Oct 11 21:47:03 2022 +0200

    Basic PDF indexing ok

commit 200331bb5c5111493af1e1f6ef8cd4bbfbdbfd4f
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Tue Oct 11 20:56:44 2022 +0200

    Tweaks and comments

commit 434b9662d40c5fea9d8b28d43828b11916db8c94
Author: Simon Cambier <simon.cambier@ores.be>
Date:   Tue Oct 11 16:22:55 2022 +0200

    Refactoring notes & minisearch cache

commit 7253c676c8ed161782ba8e33f0c4c162880925ad
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Tue Oct 11 09:50:33 2022 +0200

    wip

commit 77736e6ef6f28ccfddb64fb768732927d43bbd77
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Mon Oct 10 20:49:02 2022 +0200

    Small rewrites & deps updates

commit 59845fdb89eb6a3ad3f3f9ad75b39e7a3e604c45
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Mon Oct 10 12:22:11 2022 +0200

    wasm + worker ok

commit 1cf3b506e56147586cd0ebcc003642c5230e04cc
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Sun Oct 2 20:04:49 2022 +0200

    no disk access, of course

commit eb3dd9dd4f616a479a53e10856f6c96c6725e911
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Sun Oct 2 19:08:48 2022 +0200

    Rollup build ok

commit 54f2b7e615456c0e1b1504691689d1ba2c72d9e8
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Sun Oct 2 16:03:31 2022 +0200

    Rollup build + wasm PoC
2022-10-11 21:54:11 +02:00
parent cf7f6af257
commit 7ddae6dc08
28 changed files with 18437 additions and 923 deletions
--- a/src/search.ts
+++ b/src/search.ts
@@ -4,12 +4,12 @@ import {
  chsRegex,
  type IndexedNote,
  type ResultNote,
-  searchIndexFilePath,
+  minisearchCacheFilePath,
  type SearchMatch,
  SPACE_OR_PUNCTUATION,
 } from './globals'
 import {
-  canIndexPDFs,
+  isFileIndexable,
  isFilePlaintext,
  removeDiacritics,
  stringsToRegex,
@@ -18,13 +18,15 @@ import {
 } from './utils'
 import type { Query } from './query'
 import { settings } from './settings'
-import {
-  getNoteFromCache,
-  isCacheOutdated,
-  loadNotesCache,
-  resetNotesCache,
-} from './notes'
-import {addToIndex, indexPDFs, removeFromIndex, saveIndexToFile} from './notes-index'
+// import {
+//   getNoteFromCache,
+//   isCacheOutdated,
+//   loadNotesCache,
+//   resetNotesCache,
+// } from './notes'
+import * as NotesIndex from './notes-index'
+import PQueue from 'p-queue-compat'
+import { cacheManager } from './cache-manager'

 export let minisearchInstance: MiniSearch<IndexedNote>

@@ -60,15 +62,18 @@ export async function initGlobalSearchIndex(): Promise<void> {
    storeFields: ['tags'],
  }

-  if (
-    settings.storeIndexInFile &&
-    (await app.vault.adapter.exists(searchIndexFilePath))
-  ) {
+  // Default instance
+  minisearchInstance = new MiniSearch(options)
+
+  // Load Minisearch cache, if it exists
+  if (await app.vault.adapter.exists(minisearchCacheFilePath)) {
    try {
-      const json = await app.vault.adapter.read(searchIndexFilePath)
-      minisearchInstance = MiniSearch.loadJSON(json, options)
+      const json = await cacheManager.readMinisearchIndex()
+      if (json) {
+        // If we have cache data, reload it
+        minisearchInstance = MiniSearch.loadJSON(json, options)
+      }
      console.log('Omnisearch - MiniSearch index loaded from the file')
-      await loadNotesCache()
    } catch (e) {
      console.trace(
        'Omnisearch - Could not load MiniSearch index from the file'
@@ -77,10 +82,9 @@ export async function initGlobalSearchIndex(): Promise<void> {
    }
  }

-  if (!minisearchInstance) {
-    minisearchInstance = new MiniSearch(options)
-    resetNotesCache()
-  }
+  // if (!minisearchInstance) {
+  //   resetNotesCache()
+  // }

  // Index files that are already present
  const start = new Date().getTime()
@@ -89,32 +93,28 @@ export async function initGlobalSearchIndex(): Promise<void> {

  let files
  let notesSuffix
-  if (settings.storeIndexInFile) {
-    files = allFiles.filter(file => isCacheOutdated(file))
+  if (settings.persistCache) {
+    files = allFiles.filter(file => cacheManager.isCacheOutdated(file))
    notesSuffix = 'modified notes'
  } else {
    files = allFiles
    notesSuffix = 'notes'
  }

-  console.log(`Omnisearch - indexing ${files.length} ${notesSuffix}`)
-
-  // This is basically the same behavior as MiniSearch's `addAllAsync()`.
-  // We index markdown and plaintext files by batches of 10
-  let promises: Promise<void>[] = []
-  for (let i = 0; i < files.length; ++i) {
-    const file = files[i]
-    if (getNoteFromCache(file.path)) {
-      removeFromIndex(file.path)
-    }
-    promises.push(addToIndex(file))
-    if (i % 10 === 0) {
-      await wait(1)
-      await Promise.all(promises)
-      promises = []
-    }
+  if (files.length > 0) {
+    console.log(`Omnisearch - Indexing ${files.length} ${notesSuffix}`)
  }
-  await Promise.all(promises)
+
+  // Read and index all the files into the search engine
+  const queue = new PQueue({ concurrency: 10 })
+  for (const file of files) {
+    if (cacheManager.getNoteFromCache(file.path)) {
+      NotesIndex.removeFromIndex(file.path)
+    }
+    queue.add(() => NotesIndex.addToIndexAndCache(file))
+  }
+
+  await queue.onEmpty()

  if (files.length > 0) {
    const message = `Omnisearch - Indexed ${files.length} ${notesSuffix} in ${
@@ -127,10 +127,10 @@ export async function initGlobalSearchIndex(): Promise<void> {
      new Notice(message)
    }

-    await saveIndexToFile()
+    await cacheManager.writeMinisearchIndex(minisearchInstance)

    // PDFs are indexed later, since they're heavier
-    await indexPDFs()
+    await NotesIndex.indexPDFs()
  }
 }

@@ -172,9 +172,10 @@ async function search(query: Query): Promise<SearchResult[]> {
  const exactTerms = query.getExactTerms()
  if (exactTerms.length) {
    results = results.filter(r => {
-      const title = getNoteFromCache(r.id)?.path.toLowerCase() ?? ''
+      const title =
+        cacheManager.getNoteFromCache(r.id)?.path.toLowerCase() ?? ''
      const content = stripMarkdownCharacters(
-        getNoteFromCache(r.id)?.content ?? ''
+        cacheManager.getNoteFromCache(r.id)?.content ?? ''
      ).toLowerCase()
      return exactTerms.every(q => content.includes(q) || title.includes(q))
    })
@@ -185,7 +186,7 @@ async function search(query: Query): Promise<SearchResult[]> {
  if (exclusions.length) {
    results = results.filter(r => {
      const content = stripMarkdownCharacters(
-        getNoteFromCache(r.id)?.content ?? ''
+        cacheManager.getNoteFromCache(r.id)?.content ?? ''
      ).toLowerCase()
      return exclusions.every(q => !content.includes(q.value))
    })
@@ -253,7 +254,7 @@ export async function getSuggestions(

  // Map the raw results to get usable suggestions
  return results.map(result => {
-    const note = getNoteFromCache(result.id)
+    const note = cacheManager.getNoteFromCache(result.id)
    if (!note) {
      throw new Error(`Note "${result.id}" not indexed`)
    }