Refactored code to split search and index

2022-09-30 21:33:44 +02:00
parent 342e36aa08
commit d47309f576
6 changed files with 197 additions and 195 deletions
--- a/src/components/ModalVault.svelte
+++ b/src/components/ModalVault.svelte
@@ -5,13 +5,14 @@
  import ModalContainer from './ModalContainer.svelte'
  import { eventBus, type ResultNote } from 'src/globals'
  import { createNote, openNote } from 'src/notes'
-  import { getSuggestions, reindexNotes } from 'src/search'
+  import { getSuggestions } from 'src/search'
  import { getCtrlKeyLabel, getExtension, loopIndex } from 'src/utils'
  import { OmnisearchInFileModal, type OmnisearchVaultModal } from 'src/modals'
  import ResultItemVault from './ResultItemVault.svelte'
  import { Query } from 'src/query'
  import { saveSearchHistory, searchHistory } from 'src/search-history'
  import { settings } from '../settings'
+  import { reindexNotes } from '../notes-index'

  export let modal: OmnisearchVaultModal
  let selectedIndex = 0
--- a/src/main.ts
+++ b/src/main.ts
@@ -1,10 +1,5 @@
 import { Plugin, TFile } from 'obsidian'
-import {
-  addNoteToReindex,
-  addToIndex,
-  initGlobalSearchIndex,
-  removeFromIndex,
-} from './search'
+import { initGlobalSearchIndex } from './search'
 import { OmnisearchInFileModal, OmnisearchVaultModal } from './modals'
 import { loadSettings, settings, SettingsTab, showContext } from './settings'
 import { eventBus } from './globals'
@@ -12,6 +7,7 @@ import { registerAPI } from '@vanakat/plugin-api'
 import api from './api'
 import { loadSearchHistory } from './search-history'
 import { isFileIndexable } from './utils'
+import { addNoteToReindex, addToIndex, removeFromIndex } from './notes-index'

 function _registerAPI(plugin: OmnisearchPlugin): void {
  registerAPI('omnisearch', api, plugin as any)
--- a/src/notes-index.ts
+++ b/src/notes-index.ts
@@ -0,0 +1,180 @@
+import { Notice, TAbstractFile, TFile } from 'obsidian'
+import {
+  extractHeadingsFromCache,
+  getAliasesFromMetadata,
+  getTagsFromMetadata,
+  isFileIndexable,
+  removeDiacritics,
+  wait,
+} from './utils'
+import {
+  addNoteToCache,
+  getNonExistingNotes,
+  getNonExistingNotesFromCache,
+  getNoteFromCache,
+  removeAnchors,
+  removeNoteFromCache,
+  saveNotesCacheToFile,
+} from './notes'
+import { getPdfText } from './pdf-parser'
+import type { IndexedNote } from './globals'
+import { searchIndexFilePath } from './globals'
+import { settings } from './settings'
+import { minisearchInstance } from './search'
+
+let isIndexChanged: boolean
+
+/**
+ * Adds a file to the index
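+ * @param file The file to index; anything that is not an indexable TFile is skipped
+ * @returns A promise that resolves once the indexing attempt is over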
+ */
+export async function addToIndex(file: TAbstractFile): Promise<void> {
+  if (!(file instanceof TFile) || !isFileIndexable(file.path)) {
+    return
+  }
+
+  // Check if the file was already indexed as non-existent,
+  // and if so, remove it from the index (before adding it again)
+  if (getNoteFromCache(file.path)?.doesNotExist) {
+    removeFromIndex(file.path)
+  }
+
+  try {
+    // console.log(`Omnisearch - adding ${file.path} to index`)
+
+    // Look for links that lead to non-existing files,
+    // and index them as well
+    const metadata = app.metadataCache.getFileCache(file)
+    if (metadata) {
+      const nonExisting = getNonExistingNotes(file, metadata)
+      for (const name of nonExisting.filter(o => !getNoteFromCache(o))) {
+        addNonExistingToIndex(name, file.path)
+      }
+    }
+
+    if (getNoteFromCache(file.path)) {
+      throw new Error(`${file.basename} is already indexed`)
+    }
+
+    let content
+    if (file.path.endsWith('.pdf')) {
+      content = removeDiacritics(await getPdfText(file as TFile))
+    } else {
+      // Fetch content from the cache to index it as-is
+      content = removeDiacritics(await app.vault.cachedRead(file))
+    }
+
+    // Make the document and index it
+    const note: IndexedNote = {
+      basename: removeDiacritics(file.basename),
+      content,
+      path: file.path,
+      mtime: file.stat.mtime,
+
+      tags: getTagsFromMetadata(metadata),
+      aliases: getAliasesFromMetadata(metadata).join(''),
+      headings1: metadata
+        ? extractHeadingsFromCache(metadata, 1).join(' ')
+        : '',
+      headings2: metadata
+        ? extractHeadingsFromCache(metadata, 2).join(' ')
+        : '',
+      headings3: metadata
+        ? extractHeadingsFromCache(metadata, 3).join(' ')
+        : '',
+    }
+
+    minisearchInstance.add(note)
+    isIndexChanged = true
+    addNoteToCache(note.path, note)
+  } catch (e) {
+    console.trace('Error while indexing ' + file.basename)
+    console.error(e)
+  }
+}
+
+/**
+ * Index a non-existing note.
+ * Useful to find internal links that do not lead anywhere yet
+ * @param name Link target of the missing note; ".md" is appended when absent
+ * @param parent Path of the note that links to the non-existing note
+ */
+export function addNonExistingToIndex(name: string, parent: string): void {
+  name = removeAnchors(name)
+  const filename = name + (name.endsWith('.md') ? '' : '.md')
+  if (getNoteFromCache(filename)) return
+
+  const note = {
+    path: filename,
+    basename: name,
+    mtime: 0,
+
+    content: '',
+    aliases: '',
+    headings1: '',
+    headings2: '',
+    headings3: '',
+
+    doesNotExist: true,
+    parent,
+  } as IndexedNote
+  minisearchInstance.add(note)
+  isIndexChanged = true
+  addNoteToCache(filename, note)
+}
+
+/**
+ * Removes a note from the search index and from the notes cache, by its path.
+ * Non-existing notes whose `parent` is this path are removed as well.
+ * @param path Path of the note to remove; a non-indexable path is logged and skipped
+ */
+export function removeFromIndex(path: string): void {
+  if (!isFileIndexable(path)) {
+    console.info(`"${path}" is not an indexable file`)
+    return
+  }
+  const note = getNoteFromCache(path)
+  if (note) {
+    minisearchInstance.remove(note)
+    isIndexChanged = true
+    removeNoteFromCache(path)
+    // Cascade: also drop the non-existing notes registered with this path as parent
+    getNonExistingNotesFromCache()
+      .filter(n => n.parent === path)
+      .forEach(n => removeFromIndex(n.path))
+  } else {
+    console.warn(`note not found under path ${path}`)
+  }
+}
+
+const notesToReindex = new Set<TAbstractFile>()
+
+export function addNoteToReindex(note: TAbstractFile): void {
+  notesToReindex.add(note)
+}
+
+export async function reindexNotes(): Promise<void> {
+  if (settings.showIndexingNotices && notesToReindex.size > 0) {
+    new Notice(`Omnisearch - Reindexing ${notesToReindex.size} notes`, 2000)
+  }
+  for (const note of notesToReindex) {
+    removeFromIndex(note.path)
+    await addToIndex(note)
+    await wait(0)
+  }
+  notesToReindex.clear()
+
+  await saveIndexToFile()
+}
+
+export async function saveIndexToFile(): Promise<void> {
+  if (settings.storeIndexInFile && minisearchInstance && isIndexChanged) {
+    const json = JSON.stringify(minisearchInstance)
+    await app.vault.adapter.write(searchIndexFilePath, json)
+    console.log('Omnisearch - Index saved on disk')
+
+    await saveNotesCacheToFile()
+    isIndexChanged = false
+  }
+}
--- a/src/notes.ts
+++ b/src/notes.ts
@@ -1,15 +1,10 @@
+import { type CachedMetadata, MarkdownView, TFile } from 'obsidian'
 import {
-  MarkdownView,
-  TFile,
-  WorkspaceLeaf,
-  type CachedMetadata,
-} from 'obsidian'
-import {
-  notesCacheFilePath,
  type IndexedNote,
+  notesCacheFilePath,
  type ResultNote,
 } from './globals'
-import { stringsToRegex, wait } from './utils'
+import { stringsToRegex } from './utils'
 import { settings } from './settings'

 /**
@@ -37,10 +32,7 @@ export async function loadNotesCache(): Promise<void> {
      console.error(e)
    }
  }
-
-  if (!notesCache) {
-    notesCache = {}
-  }
+  notesCache ||= {}
 }

 export function getNoteFromCache(key: string): IndexedNote | undefined {
--- a/src/pdf-parser.ts
+++ b/src/pdf-parser.ts
@@ -1,6 +1,3 @@
-// import PDFJs from 'pdfjs-dist'
-// import pdfjsWorker from 'pdfjs-dist/build/pdf.worker.entry'
-import type { TextItem } from 'pdfjs-dist/types/src/display/api'
 import type { TFile } from 'obsidian'
 import {loadPdfJs} from "obsidian";

@@ -14,7 +11,7 @@ export async function getPdfText(file: TFile): Promise<string> {
  const pageTexts = Array.from({ length: doc.numPages }, async (v, i) => {
    const page = await doc.getPage(i + 1)
    const content = await page.getTextContent()
-    return (content.items as TextItem[]).map(token => token.str).join('')
+    return (content.items as any[]).map(token => token.str).join('')
  })
  return (await Promise.all(pageTexts)).join('')
 }
--- a/src/search.ts
+++ b/src/search.ts
@@ -1,17 +1,14 @@
-import { Notice, TAbstractFile, TFile } from 'obsidian'
+import { Notice } from 'obsidian'
 import MiniSearch, { type Options, type SearchResult } from 'minisearch'
 import {
  chsRegex,
-  searchIndexFilePath,
-  SPACE_OR_PUNCTUATION,
  type IndexedNote,
  type ResultNote,
+  searchIndexFilePath,
  type SearchMatch,
+  SPACE_OR_PUNCTUATION,
 } from './globals'
 import {
-  extractHeadingsFromCache,
-  getAliasesFromMetadata,
-  getTagsFromMetadata,
  isFileIndexable,
  removeDiacritics,
  stringsToRegex,
@@ -21,21 +18,15 @@ import {
 import type { Query } from './query'
 import { settings } from './settings'
 import {
-  removeNoteFromCache,
  getNoteFromCache,
-  getNonExistingNotes,
-  resetNotesCache,
-  addNoteToCache,
-  removeAnchors,
-  getNonExistingNotesFromCache,
-  loadNotesCache,
-  saveNotesCacheToFile,
  isCacheOutdated,
+  loadNotesCache,
+  resetNotesCache,
 } from './notes'
-import { getPdfText } from './pdf-parser'
+import { addToIndex, removeFromIndex, saveIndexToFile } from './notes-index'
+
+export let minisearchInstance: MiniSearch<IndexedNote>

-let minisearchInstance: MiniSearch<IndexedNote>
-let isIndexChanged: boolean
 const tokenize = (text: string): string[] => {
  const tokens = text.split(SPACE_OR_PUNCTUATION)
  const chsSegmenter = (app as any).plugins.plugins['cm-chs-patch']
@@ -293,158 +284,3 @@ export async function getSuggestions(
    return resultNote
  })
 }
-
-/**
- * Adds a file to the index
- * @param file
- * @returns
- */
-export async function addToIndex(file: TAbstractFile): Promise<void> {
-  if (!(file instanceof TFile) || !isFileIndexable(file.path)) {
-    return
-  }
-
-  // Check if the file was already indexed as non-existent,
-  // and if so, remove it from the index (before adding it again)
-  if (getNoteFromCache(file.path)?.doesNotExist) {
-    removeFromIndex(file.path)
-  }
-
-  try {
-    // console.log(`Omnisearch - adding ${file.path} to index`)
-
-    // Look for links that lead to non-existing files,
-    // and index them as well
-    const metadata = app.metadataCache.getFileCache(file)
-    if (metadata) {
-      const nonExisting = getNonExistingNotes(file, metadata)
-      for (const name of nonExisting.filter(o => !getNoteFromCache(o))) {
-        addNonExistingToIndex(name, file.path)
-      }
-    }
-
-    if (getNoteFromCache(file.path)) {
-      throw new Error(`${file.basename} is already indexed`)
-    }
-
-    let content
-    if (file.path.endsWith('.pdf')) {
-      content = removeDiacritics(await getPdfText(file as TFile))
-    } else {
-      // Fetch content from the cache to index it as-is
-      content = removeDiacritics(await app.vault.cachedRead(file))
-    }
-
-    // Make the document and index it
-    const note: IndexedNote = {
-      basename: removeDiacritics(file.basename),
-      content,
-      path: file.path,
-      mtime: file.stat.mtime,
-
-      tags: getTagsFromMetadata(metadata),
-      aliases: getAliasesFromMetadata(metadata).join(''),
-      headings1: metadata
-        ? extractHeadingsFromCache(metadata, 1).join(' ')
-        : '',
-      headings2: metadata
-        ? extractHeadingsFromCache(metadata, 2).join(' ')
-        : '',
-      headings3: metadata
-        ? extractHeadingsFromCache(metadata, 3).join(' ')
-        : '',
-    }
-
-    minisearchInstance.add(note)
-    isIndexChanged = true
-    addNoteToCache(note.path, note)
-  } catch (e) {
-    console.trace('Error while indexing ' + file.basename)
-    console.error(e)
-  }
-}
-
-/**
- * Index a non-existing note.
- * Useful to find internal links that lead (yet) to nowhere
- * @param name
- * @param parent The note referencing the
- */
-export function addNonExistingToIndex(name: string, parent: string): void {
-  name = removeAnchors(name)
-  const filename = name + (name.endsWith('.md') ? '' : '.md')
-  if (getNoteFromCache(filename)) return
-
-  const note = {
-    path: filename,
-    basename: name,
-    mtime: 0,
-
-    content: '',
-    aliases: '',
-    headings1: '',
-    headings2: '',
-    headings3: '',
-
-    doesNotExist: true,
-    parent,
-  } as IndexedNote
-  minisearchInstance.add(note)
-  isIndexChanged = true
-  addNoteToCache(filename, note)
-}
-
-/**
- * Removes a file from the index, by its path
- * @param path
- */
-export function removeFromIndex(path: string): void {
-  if (!isFileIndexable(path)) {
-    console.info(`"${path}" is not an indexable file`)
-    return
-  }
-  const note = getNoteFromCache(path)
-  if (note) {
-    minisearchInstance.remove(note)
-    isIndexChanged = true
-    removeNoteFromCache(path)
-    getNonExistingNotesFromCache()
-      .filter(n => n.parent === path)
-      .forEach(n => {
-        removeFromIndex(n.path)
-      })
-  } else {
-    console.warn(`not not found under path ${path}`)
-  }
-}
-
-const notesToReindex = new Set<TAbstractFile>()
-
-export function addNoteToReindex(note: TAbstractFile): void {
-  notesToReindex.add(note)
-}
-
-export async function reindexNotes(): Promise<void> {
-  if (settings.showIndexingNotices && notesToReindex.size > 0) {
-    new Notice(`Omnisearch - Reindexing ${notesToReindex.size} notes`, 2000)
-  }
-  for (const note of notesToReindex) {
-    removeFromIndex(note.path)
-    await addToIndex(note)
-    await wait(0)
-  }
-  notesToReindex.clear()
-
-  await saveIndexToFile()
-}
-
-async function saveIndexToFile(): Promise<void> {
-  if (settings.storeIndexInFile && minisearchInstance && isIndexChanged) {
-    const json = JSON.stringify(minisearchInstance)
-    await app.vault.adapter.write(searchIndexFilePath, json)
-    console.log('Omnisearch - Index saved on disk')
-
-    await saveNotesCacheToFile()
-    isIndexChanged = false
-  }
-}