Massive refactor to get rid of the global app.
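The pattern throughout the commit: everything that used to reach for a module-level singleton (getObsidianApp(), getSettings(), OmnisearchCache.getInstance(), the exported cacheManager) now receives the OmnisearchPlugin instance and reads plugin.app, plugin.settings, and plugin.cache through it. A minimal sketch of the wiring this implies on the plugin side; the onload() construction below is an assumption, since this diff only shows the consumer side:

import { Plugin } from 'obsidian'
import { CacheManager } from './cache-manager'
import type { OmnisearchCache } from './database'

export default class OmnisearchPlugin extends Plugin {
  // The persistent cache database; how it is constructed is outside this diff.
  public cache!: OmnisearchCache
  public cacheManager!: CacheManager

  async onload() {
    // Dependency injection replaces the old globals: the manager keeps a
    // reference to the plugin and pulls app/settings/cache from it.
    this.cacheManager = new CacheManager(this)
  }
}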
@@ -1,9 +1,5 @@
 import { Notice, TFile } from 'obsidian'
-import {
-  type DocumentRef,
-  getTextExtractor,
-  type IndexedDocument,
-} from './globals'
+import type { DocumentRef, IndexedDocument } from './globals'
 import {
   extractHeadingsFromCache,
   getAliasesFromMetadata,
@@ -11,168 +7,19 @@ import {
   isFileCanvas,
   isFileFromDataloomPlugin,
   isFileImage,
-  isFilePDF,
   isFileOffice,
-  isFilePlaintext,
-  isFilenameIndexable,
+  isFilePDF,
   logDebug,
   makeMD5,
   removeDiacritics,
   stripMarkdownCharacters,
 } from './tools/utils'
 import type { CanvasData } from 'obsidian/canvas'
-import type { AsPlainObject } from 'minisearch'
-import type MiniSearch from 'minisearch'
-import { getObsidianApp } from './stores/obsidian-app'
-import { OmnisearchCache } from './database'
-import { getSettings } from './settings'
+import type { AsPlainObject } from 'minisearch'
+import type OmnisearchPlugin from './main'
 
-/**
- * This function is responsible for extracting the text from a file and
- * returning it as an `IndexedDocument` object.
- * @param path
- */
-async function getAndMapIndexedDocument(
-  path: string
-): Promise<IndexedDocument> {
-  const app = getObsidianApp()
-  const settings = getSettings()
-  const file = app.vault.getAbstractFileByPath(path)
-  if (!file) throw new Error(`Invalid file path: "${path}"`)
-  if (!(file instanceof TFile)) throw new Error(`Not a TFile: "${path}"`)
-  let content: string | null = null
-
-  const extractor = getTextExtractor()
-
-  // ** Plain text **
-  // Just read the file content
-  if (isFilePlaintext(path)) {
-    content = await app.vault.cachedRead(file)
-  }
-
-  // ** Canvas **
-  // Extract the text fields from the json
-  else if (isFileCanvas(path)) {
-    const canvas = JSON.parse(await app.vault.cachedRead(file)) as CanvasData
-    let texts: string[] = []
-    // Concatenate text from the canvas fields
-    for (const node of canvas.nodes) {
-      if (node.type === 'text') {
-        texts.push(node.text)
-      } else if (node.type === 'file') {
-        texts.push(node.file)
-      }
-    }
-    for (const edge of canvas.edges.filter(e => !!e.label)) {
-      texts.push(edge.label!)
-    }
-    content = texts.join('\r\n')
-  }
-
-  // ** Dataloom plugin **
-  else if (isFileFromDataloomPlugin(path)) {
-    try {
-      const data = JSON.parse(await app.vault.cachedRead(file))
-      // data is a json object, we recursively iterate the keys
-      // and concatenate the values if the key is "markdown"
-      const texts: string[] = []
-      const iterate = (obj: any) => {
-        for (const key in obj) {
-          if (typeof obj[key] === 'object') {
-            iterate(obj[key])
-          } else if (key === 'content') {
-            texts.push(obj[key])
-          }
-        }
-      }
-      iterate(data)
-      content = texts.join('\r\n')
-    } catch (e) {
-      console.error('Omnisearch: Error while parsing Dataloom file', path)
-      console.error(e)
-    }
-  }
-
-  // ** Image **
-  else if (
-    isFileImage(path) &&
-    settings.imagesIndexing &&
-    extractor?.canFileBeExtracted(path)
-  ) {
-    content = await extractor.extractText(file)
-  }
-  // ** PDF **
-  else if (
-    isFilePDF(path) &&
-    settings.PDFIndexing &&
-    extractor?.canFileBeExtracted(path)
-  ) {
-    content = await extractor.extractText(file)
-  }
-
-  // ** Office document **
-  else if (
-    isFileOffice(path) &&
-    settings.officeIndexing &&
-    extractor?.canFileBeExtracted(path)
-  ) {
-    content = await extractor.extractText(file)
-  }
-
-  // ** Unsupported files **
-  else if (isFilenameIndexable(path)) {
-    content = file.path
-  }
-
-  if (content === null || content === undefined) {
-    // This shouldn't happen
-    console.warn(`Omnisearch: ${content} content for file`, file.path)
-    content = ''
-  }
-  const metadata = app.metadataCache.getFileCache(file)
-
-  // Look for links that lead to non-existing files,
-  // and add them to the index.
-  if (metadata) {
-    // // FIXME: https://github.com/scambier/obsidian-omnisearch/issues/129
-    // const nonExisting = getNonExistingNotes(file, metadata)
-    // for (const name of nonExisting.filter(
-    //   o => !cacheManager.getLiveDocument(o)
-    // )) {
-    //   NotesIndex.addNonExistingToIndex(name, file.path)
-    // }
-
-    // EXCALIDRAW
-    // Remove the json code
-    if (metadata.frontmatter?.['excalidraw-plugin']) {
-      const comments =
-        metadata.sections?.filter(s => s.type === 'comment') ?? []
-      for (const { start, end } of comments.map(c => c.position)) {
-        content =
-          content.substring(0, start.offset - 1) + content.substring(end.offset)
-      }
-    }
-  }
-
-  const tags = getTagsFromMetadata(metadata)
-  return {
-    basename: file.basename,
-    content,
-    /** Content without diacritics and markdown chars */
-    cleanedContent: stripMarkdownCharacters(removeDiacritics(content)),
-    path: file.path,
-    mtime: file.stat.mtime,
-
-    tags: tags,
-    unmarkedTags: tags.map(t => t.replace('#', '')),
-    aliases: getAliasesFromMetadata(metadata).join(''),
-    headings1: metadata ? extractHeadingsFromCache(metadata, 1).join(' ') : '',
-    headings2: metadata ? extractHeadingsFromCache(metadata, 2).join(' ') : '',
-    headings3: metadata ? extractHeadingsFromCache(metadata, 3).join(' ') : '',
-  }
-}
-
-class CacheManager {
+export class CacheManager {
   /**
    * Show an empty input field next time the user opens Omnisearch modal
    */
@@ -184,13 +31,15 @@ class CacheManager {
    */
   private documents: Map<string, IndexedDocument> = new Map()
 
+  constructor(private plugin: OmnisearchPlugin) {}
+
   /**
    * Set or update the live cache with the content of the given file.
    * @param path
    */
   public async addToLiveCache(path: string): Promise<void> {
     try {
-      const doc = await getAndMapIndexedDocument(path)
+      const doc = await this.getAndMapIndexedDocument(path)
       if (!doc.path) {
         console.error(
           `Missing .path field in IndexedDocument "${doc.basename}", skipping`
@@ -224,7 +73,8 @@ class CacheManager {
       return
     }
     this.nextQueryIsEmpty = false
-    const database = OmnisearchCache.getInstance()
+    // TODO: rename
+    const database = this.plugin.cache
     let history = await database.searchHistory.toArray()
     history = history.filter(s => s.query !== query).reverse()
     history.unshift({ query })
@@ -237,7 +87,7 @@ class CacheManager {
    * @returns The search history, in reverse chronological order
    */
   public async getSearchHistory(): Promise<ReadonlyArray<string>> {
-    const data = (await OmnisearchCache.getInstance().searchHistory.toArray())
+    const data = (await this.plugin.cache.searchHistory.toArray())
       .reverse()
       .map(o => o.query)
     if (this.nextQueryIsEmpty) {
@@ -246,8 +96,6 @@ class CacheManager {
     return data
   }
 
-  //#region Minisearch
-
   public getDocumentsChecksum(documents: IndexedDocument[]): string {
     return makeMD5(
       JSON.stringify(
@@ -263,13 +111,15 @@ class CacheManager {
     )
   }
 
+  //#region Minisearch
+
   public async getMinisearchCache(): Promise<{
     paths: DocumentRef[]
     data: AsPlainObject
   } | null> {
     try {
       const cachedIndex = (
-        await OmnisearchCache.getInstance().minisearch.toArray()
+        await this.plugin.cache.minisearch.toArray()
       )[0]
       return cachedIndex
     } catch (e) {
@@ -287,7 +137,8 @@ class CacheManager {
     indexed: Map<string, number>
   ): Promise<void> {
     const paths = Array.from(indexed).map(([k, v]) => ({ path: k, mtime: v }))
-    const database = OmnisearchCache.getInstance()
+    // TODO: rename
+    const database = this.plugin.cache
     await database.minisearch.clear()
     await database.minisearch.add({
       date: new Date().toISOString(),
@@ -297,7 +148,197 @@ class CacheManager {
     console.log('Omnisearch - Search cache written')
   }
 
-  //#endregion Minisearch
-}
+  public isFileIndexable(path: string): boolean {
+    return this.isFilenameIndexable(path) || this.isContentIndexable(path)
+  }
 
-export const cacheManager = new CacheManager()
+  //#endregion Minisearch
+
+  public isContentIndexable(path: string): boolean {
+    const settings = this.plugin.settings
+    const hasTextExtractor = !!this.plugin.getTextExtractor()
+    const canIndexPDF = hasTextExtractor && settings.PDFIndexing
+    const canIndexImages = hasTextExtractor && settings.imagesIndexing
+    return (
+      this.isFilePlaintext(path) ||
+      isFileCanvas(path) ||
+      isFileFromDataloomPlugin(path) ||
+      (canIndexPDF && isFilePDF(path)) ||
+      (canIndexImages && isFileImage(path))
+    )
+  }
+
+  public isFilenameIndexable(path: string): boolean {
+    return (
+      this.canIndexUnsupportedFiles() ||
+      this.isFilePlaintext(path) ||
+      isFileCanvas(path) ||
+      isFileFromDataloomPlugin(path)
+    )
+  }
+
+  public canIndexUnsupportedFiles(): boolean {
+    return (
+      this.plugin.settings.unsupportedFilesIndexing === 'yes' ||
+      (this.plugin.settings.unsupportedFilesIndexing === 'default' &&
+        !!this.plugin.app.vault.getConfig('showUnsupportedFiles'))
+    )
+  }
+
+  private isFilePlaintext(path: string): boolean {
+    return [...this.plugin.settings.indexedFileTypes, 'md'].some(t =>
+      path.endsWith(`.${t}`)
+    )
+  }
+
+  /**
+   * This function is responsible for extracting the text from a file and
+   * returning it as an `IndexedDocument` object.
+   * @param path
+   */
+  private async getAndMapIndexedDocument(
+    path: string
+  ): Promise<IndexedDocument> {
+    const app = this.plugin.app
+    const file = app.vault.getAbstractFileByPath(path)
+    if (!file) throw new Error(`Invalid file path: "${path}"`)
+    if (!(file instanceof TFile)) throw new Error(`Not a TFile: "${path}"`)
+    let content: string | null = null
+
+    const extractor = this.plugin.getTextExtractor()
+
+    // ** Plain text **
+    // Just read the file content
+    if (this.isFilePlaintext(path)) {
+      content = await app.vault.cachedRead(file)
+    }
+
+    // ** Canvas **
+    // Extract the text fields from the json
+    else if (isFileCanvas(path)) {
+      const canvas = JSON.parse(await app.vault.cachedRead(file)) as CanvasData
+      let texts: string[] = []
+      // Concatenate text from the canvas fields
+      for (const node of canvas.nodes) {
+        if (node.type === 'text') {
+          texts.push(node.text)
+        } else if (node.type === 'file') {
+          texts.push(node.file)
+        }
+      }
+      for (const edge of canvas.edges.filter(e => !!e.label)) {
+        texts.push(edge.label!)
+      }
+      content = texts.join('\r\n')
+    }
+
+    // ** Dataloom plugin **
+    else if (isFileFromDataloomPlugin(path)) {
+      try {
+        const data = JSON.parse(await app.vault.cachedRead(file))
+        // data is a json object, we recursively iterate the keys
+        // and concatenate the values if the key is "markdown"
+        const texts: string[] = []
+        const iterate = (obj: any) => {
+          for (const key in obj) {
+            if (typeof obj[key] === 'object') {
+              iterate(obj[key])
+            } else if (key === 'content') {
+              texts.push(obj[key])
+            }
+          }
+        }
+        iterate(data)
+        content = texts.join('\r\n')
+      } catch (e) {
+        console.error('Omnisearch: Error while parsing Dataloom file', path)
+        console.error(e)
+      }
+    }
+
+    // ** Image **
+    else if (
+      isFileImage(path) &&
+      this.plugin.settings.imagesIndexing &&
+      extractor?.canFileBeExtracted(path)
+    ) {
+      content = await extractor.extractText(file)
+    }
+    // ** PDF **
+    else if (
+      isFilePDF(path) &&
+      this.plugin.settings.PDFIndexing &&
+      extractor?.canFileBeExtracted(path)
+    ) {
+      content = await extractor.extractText(file)
+    }
+
+    // ** Office document **
+    else if (
+      isFileOffice(path) &&
+      this.plugin.settings.officeIndexing &&
+      extractor?.canFileBeExtracted(path)
+    ) {
+      content = await extractor.extractText(file)
+    }
+
+    // ** Unsupported files **
+    else if (this.isFilenameIndexable(path)) {
+      content = file.path
+    }
+
+    if (content === null || content === undefined) {
+      // This shouldn't happen
+      console.warn(`Omnisearch: ${content} content for file`, file.path)
+      content = ''
+    }
+    const metadata = app.metadataCache.getFileCache(file)
+
+    // Look for links that lead to non-existing files,
+    // and add them to the index.
+    if (metadata) {
+      // // FIXME: https://github.com/scambier/obsidian-omnisearch/issues/129
+      // const nonExisting = getNonExistingNotes(file, metadata)
+      // for (const name of nonExisting.filter(
+      //   o => !cacheManager.getLiveDocument(o)
+      // )) {
+      //   NotesIndex.addNonExistingToIndex(name, file.path)
+      // }
+
+      // EXCALIDRAW
+      // Remove the json code
+      if (metadata.frontmatter?.['excalidraw-plugin']) {
+        const comments =
+          metadata.sections?.filter(s => s.type === 'comment') ?? []
+        for (const { start, end } of comments.map(c => c.position)) {
+          content =
+            content.substring(0, start.offset - 1) +
+            content.substring(end.offset)
+        }
+      }
+    }
+
+    const tags = getTagsFromMetadata(metadata)
+    return {
+      basename: file.basename,
+      content,
+      /** Content without diacritics and markdown chars */
+      cleanedContent: stripMarkdownCharacters(removeDiacritics(content)),
+      path: file.path,
+      mtime: file.stat.mtime,
+
+      tags: tags,
+      unmarkedTags: tags.map(t => t.replace('#', '')),
+      aliases: getAliasesFromMetadata(metadata).join(''),
+      headings1: metadata
+        ? extractHeadingsFromCache(metadata, 1).join(' ')
+        : '',
+      headings2: metadata
+        ? extractHeadingsFromCache(metadata, 2).join(' ')
+        : '',
+      headings3: metadata
+        ? extractHeadingsFromCache(metadata, 3).join(' ')
+        : '',
+    }
+  }
+}
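With the exported cacheManager singleton removed, call sites now go through whichever plugin reference they hold. A hypothetical consumer under that assumption (reindexFile and the plugin.cacheManager field are illustrative, not part of this diff):

import type OmnisearchPlugin from './main'

// Illustrative only: the consumer-side pattern this refactor enables.
export async function reindexFile(
  plugin: OmnisearchPlugin,
  path: string
): Promise<void> {
  // isFileIndexable() and addToLiveCache() are public on CacheManager
  // after this commit.
  if (plugin.cacheManager.isFileIndexable(path)) {
    await plugin.cacheManager.addToLiveCache(path)
  }
}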