#254 - Index unsupported files

2023-07-17 11:49:54 +02:00
parent 6da4a3e785
commit 1a109d6d89
4 changed files with 62 additions and 12 deletions
--- a/src/cache-manager.ts
+++ b/src/cache-manager.ts
@@ -12,6 +12,7 @@ import {
  isFileCanvas,
  isFileFromDataloomPlugin,
  isFilePlaintext,
+  isFilenameIndexable,
  logDebug,
  makeMD5,
  removeDiacritics,
@@ -87,8 +88,11 @@ async function getAndMapIndexedDocument(
  // ** Image or PDF **
  else if (extractor?.canFileBeExtracted(path)) {
    content = await extractor.extractText(file)
-  } else {
-    throw new Error(`Unsupported file type: "${path}"`)
+  }
+  
+  // ** Unsupported files **
+  else if (isFilenameIndexable(path)) {
+    content = file.path
  }

  if (content === null || content === undefined) {
--- a/src/components/ModalInFile.svelte
+++ b/src/components/ModalInFile.svelte
@@ -18,7 +18,6 @@
  import { Query } from 'src/search/query'
  import { openNote } from 'src/tools/notes'
  import { searchEngine } from 'src/search/omnisearch'
-  import { cacheManager } from 'src/cache-manager'

  export let modal: OmnisearchInFileModal
  export let parent: OmnisearchVaultModal | null = null
--- a/src/settings.ts
+++ b/src/settings.ts
@@ -37,6 +37,8 @@ export interface OmnisearchSettings extends WeightingSettings {
  imagesIndexing: boolean
  /** Enable Dataloom indexing */
  dataloomIndexing: boolean
+  /** Enable indexing of unknown files */
+  unsupportedFilesIndexing: 'yes' | 'no' | 'default'
  /** Activate the small 🔍 button on Obsidian's ribbon */
  ribbonIcon: boolean
  /** Display the small contextual excerpt in search results */
@@ -163,28 +165,49 @@ export class SettingsTab extends PluginSettingTab {
      )
      .setDisabled(!getTextExtractor())

-    // Additional files to index
+    // Additional text files to index
    const indexedFileTypesDesc = new DocumentFragment()
    indexedFileTypesDesc.createSpan({}, span => {
-      span.innerHTML = `In addition to standard <code>md</code> files, Omnisearch can also index other <strong style="color: var(--text-accent)">plaintext</strong> files.<br/>
-      Add extensions separated by a space, without the dot. Example: "<code>txt org</code>".<br />
+      span.innerHTML = `In addition to standard <code>md</code> files, Omnisearch can also index other <strong style="color: var(--text-accent)">PLAINTEXT</strong> files.<br/>
+      Add extensions separated by a space, without the dot. Example: "<code>txt org csv</code>".<br />
      ⚠️ <span style="color: var(--text-accent)">Using extensions of non-plaintext files (like .docx or .pptx) WILL cause crashes,
      because Omnisearch will try to index their content.</span><br />
      ${needsARestart}`
    })
    new Setting(containerEl)
-      .setName('Additional files to index')
+      .setName('Additional TEXT files to index')
      .setDesc(indexedFileTypesDesc)
      .addText(component => {
        component
          .setValue(settings.indexedFileTypes.join(' '))
-          .setPlaceholder('Example: txt org')
+          .setPlaceholder('Example: txt org csv')
          .onChange(async v => {
            settings.indexedFileTypes = v.split(' ')
            await saveSettings(this.plugin)
          })
      })

+    // Unsupported files
+    const indexUnsupportedDesc = new DocumentFragment()
+    indexUnsupportedDesc.createSpan({}, span => {
+      span.innerHTML = `
+      Omnisearch can index file<strong>names</strong> of "unsupported" files, such as e.g. <pre style="display:inline">.mp4</pre> or <pre style="display:inline">.xlsx</pre>.<br/>
+      "Obsidian setting" will respect the value of "<em>Files & Links > Detect all file extensions</em>".
+      <br />${needsARestart}`
+    })
+    new Setting(containerEl)
+      .setName('Index unsupported files (beta)')
+      .setDesc(indexUnsupportedDesc)
+      .addDropdown(dropdown => {
+        dropdown
+          .addOptions({ yes: 'Yes', no: 'No', default: 'Obsidian setting' })
+          .setValue(settings.unsupportedFilesIndexing)
+          .onChange(async v => {
+            ;(settings.unsupportedFilesIndexing as any) = v
+            await saveSettings(this.plugin)
+          })
+      })
+
    //#endregion Indexing

    //#region Behavior
@@ -505,6 +528,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
  PDFIndexing: false,
  imagesIndexing: false,
  dataloomIndexing: false,
+  unsupportedFilesIndexing: 'no',
  splitCamelCase: false,
  openInNewPane: false,

@@ -539,5 +563,13 @@ export async function saveSettings(plugin: Plugin): Promise<void> {
 }

 export function isPluginDisabled(): boolean {
-  return app.loadLocalStorage(K_DISABLE_OMNISEARCH) == '1'
+  return app.loadLocalStorage(K_DISABLE_OMNISEARCH) === '1' // disabled only when the value stored under K_DISABLE_OMNISEARCH is exactly the string '1'
+}
+
+/** Tells whether unsupported files may be indexed: 'yes' forces it on, 'default' defers to the vault's `showUnsupportedFiles` config. */
+export function canIndexUnsupportedFiles(): boolean {
+  const mode = settings.unsupportedFilesIndexing
+  if (mode === 'yes') return true
+  // Only 'default' consults the vault config; any other value means "do not index"
+  return mode === 'default' && Boolean(app.vault.getConfig('showUnsupportedFiles'))
 }
--- a/src/tools/utils.ts
+++ b/src/tools/utils.ts
@@ -18,7 +18,7 @@ import {
  SPACE_OR_PUNCTUATION,
  type SearchMatch,
 } from '../globals'
-import { settings } from '../settings'
+import { canIndexUnsupportedFiles, settings } from '../settings'
 import { type BinaryLike, createHash } from 'crypto'
 import { md5 } from 'pure-md5'

@@ -26,7 +26,7 @@ export function highlighter(str: string): string {
  return `<span class="${highlightClass}">${str}</span>`
 }

-export function highlighterGroups(substring: string, ...args: any[]) {
+export function highlighterGroups(_substring: string, ...args: any[]) {
  // args[0] is the single char preceding args[1], which is the word we want to highlight
  if (!!args[1].trim())
    return `<span>${args[0]}</span><span class="${highlightClass}">${args[1]}</span>`
@@ -263,7 +263,7 @@ export function getCtrlKeyLabel(): 'ctrl' | '⌘' {
  return Platform.isMacOS ? '⌘' : 'ctrl'
 }

-export function isFileIndexable(path: string): boolean {
+export function isContentIndexable(path: string): boolean {
  const hasTextExtractor = !!getTextExtractor()
  const canIndexPDF = hasTextExtractor && settings.PDFIndexing
  const canIndexImages = hasTextExtractor && settings.imagesIndexing
@@ -276,6 +276,21 @@ export function isFileIndexable(path: string): boolean {
  )
 }

+/**
+ * Tells whether the file at `path` qualifies for indexing by name: always when
+ * unsupported-file indexing is enabled, otherwise only for the file types tested below.
+ */
+export function isFilenameIndexable(path: string): boolean {
+  if (canIndexUnsupportedFiles()) return true
+  // `some` stops at the first check that accepts the path
+  const typeChecks = [isFilePlaintext, isFileCanvas, isFileFromDataloomPlugin, isFilePDF, isFileImage]
+  return typeChecks.some(accepts => accepts(path))
+}
+
+export function isFileIndexable(path: string): boolean {
+  return [isFilenameIndexable, isContentIndexable].some(accepts => accepts(path)) // filename check first; content check only if it fails
+}
+
 export function isFileImage(path: string): boolean {
  const ext = getExtension(path)
  return ext === 'png' || ext === 'jpg' || ext === 'jpeg' || ext === 'webp'