Merge branch 'master' of https://github.com/scambier/obsidian-omnisearch

# Conflicts: # src/settings.ts
2024-01-20 12:05:25 +01:00
parent 6187b5d76f e618d4ca47
commit 0f7418b22e
3 changed files with 41 additions and 3 deletions
@@ -13,6 +13,7 @@ import {
  isFileFromDataloomPlugin,
  isFileImage,
  isFilePDF,
+  isFileOffice,
  isFilePlaintext,
  isFilenameIndexable,
  logDebug,
@@ -106,6 +107,15 @@ async function getAndMapIndexedDocument(
    content = await extractor.extractText(file)
  }

+  // ** Office document **
+  else if (
+    isFileOffice(path) &&
+    settings.officeIndexing &&
+    extractor?.canFileBeExtracted(path)
+  ) {
+    content = await extractor.extractText(file)
+  }
+
  // ** Unsupported files **
  else if (isFilenameIndexable(path)) {
    content = file.path
@@ -37,6 +37,9 @@ export interface OmnisearchSettings extends WeightingSettings {
  PDFIndexing: boolean
  /** Enable Images indexing */
  imagesIndexing: boolean
+  /** Enable Office documents indexing */
+  officeIndexing: boolean
+
  /** Enable indexing of unknown files */
  unsupportedFilesIndexing: 'yes' | 'no' | 'default'
  /** Activate the small 🔍 button on Obsidian's ribbon */
@@ -158,11 +161,30 @@ export class SettingsTab extends PluginSettingTab {
      )
      .setDisabled(!getTextExtractor())

+    // Office Documents Indexing
+    const indexOfficesDesc = new DocumentFragment()
+    indexOfficesDesc.createSpan({}, span => {
+      span.innerHTML = `Omnisearch will use Text Extractor to index the content of your office documents (currently <pre style="display:inline">.docx</pre> and <pre style="display:inline">.xlsx</pre>)`
+    })
+    new Setting(containerEl)
+      .setName(
+        `Documents content indexing ${getTextExtractor() ? '' : '⚠️ Disabled'}`
+      )
+      .setDesc(indexOfficesDesc)
+      .addToggle(toggle =>
+        toggle.setValue(settings.officeIndexing).onChange(async v => {
+          await database.clearCache()
+          settings.officeIndexing = v
+          await saveSettings(this.plugin)
+        })
+      )
+      .setDisabled(!getTextExtractor())
+
    // Index filenames of unsupported files
    const indexUnsupportedDesc = new DocumentFragment()
    indexUnsupportedDesc.createSpan({}, span => {
      span.innerHTML = `
-      Omnisearch can index file<strong>names</strong> of "unsupported" files, such as e.g. <pre style="display:inline">.mp4</pre>, <pre style="display:inline">.xlsx</pre>, 
+      Omnisearch can index file<strong>names</strong> of "unsupported" files, such as e.g. <pre style="display:inline">.mp4</pre>
      or non-extracted PDFs & images.<br/>
      "Obsidian setting" will respect the value of "Files & Links > Detect all file extensions"`
    })
@@ -175,7 +197,7 @@ export class SettingsTab extends PluginSettingTab {
          .setValue(settings.unsupportedFilesIndexing)
          .onChange(async v => {
            await database.clearCache()
-            ;(settings.unsupportedFilesIndexing as any) = v
+              ; (settings.unsupportedFilesIndexing as any) = v
            await saveSettings(this.plugin)
          })
      })
@@ -185,7 +207,7 @@ export class SettingsTab extends PluginSettingTab {
    indexedFileTypesDesc.createSpan({}, span => {
      span.innerHTML = `In addition to standard <code>md</code> files, Omnisearch can also index other <strong style="color: var(--text-accent)">PLAINTEXT</strong> files.<br/>
      Add extensions separated by a space, without the dot. Example: "<code>txt org csv</code>".<br />
-      ⚠️ <span style="color: var(--text-accent)">Using extensions of non-plaintext files (like .docx or .pptx) WILL cause crashes,
+      ⚠️ <span style="color: var(--text-accent)">Using extensions of non-plaintext files (like .pptx) WILL cause crashes,
      because Omnisearch will try to index their content.</span>`
    })
    new Setting(containerEl)
@@ -602,6 +624,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
  ignoreDiacritics: true,
  indexedFileTypes: [] as string[],
  PDFIndexing: false,
+  officeIndexing: false,
  imagesIndexing: false,
  unsupportedFilesIndexing: 'no',
  splitCamelCase: false,
@@ -174,6 +174,11 @@ export function isFilePDF(path: string): boolean {
  return getExtension(path) === 'pdf'
 }

+/**
+ * Tells whether `path` designates an office document.
+ *
+ * @param path - File path whose extension is inspected via `getExtension`.
+ * @returns `true` when the extension is exactly `docx` or `xlsx`,
+ *          `false` for anything else.
+ */
+export function isFileOffice(path: string): boolean {
+  switch (getExtension(path)) {
+    case 'docx':
+    case 'xlsx':
+      return true
+    default:
+      return false
+  }
+}
+
 /**
  * Tells whether `path` should be treated as a plaintext file.
  *
  * A path qualifies when it ends with `.md` or with a dot followed by one of
  * the extensions listed in `settings.indexedFileTypes`. The comparison is a
  * plain suffix match on the path string.
  *
  * @param path - File path to test.
  * @returns `true` if any candidate suffix matches, `false` otherwise.
  */
 export function isFilePlaintext(path: string): boolean {
   // User-configured extensions are checked first, then the built-in 'md'
   const candidates = [...settings.indexedFileTypes, 'md']
   for (const extension of candidates) {
     if (path.endsWith(`.${extension}`)) {
       return true
     }
   }
   return false
 }