#254 - Index unsupported files

This commit is contained in:
Simon Cambier
2023-07-17 11:49:54 +02:00
parent 6da4a3e785
commit 1a109d6d89
4 changed files with 62 additions and 12 deletions

View File

@@ -12,6 +12,7 @@ import {
isFileCanvas,
isFileFromDataloomPlugin,
isFilePlaintext,
isFilenameIndexable,
logDebug,
makeMD5,
removeDiacritics,
@@ -87,8 +88,11 @@ async function getAndMapIndexedDocument(
// ** Image or PDF **
else if (extractor?.canFileBeExtracted(path)) {
content = await extractor.extractText(file)
} else {
throw new Error(`Unsupported file type: "${path}"`)
}
// ** Unsupported files **
else if (isFilenameIndexable(path)) {
content = file.path
}
if (content === null || content === undefined) {

View File

@@ -18,7 +18,6 @@
import { Query } from 'src/search/query'
import { openNote } from 'src/tools/notes'
import { searchEngine } from 'src/search/omnisearch'
import { cacheManager } from 'src/cache-manager'
export let modal: OmnisearchInFileModal
export let parent: OmnisearchVaultModal | null = null

View File

@@ -37,6 +37,8 @@ export interface OmnisearchSettings extends WeightingSettings {
imagesIndexing: boolean
/** Enable Dataloom indexing */
dataloomIndexing: boolean
/** Enable indexing of unknown files */
unsupportedFilesIndexing: 'yes' | 'no' | 'default'
/** Activate the small 🔍 button on Obsidian's ribbon */
ribbonIcon: boolean
/** Display the small contextual excerpt in search results */
@@ -163,28 +165,49 @@ export class SettingsTab extends PluginSettingTab {
)
.setDisabled(!getTextExtractor())
// Additional files to index
// Additional text files to index
const indexedFileTypesDesc = new DocumentFragment()
indexedFileTypesDesc.createSpan({}, span => {
span.innerHTML = `In addition to standard <code>md</code> files, Omnisearch can also index other <strong style="color: var(--text-accent)">plaintext</strong> files.<br/>
Add extensions separated by a space, without the dot. Example: "<code>txt org</code>".<br />
span.innerHTML = `In addition to standard <code>md</code> files, Omnisearch can also index other <strong style="color: var(--text-accent)">PLAINTEXT</strong> files.<br/>
Add extensions separated by a space, without the dot. Example: "<code>txt org csv</code>".<br />
⚠️ <span style="color: var(--text-accent)">Using extensions of non-plaintext files (like .docx or .pptx) WILL cause crashes,
because Omnisearch will try to index their content.</span><br />
${needsARestart}`
})
new Setting(containerEl)
.setName('Additional files to index')
.setName('Additional TEXT files to index')
.setDesc(indexedFileTypesDesc)
.addText(component => {
component
.setValue(settings.indexedFileTypes.join(' '))
.setPlaceholder('Example: txt org')
.setPlaceholder('Example: txt org csv')
.onChange(async v => {
settings.indexedFileTypes = v.split(' ')
await saveSettings(this.plugin)
})
})
// Unsupported files
const indexUnsupportedDesc = new DocumentFragment()
indexUnsupportedDesc.createSpan({}, span => {
span.innerHTML = `
Omnisearch can index file<strong>names</strong> of "unsupported" files, such as e.g. <pre style="display:inline">.mp4</pre> or <pre style="display:inline">.xlsx</pre>.<br/>
"Obsidian setting" will respect the value of "<em>Files & Links > Detect all file extensions</em>".
<br />${needsARestart}`
})
new Setting(containerEl)
.setName('Index unsupported files (beta)')
.setDesc(indexUnsupportedDesc)
.addDropdown(dropdown => {
dropdown
.addOptions({ yes: 'Yes', no: 'No', default: 'Obsidian setting' })
.setValue(settings.unsupportedFilesIndexing)
.onChange(async v => {
;(settings.unsupportedFilesIndexing as any) = v
await saveSettings(this.plugin)
})
})
//#endregion Indexing
//#region Behavior
@@ -505,6 +528,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
PDFIndexing: false,
imagesIndexing: false,
dataloomIndexing: false,
unsupportedFilesIndexing: 'no',
splitCamelCase: false,
openInNewPane: false,
@@ -539,5 +563,13 @@ export async function saveSettings(plugin: Plugin): Promise<void> {
}
export function isPluginDisabled(): boolean {
return app.loadLocalStorage(K_DISABLE_OMNISEARCH) == '1'
return app.loadLocalStorage(K_DISABLE_OMNISEARCH) === '1'
}
/**
 * Tells whether files of unsupported types may be indexed.
 *
 * - `'yes'` always allows it.
 * - `'default'` defers to the vault's `showUnsupportedFiles` config value
 *   (presented in the settings tab as "Obsidian setting").
 * - Any other value disallows it.
 *
 * @returns `true` when indexing of unsupported files is allowed.
 */
export function canIndexUnsupportedFiles(): boolean {
  const mode = settings.unsupportedFilesIndexing
  if (mode === 'yes') {
    return true
  }
  if (mode !== 'default') {
    return false
  }
  // 'default': coerce the vault config value to a strict boolean
  return !!app.vault.getConfig('showUnsupportedFiles')
}

View File

@@ -18,7 +18,7 @@ import {
SPACE_OR_PUNCTUATION,
type SearchMatch,
} from '../globals'
import { settings } from '../settings'
import { canIndexUnsupportedFiles, settings } from '../settings'
import { type BinaryLike, createHash } from 'crypto'
import { md5 } from 'pure-md5'
@@ -26,7 +26,7 @@ export function highlighter(str: string): string {
return `<span class="${highlightClass}">${str}</span>`
}
export function highlighterGroups(substring: string, ...args: any[]) {
export function highlighterGroups(_substring: string, ...args: any[]) {
// args[0] is the single char preceding args[1], which is the word we want to highlight
if (!!args[1].trim())
return `<span>${args[0]}</span><span class="${highlightClass}">${args[1]}</span>`
@@ -263,7 +263,7 @@ export function getCtrlKeyLabel(): 'ctrl' | '⌘' {
return Platform.isMacOS ? '⌘' : 'ctrl'
}
export function isFileIndexable(path: string): boolean {
export function isContentIndexable(path: string): boolean {
const hasTextExtractor = !!getTextExtractor()
const canIndexPDF = hasTextExtractor && settings.PDFIndexing
const canIndexImages = hasTextExtractor && settings.imagesIndexing
@@ -276,6 +276,21 @@ export function isFileIndexable(path: string): boolean {
)
}
/**
 * Tells whether the file at `path` may be indexed by its name.
 *
 * This is the case when indexing of unsupported files is allowed, or when
 * the path is recognized by any of the plaintext, canvas, Dataloom, PDF or
 * image checks.
 *
 * @param path Path of the file to test.
 * @returns `true` when the filename may be indexed.
 */
export function isFilenameIndexable(path: string): boolean {
  // The global switch is consulted first and accepts any path.
  if (canIndexUnsupportedFiles()) {
    return true
  }
  // Evaluated in order; `some` stops at the first check that matches.
  const knownTypeChecks = [
    isFilePlaintext,
    isFileCanvas,
    isFileFromDataloomPlugin,
    isFilePDF,
    isFileImage,
  ]
  return knownTypeChecks.some(matches => matches(path))
}
/**
 * Tells whether the file at `path` may be indexed at all, either by its
 * filename or by its content.
 *
 * @param path Path of the file to test.
 * @returns `true` when either the filename or the content check accepts it.
 */
export function isFileIndexable(path: string): boolean {
  if (isFilenameIndexable(path)) {
    return true
  }
  return isContentIndexable(path)
}
export function isFileImage(path: string): boolean {
const ext = getExtension(path)
return ext === 'png' || ext === 'jpg' || ext === 'jpeg' || ext === 'webp'