From 887dc669f382a42c59c875a07e565dbef6fbeac4 Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Sat, 17 Jun 2023 18:08:39 +0200 Subject: [PATCH 01/11] #225 - hyphenated words should now be indexed whole _and_ split --- src/globals.ts | 2 +- src/search/omnisearch.ts | 16 +++++++++++----- src/tools/utils.ts | 25 +++++++++++++++++++++++-- 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/src/globals.ts b/src/globals.ts index db14f9b..cba6c3a 100644 --- a/src/globals.ts +++ b/src/globals.ts @@ -106,4 +106,4 @@ export function isCacheEnabled(): boolean { } export const SPACE_OR_PUNCTUATION = - /[|\n\r -#%-*,-/:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]+/u + /[|\n\r 
-#%-*,.\/:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]+/u diff --git a/src/search/omnisearch.ts b/src/search/omnisearch.ts index 3094682..bc65cff 100644 --- a/src/search/omnisearch.ts +++ b/src/search/omnisearch.ts @@ -12,6 +12,7 @@ import { logDebug, removeDiacritics, splitCamelCase, + splitHyphens, stringsToRegex, stripMarkdownCharacters, warnDebug, @@ -22,17 +23,22 @@ import { cacheManager } from '../cache-manager' import { sortBy } from 'lodash-es' const tokenize = (text: string): string[] => { - const tokens = text.split(SPACE_OR_PUNCTUATION) + let tokens = text.split(SPACE_OR_PUNCTUATION) + + // When enabled, we only use the chsSegmenter, + // and not the other custom tokenizers const chsSegmenter = getChsSegmenter() if (chsSegmenter) { 
- return tokens.flatMap(word => + tokens = tokens.flatMap(word => chsRegex.test(word) ? chsSegmenter.cut(word) : [word] ) } else { - if (settings.splitCamelCase) - return [...tokens, ...tokens.flatMap(splitCamelCase)] - return tokens + // Split camelCase tokens into "camel" and "case" + tokens = [...tokens, ...tokens.flatMap(splitCamelCase)] + // Split hyphenated tokens + tokens = [...tokens, ...tokens.flatMap(splitHyphens)] } + return tokens } export class Omnisearch { diff --git a/src/tools/utils.ts b/src/tools/utils.ts index be6a6d5..719d308 100644 --- a/src/tools/utils.ts +++ b/src/tools/utils.ts @@ -318,11 +318,32 @@ export function chunkArray(arr: T[], len: number): T[][] { } /** - * Converts a 'fooBarBAZLorem' into ['foo', 'Bar', 'BAZ', 'Lorem] + * Converts a 'fooBarBAZLorem' into ['foo', 'Bar', 'BAZ', 'Lorem'] + * If the string isn't camelCase, returns an empty array * @param text */ export function splitCamelCase(text: string): string[] { - return text.replace(/([a-z](?=[A-Z]))/g, '$1 ').split(' ') + const split = text + .replace(/([a-z](?=[A-Z]))/g, '$1 ') + .split(' ') + .filter(t => t) + if (split.length > 1) { + return split + } + return [] +} + +/** + * Converts a 'foo-bar-baz' into ['foo', 'bar', 'baz'] + * If the string isn't hyphenated, returns an empty array + * @param text + */ +export function splitHyphens(text: string): string[] { + const split = text.split('-').filter(t => t) + if (split.length > 1) { + return split + } + return [] } export function logDebug(...args: any[]): void { From 22fa98a44b0873886b80661a3a5d0df7db4c238e Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Sat, 17 Jun 2023 19:18:30 +0200 Subject: [PATCH 02/11] #225 - improved highlighting --- src/tools/utils.ts | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/tools/utils.ts b/src/tools/utils.ts index 719d308..19699d1 100644 --- a/src/tools/utils.ts +++ b/src/tools/utils.ts @@ -26,14 +26,10 @@ export function highlighter(str: 
string): string { return `${str}` } -export function highlighterGroups(...args: any[]) { - if ( - args[1] !== null && - args[1] !== undefined && - args[2] !== null && - args[2] !== undefined - ) - return `${args[1]}${args[2]}` +export function highlighterGroups(substring: string, ...args: any[]) { + // args[0] is the single char preceding args[1], which is the word we want to highlight + if (!!args[1].trim()) + return `${args[0]}${args[1]}` return '<no content>' } @@ -91,15 +87,20 @@ export function getAllIndices(text: string, regex: RegExp): SearchMatch[] { */ export function stringsToRegex(strings: string[]): RegExp { if (!strings.length) return /^$/g + + // sort strings by decreasing length, so that longer strings are matched first + strings.sort((a, b) => b.length - a.length) + const joined = '(' + // Default word split is not applied if the user uses the cm-chs-patch plugin (getChsSegmenter() ? '' : // Split on start of line, spaces, punctuation, or capital letters (for camelCase) + // We also add the hyphen to the list of characters that can split words settings.splitCamelCase - ? `^|${SPACE_OR_PUNCTUATION.source}|[A-Z]` - : `^|${SPACE_OR_PUNCTUATION.source}`) + + ? 
`^|${SPACE_OR_PUNCTUATION.source}|\-|[A-Z]` + : `^|${SPACE_OR_PUNCTUATION.source}|\-`) + ')' + `(${strings.map(s => escapeRegex(s)).join('|')})` From 5c3d8277e718772217ab6630dc835cd59953cef3 Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Sat, 17 Jun 2023 19:26:05 +0200 Subject: [PATCH 03/11] 1.15.0-beta.1 --- manifest-beta.json | 2 +- package.json | 2 +- versions.json | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/manifest-beta.json b/manifest-beta.json index 72ef329..7aa2cd1 100644 --- a/manifest-beta.json +++ b/manifest-beta.json @@ -1,7 +1,7 @@ { "id": "omnisearch", "name": "Omnisearch", - "version": "1.14.2", + "version": "1.15.0-beta.1", "minAppVersion": "1.0.0", "description": "A search engine that just works", "author": "Simon Cambier", diff --git a/package.json b/package.json index 737eb48..24b73c3 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "scambier.obsidian-search", - "version": "1.14.2", + "version": "1.15.0-beta.1", "description": "A search engine for Obsidian", "main": "dist/main.js", "scripts": { diff --git a/versions.json b/versions.json index 29feac9..24ae378 100644 --- a/versions.json +++ b/versions.json @@ -109,5 +109,6 @@ "1.14.1-beta.1": "1.0.0", "1.14.1-beta.2": "1.0.0", "1.14.1": "1.0.0", - "1.14.2": "1.0.0" + "1.14.2": "1.0.0", + "1.15.0-beta.1": "1.0.0" } \ No newline at end of file From be2a724c0c8149c030a43b876c0b0648ac38f346 Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Sun, 18 Jun 2023 10:05:26 +0200 Subject: [PATCH 04/11] Improving quotes to allow `"hyphenated-words"` --- src/search/query.ts | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/search/query.ts b/src/search/query.ts index 848b2d9..065df00 100644 --- a/src/search/query.ts +++ b/src/search/query.ts @@ -9,11 +9,10 @@ type Keywords = { } & { text: string[] } export class Query { - query: Keywords & { exclude: Keywords } - /** - * @deprecated - */ - extensions: string[] = [] + 
query: Keywords & { + exclude: Keywords + } + #inQuotes: string[] constructor(text = '') { if (settings.ignoreDiacritics) { @@ -44,7 +43,10 @@ export class Query { } } this.query = parsed - this.extensions = this.query.ext ?? [] + + // Get strings in quotes, and remove the quotes + this.#inQuotes = + text.match(/"([^"]+)"/g)?.map(o => o.replace(/"/g, '')) ?? [] } public isEmpty(): boolean { @@ -62,7 +64,7 @@ export class Query { public segmentsToStr(): string { return this.query.text.join(' ') } - + public getTags(): string[] { return this.query.text.filter(o => o.startsWith('#')) } @@ -72,6 +74,11 @@ export class Query { } public getExactTerms(): string[] { - return this.query.text.filter(o => o.split(' ').length > 1) + return [ + ...new Set([ + ...this.query.text.filter(o => o.split(' ').length > 1), + ...this.#inQuotes, + ]), + ] } } From 34ef17b4ed05bf1f0782a9d71cef7393ff93aa8b Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Thu, 22 Jun 2023 19:47:00 +0200 Subject: [PATCH 05/11] Setting for fuzziness --- src/search/omnisearch.ts | 17 +++++++++++++++-- src/settings.ts | 33 +++++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/search/omnisearch.ts b/src/search/omnisearch.ts index bc65cff..1930066 100644 --- a/src/search/omnisearch.ts +++ b/src/search/omnisearch.ts @@ -188,12 +188,26 @@ export class Omnisearch { logDebug('Starting search for', query) + let fuzziness: number + switch (settings.fuzziness) { + case '0': + fuzziness = 0 + break + case '1': + fuzziness = 0.1 + break + default: + fuzziness = 0.2 + break + } + let results = this.minisearch.search(query.segmentsToStr(), { prefix: term => term.length >= options.prefixLength, // length <= 3: no fuzziness // length <= 5: fuzziness of 10% // length > 5: fuzziness of 20% - fuzzy: term => (term.length <= 3 ? 0 : term.length <= 5 ? 0.1 : 0.2), + fuzzy: term => + term.length <= 3 ? 0 : term.length <= 5 ? 
fuzziness / 2 : fuzziness, combineWith: 'AND', boost: { basename: settings.weightBasename, @@ -216,7 +230,6 @@ export class Omnisearch { ext.startsWith(e.startsWith('.') ? e : '.' + e) ) }) - console.log(query.query.ext, results.length) } // Filter query results that match the path diff --git a/src/settings.ts b/src/settings.ts index 32d2485..6ea57c4 100644 --- a/src/settings.ts +++ b/src/settings.ts @@ -7,7 +7,11 @@ import { } from 'obsidian' import { writable } from 'svelte/store' import { database } from './database' -import { K_DISABLE_OMNISEARCH, getTextExtractor, isCacheEnabled } from './globals' +import { + K_DISABLE_OMNISEARCH, + getTextExtractor, + isCacheEnabled, +} from './globals' import type OmnisearchPlugin from './main' interface WeightingSettings { @@ -49,6 +53,7 @@ export interface OmnisearchSettings extends WeightingSettings { splitCamelCase: boolean openInNewPane: boolean verboseLogging: boolean + fuzziness: '0' | '1' | '2' } /** @@ -259,6 +264,29 @@ export class SettingsTab extends PluginSettingTab { }) ) + // Fuzziness + new Setting(containerEl) + .setName('Fuzziness') + .setDesc( + "Define the level of fuzziness for the search. The higher the fuzziness, the more results you'll get." 
+ ) + .addDropdown(dropdown => + dropdown + .addOptions({ + 0: 'Exact match', + 1: 'Not too fuzzy', + 2: 'Fuzzy enough', + }) + .setValue(settings.fuzziness) + .onChange(async v => { + if (!['0', '1', '2'].includes(v)) { + v = '2' + } + settings.fuzziness = v as '0' | '1' | '2' + await saveSettings(this.plugin) + }) + ) + //#endregion Behavior //#region User Interface @@ -467,6 +495,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = { highlight: true, showPreviousQueryResults: true, simpleSearch: false, + fuzziness: '0', weightBasename: 3, weightDirectory: 2, @@ -491,4 +520,4 @@ export async function saveSettings(plugin: Plugin): Promise { export function isPluginDisabled(): boolean { return app.loadLocalStorage(K_DISABLE_OMNISEARCH) == '1' -} \ No newline at end of file +} From afca0694cada667bbadb05d87a154fe6f5f1115f Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Sat, 15 Jul 2023 21:58:22 +0200 Subject: [PATCH 06/11] #256 - Support for .loom files for the Dataloom plugin --- src/cache-manager.ts | 25 +++++++++++++++++++++++++ src/tools/utils.ts | 5 +++++ 2 files changed, 30 insertions(+) diff --git a/src/cache-manager.ts b/src/cache-manager.ts index e2a7ca5..6ee9715 100644 --- a/src/cache-manager.ts +++ b/src/cache-manager.ts @@ -10,6 +10,7 @@ import { getAliasesFromMetadata, getTagsFromMetadata, isFileCanvas, + isFileFromDataloomPlugin, isFilePlaintext, logDebug, makeMD5, @@ -58,6 +59,30 @@ async function getAndMapIndexedDocument( content = texts.join('\r\n') } + // ** Dataloom plugin ** + else if (isFileFromDataloomPlugin(path)) { + try { + const data = JSON.parse(await app.vault.cachedRead(file)) + // data is a json object, we recursively iterate the keys + // and concatenate the values if the key is "markdown" + const texts: string[] = [] + const iterate = (obj: any) => { + for (const key in obj) { + if (typeof obj[key] === 'object') { + iterate(obj[key]) + } else if (key === 'markdown') { + texts.push(obj[key]) + } + } + } + iterate(data) + content 
= texts.join('\r\n') + } catch (e) { + console.error('Omnisearch: Error while parsing Dataloom file', path) + console.error(e) + } + } + // ** Image or PDF ** else if (extractor?.canFileBeExtracted(path)) { content = await extractor.extractText(file) diff --git a/src/tools/utils.ts b/src/tools/utils.ts index 19699d1..839af56 100644 --- a/src/tools/utils.ts +++ b/src/tools/utils.ts @@ -270,6 +270,7 @@ export function isFileIndexable(path: string): boolean { return ( isFilePlaintext(path) || isFileCanvas(path) || + isFileFromDataloomPlugin(path) || (canIndexPDF && isFilePDF(path)) || (canIndexImages && isFileImage(path)) ) @@ -292,6 +293,10 @@ export function isFileCanvas(path: string): boolean { return path.endsWith('.canvas') } +export function isFileFromDataloomPlugin(path: string): boolean { + return path.endsWith('.loom') +} + export function getExtension(path: string): string { const split = path.split('.') return split[split.length - 1] ?? '' From a842d1768fbe1487e21bc691015293ce05ac0d00 Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Sun, 16 Jul 2023 21:52:08 +0200 Subject: [PATCH 07/11] #256 - .loom indexing is behind a setting for now --- src/cache-manager.ts | 3 ++- src/settings.ts | 22 +++++++++++++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/cache-manager.ts b/src/cache-manager.ts index 6ee9715..4c4b4f8 100644 --- a/src/cache-manager.ts +++ b/src/cache-manager.ts @@ -19,6 +19,7 @@ import { import type { CanvasData } from 'obsidian/canvas' import type { AsPlainObject } from 'minisearch' import type MiniSearch from 'minisearch' +import { settings } from './settings' /** * This function is responsible for extracting the text from a file and @@ -60,7 +61,7 @@ async function getAndMapIndexedDocument( } // ** Dataloom plugin ** - else if (isFileFromDataloomPlugin(path)) { + else if (settings.dataloomIndexing && isFileFromDataloomPlugin(path)) { try { const data = JSON.parse(await app.vault.cachedRead(file)) // data is a json 
object, we recursively iterate the keys diff --git a/src/settings.ts b/src/settings.ts index 6ea57c4..24c7cbe 100644 --- a/src/settings.ts +++ b/src/settings.ts @@ -33,8 +33,10 @@ export interface OmnisearchSettings extends WeightingSettings { indexedFileTypes: string[] /** Enable PDF indexing */ PDFIndexing: boolean - /** Enable PDF indexing */ + /** Enable Images indexing */ imagesIndexing: boolean + /** Enable Dataloom indexing */ + dataloomIndexing: boolean /** Activate the small 🔍 button on Obsidian's ribbon */ ribbonIcon: boolean /** Display the small contextual excerpt in search results */ @@ -144,6 +146,23 @@ export class SettingsTab extends PluginSettingTab { ) .setDisabled(!getTextExtractor()) + // Dataloom Indexing + const indexDataLoomDesc = new DocumentFragment() + indexDataLoomDesc.createSpan({}, span => { + span.innerHTML = `Include DataLoom
.loom
files in search results +
${needsARestart}` + }) + new Setting(containerEl) + .setName('DataLoom indexing (beta)') + .setDesc(indexDataLoomDesc) + .addToggle(toggle => + toggle.setValue(settings.dataloomIndexing).onChange(async v => { + settings.dataloomIndexing = v + await saveSettings(this.plugin) + }) + ) + .setDisabled(!getTextExtractor()) + // Additional files to index const indexedFileTypesDesc = new DocumentFragment() indexedFileTypesDesc.createSpan({}, span => { @@ -485,6 +504,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = { indexedFileTypes: [] as string[], PDFIndexing: false, imagesIndexing: false, + dataloomIndexing: false, splitCamelCase: false, openInNewPane: false, From 46fb100f3549f24110d9f8a6244d6f54dfcf0924 Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Sun, 16 Jul 2023 22:06:00 +0200 Subject: [PATCH 08/11] Removed file non longer used --- src/file-loader.ts | 50 ---------------------------------------------- 1 file changed, 50 deletions(-) delete mode 100644 src/file-loader.ts diff --git a/src/file-loader.ts b/src/file-loader.ts deleted file mode 100644 index 503735f..0000000 --- a/src/file-loader.ts +++ /dev/null @@ -1,50 +0,0 @@ -import { isFileImage, isFilePDF, isFilePlaintext } from './tools/utils' -import type { TFile } from 'obsidian' -import type { IndexedDocument } from './globals' -import { cacheManager } from './cache-manager' - -/** - * Return all plaintext files as IndexedDocuments - */ -export async function getPlainTextFiles(): Promise { - const allFiles = app.vault.getFiles().filter(f => isFilePlaintext(f.path)) - const data: IndexedDocument[] = [] - for (const file of allFiles) { - const doc = await cacheManager.getDocument(file.path) - data.push(doc) - // await cacheManager.updateLiveDocument(file.path, doc) - } - return data -} - -/** - * Return all PDFs as IndexedDocuments. 
- */ -export async function getPDFAsDocuments(): Promise { - const files = app.vault.getFiles().filter(f => isFilePDF(f.path)) - return await getBinaryFiles(files) -} - -/** - * Return all imageas as IndexedDocuments. - */ -export async function getImagesAsDocuments(): Promise { - const files = app.vault.getFiles().filter(f => isFileImage(f.path)) - return await getBinaryFiles(files) -} - -async function getBinaryFiles(files: TFile[]): Promise { - const data: IndexedDocument[] = [] - const input = [] - for (const file of files) { - input.push( - new Promise(async (resolve, _reject) => { - const doc = await cacheManager.getDocument(file.path) - data.push(doc) - return resolve(null) - }) - ) - } - await Promise.all(input) - return data -} From 6da4a3e7857bd0251f70c223833bb9ed1283fe4a Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Mon, 17 Jul 2023 11:49:41 +0200 Subject: [PATCH 09/11] #256 - Also index .dashboard --- src/tools/utils.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/utils.ts b/src/tools/utils.ts index 839af56..8c6bf82 100644 --- a/src/tools/utils.ts +++ b/src/tools/utils.ts @@ -294,7 +294,7 @@ export function isFileCanvas(path: string): boolean { } export function isFileFromDataloomPlugin(path: string): boolean { - return path.endsWith('.loom') + return path.endsWith('.loom') || path.endsWith('.dashboard') } export function getExtension(path: string): string { From 1a109d6d8934b3d1abc516cd12ea8037cffde316 Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Mon, 17 Jul 2023 11:49:54 +0200 Subject: [PATCH 10/11] #254 - Index unsupported files --- src/cache-manager.ts | 8 ++++-- src/components/ModalInFile.svelte | 1 - src/settings.ts | 44 ++++++++++++++++++++++++++----- src/tools/utils.ts | 21 ++++++++++++--- 4 files changed, 62 insertions(+), 12 deletions(-) diff --git a/src/cache-manager.ts b/src/cache-manager.ts index 4c4b4f8..90da4a1 100644 --- a/src/cache-manager.ts +++ b/src/cache-manager.ts @@ -12,6 +12,7 @@ import { 
isFileCanvas, isFileFromDataloomPlugin, isFilePlaintext, + isFilenameIndexable, logDebug, makeMD5, removeDiacritics, @@ -87,8 +88,11 @@ async function getAndMapIndexedDocument( // ** Image or PDF ** else if (extractor?.canFileBeExtracted(path)) { content = await extractor.extractText(file) - } else { - throw new Error(`Unsupported file type: "${path}"`) + } + + // ** Unsupported files ** + else if (isFilenameIndexable(path)) { + content = file.path } if (content === null || content === undefined) { diff --git a/src/components/ModalInFile.svelte b/src/components/ModalInFile.svelte index ab8f376..5b8739a 100644 --- a/src/components/ModalInFile.svelte +++ b/src/components/ModalInFile.svelte @@ -18,7 +18,6 @@ import { Query } from 'src/search/query' import { openNote } from 'src/tools/notes' import { searchEngine } from 'src/search/omnisearch' - import { cacheManager } from 'src/cache-manager' export let modal: OmnisearchInFileModal export let parent: OmnisearchVaultModal | null = null diff --git a/src/settings.ts b/src/settings.ts index 24c7cbe..9f6f438 100644 --- a/src/settings.ts +++ b/src/settings.ts @@ -37,6 +37,8 @@ export interface OmnisearchSettings extends WeightingSettings { imagesIndexing: boolean /** Enable Dataloom indexing */ dataloomIndexing: boolean + /** Enable indexing of unknown files */ + unsupportedFilesIndexing: 'yes' | 'no' | 'default' /** Activate the small 🔍 button on Obsidian's ribbon */ ribbonIcon: boolean /** Display the small contextual excerpt in search results */ @@ -163,28 +165,49 @@ export class SettingsTab extends PluginSettingTab { ) .setDisabled(!getTextExtractor()) - // Additional files to index + // Additional text files to index const indexedFileTypesDesc = new DocumentFragment() indexedFileTypesDesc.createSpan({}, span => { - span.innerHTML = `In addition to standard md files, Omnisearch can also index other plaintext files.
- Add extensions separated by a space, without the dot. Example: "txt org".
+ span.innerHTML = `In addition to standard md files, Omnisearch can also index other PLAINTEXT files.
+ Add extensions separated by a space, without the dot. Example: "txt org csv".
⚠️ Using extensions of non-plaintext files (like .docx or .pptx) WILL cause crashes, because Omnisearch will try to index their content.
${needsARestart}` }) new Setting(containerEl) - .setName('Additional files to index') + .setName('Additional TEXT files to index') .setDesc(indexedFileTypesDesc) .addText(component => { component .setValue(settings.indexedFileTypes.join(' ')) - .setPlaceholder('Example: txt org') + .setPlaceholder('Example: txt org csv') .onChange(async v => { settings.indexedFileTypes = v.split(' ') await saveSettings(this.plugin) }) }) + // Unsupported files + const indexUnsupportedDesc = new DocumentFragment() + indexUnsupportedDesc.createSpan({}, span => { + span.innerHTML = ` + Omnisearch can index filenames of "unsupported" files, such as e.g.
.mp4
or
.xlsx
.
+ "Obsidian setting" will respect the value of "Files & Links > Detect all file extensions". +
${needsARestart}` + }) + new Setting(containerEl) + .setName('Index unsupported files (beta)') + .setDesc(indexUnsupportedDesc) + .addDropdown(dropdown => { + dropdown + .addOptions({ yes: 'Yes', no: 'No', default: 'Obsidian setting' }) + .setValue(settings.unsupportedFilesIndexing) + .onChange(async v => { + ;(settings.unsupportedFilesIndexing as any) = v + await saveSettings(this.plugin) + }) + }) + //#endregion Indexing //#region Behavior @@ -505,6 +528,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = { PDFIndexing: false, imagesIndexing: false, dataloomIndexing: false, + unsupportedFilesIndexing: 'no', splitCamelCase: false, openInNewPane: false, @@ -539,5 +563,13 @@ export async function saveSettings(plugin: Plugin): Promise { } export function isPluginDisabled(): boolean { - return app.loadLocalStorage(K_DISABLE_OMNISEARCH) == '1' + return app.loadLocalStorage(K_DISABLE_OMNISEARCH) === '1' +} + +export function canIndexUnsupportedFiles(): boolean { + return ( + settings.unsupportedFilesIndexing === 'yes' || + (settings.unsupportedFilesIndexing === 'default' && + !!app.vault.getConfig('showUnsupportedFiles')) + ) } diff --git a/src/tools/utils.ts b/src/tools/utils.ts index 8c6bf82..49b11f1 100644 --- a/src/tools/utils.ts +++ b/src/tools/utils.ts @@ -18,7 +18,7 @@ import { SPACE_OR_PUNCTUATION, type SearchMatch, } from '../globals' -import { settings } from '../settings' +import { canIndexUnsupportedFiles, settings } from '../settings' import { type BinaryLike, createHash } from 'crypto' import { md5 } from 'pure-md5' @@ -26,7 +26,7 @@ export function highlighter(str: string): string { return `${str}` } -export function highlighterGroups(substring: string, ...args: any[]) { +export function highlighterGroups(_substring: string, ...args: any[]) { // args[0] is the single char preceding args[1], which is the word we want to highlight if (!!args[1].trim()) return `${args[0]}${args[1]}` @@ -263,7 +263,7 @@ export function getCtrlKeyLabel(): 'ctrl' | '⌘' { 
return Platform.isMacOS ? '⌘' : 'ctrl' } -export function isFileIndexable(path: string): boolean { +export function isContentIndexable(path: string): boolean { const hasTextExtractor = !!getTextExtractor() const canIndexPDF = hasTextExtractor && settings.PDFIndexing const canIndexImages = hasTextExtractor && settings.imagesIndexing @@ -276,6 +276,21 @@ export function isFileIndexable(path: string): boolean { ) } +export function isFilenameIndexable(path: string): boolean { + return ( + canIndexUnsupportedFiles() || + isFilePlaintext(path) || + isFileCanvas(path) || + isFileFromDataloomPlugin(path) || + isFilePDF(path) || + isFileImage(path) + ) +} + +export function isFileIndexable(path: string): boolean { + return isFilenameIndexable(path) || isContentIndexable(path) +} + export function isFileImage(path: string): boolean { const ext = getExtension(path) return ext === 'png' || ext === 'jpg' || ext === 'jpeg' || ext === 'webp' From 0b9a7099fed600b1148049159e9564055ea66462 Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Tue, 18 Jul 2023 07:44:23 +0200 Subject: [PATCH 11/11] 1.15.0-beta.2 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 24b73c3..60f2f13 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "scambier.obsidian-search", - "version": "1.15.0-beta.1", + "version": "1.15.0-beta.2", "description": "A search engine for Obsidian", "main": "dist/main.js", "scripts": {