diff --git a/assets/styles.css b/assets/styles.css
index 2e8501e..2bec00f 100644
--- a/assets/styles.css
+++ b/assets/styles.css
@@ -25,7 +25,7 @@
}
.omnisearch-result__title > span {
- display: flex;
+
}
.omnisearch-result__folder-path {
diff --git a/manifest-beta.json b/manifest-beta.json
index f59113d..b260e4f 100644
--- a/manifest-beta.json
+++ b/manifest-beta.json
@@ -1,8 +1,8 @@
{
"id": "omnisearch",
"name": "Omnisearch",
- "version": "1.15.0",
- "minAppVersion": "1.0.0",
+ "version": "1.16.0",
+ "minAppVersion": "1.3.0",
"description": "A search engine that just works",
"author": "Simon Cambier",
"authorUrl": "https://github.com/scambier/obsidian-omnisearch",
diff --git a/manifest.json b/manifest.json
index b2d112f..1b645aa 100644
--- a/manifest.json
+++ b/manifest.json
@@ -1,8 +1,8 @@
{
"id": "omnisearch",
"name": "Omnisearch",
- "version": "1.15.0",
- "minAppVersion": "1.0.0",
+ "version": "1.16.0",
+ "minAppVersion": "1.3.0",
"description": "A search engine that just works",
"author": "Simon Cambier",
"authorUrl": "https://github.com/scambier/obsidian-omnisearch",
diff --git a/package.json b/package.json
index 4ea1874..f9e6496 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "scambier.obsidian-search",
- "version": "1.15.0",
+ "version": "1.16.0",
"description": "A search engine for Obsidian",
"main": "dist/main.js",
"scripts": {
diff --git a/src/cache-manager.ts b/src/cache-manager.ts
index 5ef87a3..1cd61a5 100644
--- a/src/cache-manager.ts
+++ b/src/cache-manager.ts
@@ -11,6 +11,8 @@ import {
getTagsFromMetadata,
isFileCanvas,
isFileFromDataloomPlugin,
+ isFileImage,
+ isFilePDF,
isFilePlaintext,
isFilenameIndexable,
logDebug,
@@ -20,6 +22,7 @@ import {
import type { CanvasData } from 'obsidian/canvas'
import type { AsPlainObject } from 'minisearch'
import type MiniSearch from 'minisearch'
+import { settings } from './settings'
/**
* This function is responsible for extracting the text from a file and
@@ -84,11 +87,23 @@ async function getAndMapIndexedDocument(
}
}
- // ** Image or PDF **
- else if (extractor?.canFileBeExtracted(path)) {
+ // ** Image **
+ else if (
+ isFileImage(path) &&
+ settings.imagesIndexing &&
+ extractor?.canFileBeExtracted(path)
+ ) {
content = await extractor.extractText(file)
}
-
+ // ** PDF **
+ else if (
+ isFilePDF(path) &&
+ settings.PDFIndexing &&
+ extractor?.canFileBeExtracted(path)
+ ) {
+ content = await extractor.extractText(file)
+ }
+
// ** Unsupported files **
else if (isFilenameIndexable(path)) {
content = file.path
diff --git a/src/components/ResultItemInFile.svelte b/src/components/ResultItemInFile.svelte
index 27a5805..db9bfa5 100644
--- a/src/components/ResultItemInFile.svelte
+++ b/src/components/ResultItemInFile.svelte
@@ -1,19 +1,22 @@
- {@html cleanedContent.replace(reg, highlighterGroups)}
+ {@html highlightText(cleanedContent.content, matchesExcerpt)}
diff --git a/src/components/ResultItemVault.svelte b/src/components/ResultItemVault.svelte
index 3360581..45f8f0d 100644
--- a/src/components/ResultItemVault.svelte
+++ b/src/components/ResultItemVault.svelte
@@ -3,17 +3,16 @@
import type { ResultNote } from '../globals'
import {
getExtension,
- highlighterGroups,
isFileCanvas,
isFileImage,
isFilePDF,
- makeExcerpt,
pathWithoutFilename,
removeDiacritics,
- stringsToRegex,
} from '../tools/utils'
import ResultItemContainer from './ResultItemContainer.svelte'
import { setIcon } from 'obsidian'
+ import { cloneDeep } from 'lodash-es'
+ import { stringsToRegex, getMatches, makeExcerpt, highlightText } from 'src/tools/text-processing'
export let selected = false
export let note: ResultNote
@@ -36,6 +35,11 @@
}
}
$: reg = stringsToRegex(note.foundWords)
+ $: matchesTitle = getMatches(title, reg)
+ $: matchesExcerpt = cloneDeep(note.matches).map(m => {
+ m.offset = m.offset - cleanedContent.offset
+ return m
+ })
$: cleanedContent = makeExcerpt(note.content, note.matches[0]?.offset ?? -1)
$: glyph = false //cacheManager.getLiveDocument(note.path)?.doesNotExist
$: {
@@ -70,7 +74,7 @@
- {@html title.replace(reg, highlighterGroups)}
+ {@html highlightText(title, matchesTitle)}
.{getExtension(note.path)}
@@ -97,7 +101,7 @@
{#if $showExcerpt}
- {@html cleanedContent.replace(reg, highlighterGroups)}
+ {@html highlightText(cleanedContent.content, matchesExcerpt)}
{/if}
diff --git a/src/components/modals.ts b/src/components/modals.ts
index aa86114..238114f 100644
--- a/src/components/modals.ts
+++ b/src/components/modals.ts
@@ -38,7 +38,7 @@ abstract class OmnisearchModal extends Modal {
] as const) {
for (const modifier of ['Ctrl', 'Mod'] as const) {
this.scope.register([modifier], key.k, _e => {
- if (this.app.vault.getConfig('vimMode')) {
+ if (settings.vimLikeNavigationShortcut) {
// e.preventDefault()
eventBus.emit('arrow-' + key.dir)
}
@@ -53,7 +53,7 @@ abstract class OmnisearchModal extends Modal {
] as const) {
for (const modifier of ['Ctrl', 'Mod'] as const) {
this.scope.register([modifier], key.k, _e => {
- if (this.app.vault.getConfig('vimMode')) {
+ if (settings.vimLikeNavigationShortcut) {
// e.preventDefault()
eventBus.emit('arrow-' + key.dir)
}
diff --git a/src/database.ts b/src/database.ts
index b1e1199..d3500b8 100644
--- a/src/database.ts
+++ b/src/database.ts
@@ -1,6 +1,7 @@
import Dexie from 'dexie'
import type { AsPlainObject } from 'minisearch'
import type { DocumentRef } from './globals'
+import { Notice } from 'obsidian'
export class OmnisearchCache extends Dexie {
public static readonly dbVersion = 8
@@ -57,6 +58,7 @@ export class OmnisearchCache extends Dexie {
}
public async clearCache() {
await this.minisearch.clear()
+ // Notify only after the awaited clear has completed, so a failing clear() does not report success
+ new Notice('Omnisearch - Cache cleared. Please restart Obsidian.')
}
}
diff --git a/src/globals.ts b/src/globals.ts
index 1ba9310..7f977a5 100644
--- a/src/globals.ts
+++ b/src/globals.ts
@@ -106,5 +106,9 @@ export function isCacheEnabled(): boolean {
return !Platform.isIosApp && settings.useCache
}
-export const SPACE_OR_PUNCTUATION =
- /[|\n\r -#%-*,.\/:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]+/u
+const separators =
+ /[|\t\n\r= -#%-*,.`\/:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]/
+ .toString()
+ .slice(1, -1)
+// Matches exactly one character of the `separators` character class (no quantifier is appended).
+export const SPACE_OR_PUNCTUATION_UNIQUE = new RegExp(`${separators}`, 'u')
+// Same character class with a `+` quantifier: matches a run of one or more separator characters.
+export const SPACE_OR_PUNCTUATION = new RegExp(`${separators}+`, 'u')
diff --git a/src/main.ts b/src/main.ts
index 4e9566d..5c7a411 100644
--- a/src/main.ts
+++ b/src/main.ts
@@ -1,4 +1,4 @@
-import { Notice, Plugin } from 'obsidian'
+import { Notice, Platform, Plugin } from 'obsidian'
import {
OmnisearchInFileModal,
OmnisearchVaultModal,
@@ -103,7 +103,7 @@ export default class OmnisearchPlugin extends Plugin {
if (isFileIndexable(file.path)) {
logDebug('Renaming file', file.path)
cacheManager.removeFromLiveCache(oldPath)
- cacheManager.addToLiveCache(file.path)
+ await cacheManager.addToLiveCache(file.path)
searchEngine.removeFromPaths([oldPath])
await searchEngine.addFromPaths([file.path])
}
@@ -118,11 +118,11 @@ export default class OmnisearchPlugin extends Plugin {
executeFirstLaunchTasks(): void {
const code = '1.10.1'
if (settings.welcomeMessage !== code) {
- const welcome = new DocumentFragment()
- welcome.createSpan({}, span => {
- span.innerHTML = `🔎 Omnisearch now requires the
Text Extractor plugin to index PDF and images. See Omnisearch settings for more information.`
- })
- new Notice(welcome, 20_000)
+ // const welcome = new DocumentFragment()
+ // welcome.createSpan({}, span => {
+ // span.innerHTML = `🔎 Omnisearch now requires the
Text Extractor plugin to index PDF and images. See Omnisearch settings for more information.`
+ // })
+ // new Notice(welcome, 20_000)
}
settings.welcomeMessage = code
@@ -206,18 +206,18 @@ export default class OmnisearchPlugin extends Plugin {
// Disable settings.useCache while writing the cache, in case it freezes
settings.useCache = false
- saveSettings(this)
+ await saveSettings(this)
// Write the cache
await searchEngine.writeToCache()
// Re-enable settings.caching
settings.useCache = true
- saveSettings(this)
+ await saveSettings(this)
}
console.timeEnd('Omnisearch - Indexing total time')
- if (diff.toAdd.length >= 1000) {
+ if (diff.toAdd.length >= 1000 && !Platform.isIosApp) {
new Notice(`Omnisearch - Your files have been indexed.`)
}
indexingStep.set(IndexingStepType.Done)
diff --git a/src/notes-index.ts b/src/notes-index.ts
index 4a0f5cc..a6bffe5 100644
--- a/src/notes-index.ts
+++ b/src/notes-index.ts
@@ -43,7 +43,7 @@ export async function refreshIndex(): Promise
{
const paths = [...notesToReindex].map(n => n.path)
if (paths.length) {
searchEngine.removeFromPaths(paths)
- searchEngine.addFromPaths(paths)
+ await searchEngine.addFromPaths(paths)
notesToReindex.clear()
// console.log(`Omnisearch - Reindexed ${paths.length} file(s)`)
}
diff --git a/src/search/omnisearch.ts b/src/search/omnisearch.ts
index f764f62..6adf19b 100644
--- a/src/search/omnisearch.ts
+++ b/src/search/omnisearch.ts
@@ -1,10 +1,5 @@
import MiniSearch, { type Options, type SearchResult } from 'minisearch'
-import type {
- DocumentRef,
- IndexedDocument,
- ResultNote,
- SearchMatch,
-} from '../globals'
+import type { DocumentRef, IndexedDocument, ResultNote } from '../globals'
import { chsRegex, getChsSegmenter, SPACE_OR_PUNCTUATION } from '../globals'
import { settings } from '../settings'
import {
@@ -13,18 +8,23 @@ import {
removeDiacritics,
splitCamelCase,
splitHyphens,
- stringsToRegex,
stripMarkdownCharacters,
- warnDebug,
} from '../tools/utils'
import { Notice } from 'obsidian'
import type { Query } from './query'
import { cacheManager } from '../cache-manager'
import { sortBy } from 'lodash-es'
+import { getMatches, stringsToRegex } from 'src/tools/text-processing'
const tokenize = (text: string): string[] => {
let tokens = text.split(SPACE_OR_PUNCTUATION)
+ // Split hyphenated tokens
+ tokens = [...tokens, ...tokens.flatMap(splitHyphens)]
+
+ // Split camelCase tokens into "camel" and "case"
+ tokens = [...tokens, ...tokens.flatMap(splitCamelCase)]
+
// When enabled, we only use the chsSegmenter,
// and not the other custom tokenizers
const chsSegmenter = getChsSegmenter()
@@ -32,12 +32,8 @@ const tokenize = (text: string): string[] => {
tokens = tokens.flatMap(word =>
chsRegex.test(word) ? chsSegmenter.cut(word) : [word]
)
- } else {
- // Split camelCase tokens into "camel" and "case
- tokens = [...tokens, ...tokens.flatMap(splitCamelCase)]
- // Split hyphenated tokens
- tokens = [...tokens, ...tokens.flatMap(splitHyphens)]
}
+
return tokens
}
@@ -186,6 +182,7 @@ export class Omnisearch {
return []
}
+ logDebug('=== New search ===')
logDebug('Starting search for', query)
let fuzziness: number
@@ -216,7 +213,7 @@ export class Omnisearch {
headings1: settings.weightH1,
headings2: settings.weightH2,
headings3: settings.weightH3,
- unmarkedTags: settings.weightUnmarkedTags
+ unmarkedTags: settings.weightUnmarkedTags,
},
})
@@ -297,6 +294,8 @@ export class Omnisearch {
// Sort results and keep the 50 best
results = results.sort((a, b) => b.score - a.score).slice(0, 50)
+ if (results.length) logDebug('First result:', results[0])
+
const documents = await Promise.all(
results.map(async result => await cacheManager.getDocument(result.id))
)
@@ -304,7 +303,7 @@ export class Omnisearch {
// If the search query contains quotes, filter out results that don't have the exact match
const exactTerms = query.getExactTerms()
if (exactTerms.length) {
- logDebug('Filtering with quoted terms')
+ logDebug('Filtering with quoted terms: ', exactTerms)
results = results.filter(r => {
const document = documents.find(d => d.path === r.id)
const title = document?.path.toLowerCase() ?? ''
@@ -340,33 +339,6 @@ export class Omnisearch {
return results
}
- public getMatches(text: string, reg: RegExp, query: Query): SearchMatch[] {
- const startTime = new Date().getTime()
- let match: RegExpExecArray | null = null
- const matches: SearchMatch[] = []
- let count = 0
- while ((match = reg.exec(text)) !== null) {
- // Avoid infinite loops, stop looking after 100 matches or if we're taking too much time
- if (++count >= 100 || new Date().getTime() - startTime > 50) {
- warnDebug('Stopped getMatches at', count, 'results')
- break
- }
- const m = match[0]
- if (m) matches.push({ match: m, offset: match.index })
- }
-
- // If the query can be found "as is" in the text, put this match first
- const best = text.toLowerCase().indexOf(query.segmentsToStr())
- if (best > -1) {
- matches.unshift({
- offset: best,
- match: query.segmentsToStr(),
- })
- }
-
- return matches
- }
-
/**
* Searches the index, and returns an array of ResultNote objects.
* If we have the singleFile option set,
@@ -427,12 +399,12 @@ export class Omnisearch {
logDebug('Matching tokens:', foundWords)
logDebug('Getting matches locations...')
- const matches = this.getMatches(
+ const matches = getMatches(
note.content,
stringsToRegex(foundWords),
query
)
- logDebug('Matches:', matches)
+ logDebug(`Matches for ${note.basename}`, matches)
const resultNote: ResultNote = {
score: result.score,
foundWords,
diff --git a/src/search/query.ts b/src/search/query.ts
index 065df00..0032c5f 100644
--- a/src/search/query.ts
+++ b/src/search/query.ts
@@ -30,6 +30,8 @@ export class Query {
if (!Array.isArray(parsed.exclude.text)) {
parsed.exclude.text = [parsed.exclude.text]
}
+ // Remove empty excluded strings
+ parsed.exclude.text = parsed.exclude.text.filter(o => o.length)
// Make sure that all fields are string[]
for (const k of keywords) {
@@ -75,10 +77,12 @@ export class Query {
public getExactTerms(): string[] {
return [
- ...new Set([
- ...this.query.text.filter(o => o.split(' ').length > 1),
- ...this.#inQuotes,
- ]),
+ ...new Set(
+ [
+ ...this.query.text.filter(o => o.split(' ').length > 1),
+ ...this.#inQuotes,
+ ].map(str => str.toLowerCase())
+ ),
]
}
}
diff --git a/src/settings.ts b/src/settings.ts
index 249ec24..938ad67 100644
--- a/src/settings.ts
+++ b/src/settings.ts
@@ -56,6 +56,7 @@ export interface OmnisearchSettings extends WeightingSettings {
splitCamelCase: boolean
openInNewPane: boolean
verboseLogging: boolean
+ vimLikeNavigationShortcut: boolean
fuzziness: '0' | '1' | '2'
}
@@ -100,31 +101,36 @@ export class SettingsTab extends PluginSettingTab {
//#region Indexing
- new Setting(containerEl).setName('Indexing').setHeading()
-
- const textExtractDesc = new DocumentFragment()
- if (getTextExtractor()) {
- textExtractDesc.createSpan({}, span => {
- span.innerHTML = `👍 You have installed Text Extractor, Omnisearch will use it to index PDFs and images.
+ const indexingDesc = new DocumentFragment()
+ indexingDesc.createSpan({}, span => {
+ span.innerHTML = `⚠️ Changing indexing settings will clear the cache, and requires a restart of Obsidian.
`
+ if (getTextExtractor()) {
+ span.innerHTML += `
+ 👍 You have installed Text Extractor, Omnisearch can use it to index PDFs and images contents.
Text extraction only works on desktop, but the cache can be synchronized with your mobile device.`
- })
- } else {
- textExtractDesc.createSpan({}, span => {
- span.innerHTML = `⚠️ Omnisearch requires Text Extractor to index PDFs and images.`
- })
- }
- new Setting(containerEl).setDesc(textExtractDesc)
+ } else {
+ span.innerHTML += `⚠️ Omnisearch requires Text Extractor to index PDFs and images.`
+ }
+ })
+
+ new Setting(containerEl)
+ .setName('Indexing')
+ .setHeading()
+ .setDesc(indexingDesc)
// PDF Indexing
const indexPDFsDesc = new DocumentFragment()
indexPDFsDesc.createSpan({}, span => {
- span.innerHTML = `Include PDFs in search results`
+ span.innerHTML = `Omnisearch will use Text Extractor to index the content of your PDFs`
})
new Setting(containerEl)
- .setName(`PDFs Indexing ${getTextExtractor() ? '' : '⚠️ Disabled'}`)
+ .setName(
+ `PDFs content indexing ${getTextExtractor() ? '' : '⚠️ Disabled'}`
+ )
.setDesc(indexPDFsDesc)
.addToggle(toggle =>
toggle.setValue(settings.PDFIndexing).onChange(async v => {
+ await database.clearCache()
settings.PDFIndexing = v
await saveSettings(this.plugin)
})
@@ -134,27 +140,49 @@ export class SettingsTab extends PluginSettingTab {
// Images Indexing
const indexImagesDesc = new DocumentFragment()
indexImagesDesc.createSpan({}, span => {
- span.innerHTML = `Include images in search results`
+ span.innerHTML = `Omnisearch will use Text Extractor to OCR your images and index their content`
})
new Setting(containerEl)
- .setName(`Images Indexing ${getTextExtractor() ? '' : '⚠️ Disabled'}`)
+ .setName(`Images OCR indexing ${getTextExtractor() ? '' : '⚠️ Disabled'}`)
.setDesc(indexImagesDesc)
.addToggle(toggle =>
toggle.setValue(settings.imagesIndexing).onChange(async v => {
+ await database.clearCache()
settings.imagesIndexing = v
await saveSettings(this.plugin)
})
)
.setDisabled(!getTextExtractor())
+ // Index filenames of unsupported files
+ const indexUnsupportedDesc = new DocumentFragment()
+ indexUnsupportedDesc.createSpan({}, span => {
+ span.innerHTML = `
+ Omnisearch can index filenames of "unsupported" files, such as e.g. .mp4
, .xlsx
,
+ or non-extracted PDFs & images.
+ "Obsidian setting" will respect the value of "Files & Links > Detect all file extensions"`
+ })
+ new Setting(containerEl)
+ .setName('Index paths of unsupported files')
+ .setDesc(indexUnsupportedDesc)
+ .addDropdown(dropdown => {
+ dropdown
+ .addOptions({ yes: 'Yes', no: 'No', default: 'Obsidian setting' })
+ .setValue(settings.unsupportedFilesIndexing)
+ .onChange(async v => {
+ await database.clearCache()
+ ;(settings.unsupportedFilesIndexing as any) = v
+ await saveSettings(this.plugin)
+ })
+ })
+
// Additional text files to index
const indexedFileTypesDesc = new DocumentFragment()
indexedFileTypesDesc.createSpan({}, span => {
span.innerHTML = `In addition to standard md files, Omnisearch can also index other PLAINTEXT files.
Add extensions separated by a space, without the dot. Example: "txt org csv".
⚠️ Using extensions of non-plaintext files (like .docx or .pptx) WILL cause crashes,
- because Omnisearch will try to index their content.
- ${needsARestart}`
+ because Omnisearch will try to index their content.`
})
new Setting(containerEl)
.setName('Additional TEXT files to index')
@@ -164,32 +192,12 @@ export class SettingsTab extends PluginSettingTab {
.setValue(settings.indexedFileTypes.join(' '))
.setPlaceholder('Example: txt org csv')
.onChange(async v => {
+ await database.clearCache()
settings.indexedFileTypes = v.split(' ')
await saveSettings(this.plugin)
})
})
- // Unsupported files
- const indexUnsupportedDesc = new DocumentFragment()
- indexUnsupportedDesc.createSpan({}, span => {
- span.innerHTML = `
- Omnisearch can index filenames of "unsupported" files, such as e.g. .mp4
or .xlsx
.
- "Obsidian setting" will respect the value of "Files & Links > Detect all file extensions".
-
${needsARestart}`
- })
- new Setting(containerEl)
- .setName('Index unsupported files')
- .setDesc(indexUnsupportedDesc)
- .addDropdown(dropdown => {
- dropdown
- .addOptions({ yes: 'Yes', no: 'No', default: 'Obsidian setting' })
- .setValue(settings.unsupportedFilesIndexing)
- .onChange(async v => {
- ;(settings.unsupportedFilesIndexing as any) = v
- await saveSettings(this.plugin)
- })
- })
-
//#endregion Indexing
//#region Behavior
@@ -256,6 +264,7 @@ export class SettingsTab extends PluginSettingTab {
})
)
+ // Open in new pane
new Setting(containerEl)
.setName('Open in new pane')
.setDesc(
@@ -268,6 +277,19 @@ export class SettingsTab extends PluginSettingTab {
})
)
+ // Set Vim like navigation keys
+ new Setting(containerEl)
+ .setName('Set Vim like navigation keys')
+ .setDesc(
+ 'Navigate down the results with Ctrl/⌘ + J/N, or navigate up with Ctrl/⌘ + K/P'
+ )
+ .addToggle(toggle =>
+ toggle.setValue(settings.vimLikeNavigationShortcut).onChange(async v => {
+ settings.vimLikeNavigationShortcut = v
+ await saveSettings(this.plugin)
+ })
+ )
+
// Fuzziness
new Setting(containerEl)
.setName('Fuzziness')
@@ -409,7 +431,9 @@ export class SettingsTab extends PluginSettingTab {
.addSlider(cb => this.weightSlider(cb, 'weightH3'))
new Setting(containerEl)
- .setName(`Tags without the # (default: ${DEFAULT_SETTINGS.weightUnmarkedTags})`)
+ .setName(
+ `Tags without the # (default: ${DEFAULT_SETTINGS.weightUnmarkedTags})`
+ )
.addSlider(cb => this.weightSlider(cb, 'weightUnmarkedTags'))
//#endregion Results Weighting
@@ -488,7 +512,6 @@ export class SettingsTab extends PluginSettingTab {
cb.setButtonText('Clear cache')
cb.onClick(async () => {
await database.clearCache()
- new Notice('Omnisearch - Cache cleared. Please restart Obsidian.')
})
})
}
@@ -499,9 +522,9 @@ export class SettingsTab extends PluginSettingTab {
cb.setLimits(1, 5, 0.1)
.setValue(settings[key])
.setDynamicTooltip()
- .onChange(v => {
+ .onChange(async (v) => {
settings[key] = v
- saveSettings(this.plugin)
+ await saveSettings(this.plugin)
})
}
}
@@ -516,6 +539,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
unsupportedFilesIndexing: 'no',
splitCamelCase: false,
openInNewPane: false,
+ vimLikeNavigationShortcut: app.vault.getConfig('vimMode') as boolean,
ribbonIcon: true,
showExcerpt: true,
diff --git a/src/tools/api.ts b/src/tools/api.ts
index 440f6e8..eae2d7b 100644
--- a/src/tools/api.ts
+++ b/src/tools/api.ts
@@ -1,7 +1,7 @@
import type { ResultNote } from '../globals'
import { Query } from '../search/query'
import { searchEngine } from '../search/omnisearch'
-import { makeExcerpt } from './utils'
+import { makeExcerpt } from './text-processing'
import { refreshIndex } from '../notes-index'
type ResultNoteApi = {
@@ -31,7 +31,7 @@ function mapResults(results: ResultNote[]): ResultNoteApi[] {
const excerpt = makeExcerpt(content, matches[0]?.offset ?? -1)
- return {
+ const res: ResultNoteApi = {
score,
path,
basename,
@@ -42,8 +42,10 @@ function mapResults(results: ResultNote[]): ResultNoteApi[] {
offset: match.offset,
}
}),
- excerpt,
+ excerpt: excerpt.content,
}
+
+ return res
})
}
diff --git a/src/tools/notes.ts b/src/tools/notes.ts
index 746cd16..2d61574 100644
--- a/src/tools/notes.ts
+++ b/src/tools/notes.ts
@@ -1,6 +1,6 @@
import { type CachedMetadata, MarkdownView, TFile } from 'obsidian'
-import { stringsToRegex } from './utils'
import type { ResultNote } from '../globals'
+import { stringsToRegex } from './text-processing'
export async function openNote(
item: ResultNote,
diff --git a/src/tools/text-processing.ts b/src/tools/text-processing.ts
new file mode 100644
index 0000000..197cce6
--- /dev/null
+++ b/src/tools/text-processing.ts
@@ -0,0 +1,204 @@
+import {
+ highlightClass,
+ type SearchMatch,
+ regexLineSplit,
+ regexYaml,
+ getChsSegmenter,
+ SPACE_OR_PUNCTUATION_UNIQUE,
+ regexStripQuotes,
+ excerptAfter,
+ excerptBefore,
+} from 'src/globals'
+import { settings } from 'src/settings'
+import { escapeRegex, warnDebug } from './utils'
+import type { Query } from 'src/search/query'
+import { Notice } from 'obsidian'
+
+/**
+ * Replacer callback for `String.prototype.replace` that wraps the matched word
+ * in a highlight span.
+ *
+ * @param _substring the full match (unused)
+ * @param args the capture groups: args[0] is the single char preceding args[1],
+ *   which is the word we want to highlight
+ * @returns the preceding char followed by the highlighted word, or an
+ *   entity-encoded placeholder when the word is missing or blank
+ */
+export function highlighterGroups(_substring: string, ...args: any[]) {
+  const [preceding, word] = args
+  if (typeof word !== 'string' || !word.trim()) {
+    // Entity-encoded so the placeholder is displayed instead of being parsed as an HTML tag
+    return '&lt;no content&gt;'
+  }
+  return `${preceding ?? ''}<span class="${highlightClass}">${word}</span>`
+}
+
+/**
+ * Highlights, for every search match, the whole-word occurrence of the matched
+ * text that lies closest to the match's offset.
+ *
+ * @param text the string to decorate; it is inserted verbatim, no HTML
+ *   escaping is performed here
+ * @param matches the matches to highlight; neither the array nor its items
+ *   are mutated
+ * @returns `text` with each selected occurrence wrapped in a span carrying
+ *   the `highlightClass` CSS class
+ */
+export function highlightText(text: string, matches: SearchMatch[]): string {
+  // Every range is located in the untouched input, so the markup inserted for
+  // one match can neither shift the offsets of the others nor be matched itself
+  const ranges: { start: number; end: number }[] = []
+
+  for (const matchInfo of matches) {
+    // An empty pattern only yields zero-length matches, which would stall `exec`
+    if (!matchInfo.match) continue
+
+    // The matched text may contain regex metacharacters (e.g. "c++"):
+    // escape it so that it is always searched literally
+    const matchRegex = new RegExp(
+      `\\b${escapeRegex(matchInfo.match)}\\b`,
+      'giu'
+    )
+
+    let closest: { start: number; end: number } | null = null
+    let found: RegExpExecArray | null
+    while ((found = matchRegex.exec(text)) !== null) {
+      const candidate = {
+        start: found.index,
+        end: found.index + found[0].length,
+      }
+      // Strict comparison: on a tie, the earliest occurrence wins
+      if (
+        closest === null ||
+        Math.abs(candidate.start - matchInfo.offset) <
+          Math.abs(closest.start - matchInfo.offset)
+      ) {
+        closest = candidate
+      }
+    }
+    if (closest) ranges.push(closest)
+  }
+
+  // Assemble the result from left to right, skipping any range that overlaps
+  // an already highlighted one
+  ranges.sort((a, b) => a.start - b.start)
+  let result = ''
+  let cursor = 0
+  for (const { start, end } of ranges) {
+    if (start < cursor) continue
+    result +=
+      text.substring(cursor, start) +
+      `<span class="${highlightClass}">` +
+      text.substring(start, end) +
+      '</span>'
+    cursor = end
+  }
+  return result + text.substring(cursor)
+}
+
+/**
+ * Escapes the characters that have a special meaning in HTML (`&`, `<`, `>`,
+ * `"` and `'`) so that the string can be inserted as text into markup.
+ *
+ * @param html the raw string
+ * @returns the string with those characters replaced by HTML entities
+ */
+export function escapeHTML(html: string): string {
+  const entities: Record<string, string> = {
+    '&': '&amp;',
+    '<': '&lt;',
+    '>': '&gt;',
+    '"': '&quot;',
+    "'": '&#039;',
+  }
+  // Single pass: the `&` of an entity produced here is never escaped again
+  return html.replace(/[&<>"']/g, ch => entities[ch] ?? ch)
+}
+
+/**
+ * Splits `text` on `regexLineSplit` and keeps only the non-empty pieces that
+ * are longer than 2 characters.
+ */
+export function splitLines(text: string): string[] {
+  const pieces = text.split(regexLineSplit)
+  return pieces.filter(piece => Boolean(piece) && piece.length > 2)
+}
+
+/**
+ * Returns `text` with whatever `regexYaml` matches removed.
+ */
+export function removeFrontMatter(text: string): string {
+  // Regex to recognize YAML Front Matter (at beginning of file, 3 hyphens, then any character, including newlines, then 3 hyphens).
+  const withoutFrontMatter = text.replace(regexYaml, '')
+  return withoutFrontMatter
+}
+
+/**
+ * Used to find excerpts in a note body, or select which words to highlight.
+ *
+ * The returned regex has two capture groups: group 1 is the separator that
+ * precedes the word (possibly empty), group 2 is the word itself.
+ *
+ * @param strings the words to look for; the array is not modified
+ * @returns a global, unicode regex, or `/^$/g` when there is nothing to look for
+ */
+export function stringsToRegex(strings: string[]): RegExp {
+  // Empty strings would become empty alternatives that match at every separator
+  const words = strings.filter(s => s.length > 0)
+  if (!words.length) return /^$/g
+
+  // Sort a copy by decreasing length, so that longer strings are matched first
+  // and the caller's array keeps its order
+  const alternatives = words
+    .sort((a, b) => b.length - a.length)
+    .map(s => escapeRegex(s))
+    .join('|')
+
+  // Default word split is not applied if the user uses the cm-chs-patch plugin
+  let separator = ''
+  if (!getChsSegmenter()) {
+    // Split on start of line, spaces, punctuation, or capital letters (for camelCase)
+    // We also add the hyphen to the list of characters that can split words
+    separator = settings.splitCamelCase
+      ? `^|${SPACE_OR_PUNCTUATION_UNIQUE.source}|\-|[A-Z]`
+      : `^|${SPACE_OR_PUNCTUATION_UNIQUE.source}|\-`
+  }
+
+  return new RegExp(`(${separator})(${alternatives})`, 'gu')
+}
+
+/**
+ * Collects the positions of the words matched by `reg` in `text`.
+ *
+ * `reg` is expected to expose the separator in capture group 1 and the word in
+ * capture group 2, as the regexes built by `stringsToRegex` do. The text is
+ * lowercased before matching. The scan stops after 100 regex hits or 50ms.
+ *
+ * @param text the text to scan
+ * @param reg the regex to run; its `lastIndex` is reset before and after the scan
+ * @param query when given, and when its `segmentsToStr()` value is found in the
+ *   text at the offset of one of the matches, that match is moved first
+ * @returns the matches, each with the offset of the word itself
+ */
+export function getMatches(
+  text: string,
+  reg: RegExp,
+  query?: Query
+): SearchMatch[] {
+  text = text.toLowerCase()
+  // A global regex is stateful: always start from the beginning, even if a
+  // previous scan with the same object was interrupted
+  reg.lastIndex = 0
+  const startTime = Date.now()
+  let matches: SearchMatch[] = []
+  let count = 0
+  let match: RegExpExecArray | null
+  while ((match = reg.exec(text)) !== null) {
+    // Avoid infinite loops, stop looking after 100 matches or if we're taking too much time
+    if (++count >= 100 || Date.now() - startTime > 50) {
+      warnDebug('Stopped getMatches at', count, 'results')
+      break
+    }
+    const word = match[2]
+    if (word) {
+      // match.index points at the separator captured by group 1, which is
+      // empty at the start of the text: skip its actual length, not always 1
+      matches.push({
+        match: word,
+        offset: match.index + (match[1]?.length ?? 0),
+      })
+    }
+    // `exec` does not move past a zero-length match by itself
+    if (match[0].length === 0) reg.lastIndex++
+    // Without the global flag `exec` would return the same match forever
+    if (!reg.global) break
+  }
+  reg.lastIndex = 0
+
+  // If the query can be found "as is" in the text, put this match first
+  if (query) {
+    const needle = query.segmentsToStr()
+    const best = needle ? text.indexOf(needle) : -1
+    if (best > -1 && matches.some(m => m.offset === best)) {
+      matches = matches.filter(m => m.offset !== best)
+      matches.unshift({
+        offset: best,
+        match: needle,
+      })
+    }
+  }
+
+  return matches
+}
+
+/**
+ * Builds an HTML-escaped excerpt of `content` around `offset`.
+ *
+ * When `offset` is greater than -1, the excerpt spans from `excerptBefore`
+ * characters before it to `excerptAfter` characters after it, with an ellipsis
+ * on each truncated side. Otherwise the first `excerptAfter` characters are
+ * used. When `settings.renderLineReturnInExcerpts` is on, empty lines are
+ * dropped and the remaining line returns are rendered as `<br>`.
+ *
+ * @param content the full text
+ * @param offset position to center the excerpt on; a nullish value is treated as -1
+ * @returns the excerpt and the offset it was built for, or
+ *   `{ content: '', offset: -1 }` if an error was thrown (a Notice is shown
+ *   and the error is logged to the console)
+ */
+export function makeExcerpt(
+  content: string,
+  offset: number
+): { content: string; offset: number } {
+  try {
+    const pos = offset ?? -1
+    const from = Math.max(0, pos - excerptBefore)
+    const to = Math.min(content.length, pos + excerptAfter)
+    if (pos > -1) {
+      content =
+        (from > 0 ? '…' : '') +
+        content.slice(from, to).trim() +
+        (to < content.length - 1 ? '…' : '')
+    } else {
+      content = content.slice(0, excerptAfter)
+    }
+    if (settings.renderLineReturnInExcerpts) {
+      // Remove multiple line returns
+      content = content
+        .split(/(?:\r\n|\r|\n)/g)
+        .filter(l => l)
+        .join('\n')
+
+      // Drop what precedes the last line return found at or before the
+      // position of the match within the excerpt
+      const last = content.lastIndexOf('\n', pos - from)
+
+      if (last > 0) {
+        content = content.slice(last)
+      }
+    }
+
+    // Escape before inserting our own markup, so that only the <br> tags
+    // added below end up as HTML
+    content = escapeHTML(content)
+
+    if (settings.renderLineReturnInExcerpts) {
+      content = content.trim().replaceAll('\n', '<br>')
+    }
+
+    return { content: content, offset: pos }
+  } catch (e) {
+    new Notice(
+      'Omnisearch - Error while creating excerpt, see developer console'
+    )
+    console.error(`Omnisearch - Error while creating excerpt`)
+    console.error(e)
+    return { content: '', offset: -1 }
+  }
+}
+
+/**
+ * Extracts the "expressions in quotes" found in a string.
+ * @param str the string to scan
+ * @returns the non-empty contents of each pair of double quotes, in order of
+ *   appearance, without the quotes; an empty array when there is none
+ */
+export function splitQuotes(str: string): string[] {
+  const quoted: string[] = []
+  for (const [, inner] of str.matchAll(/"(.*?)"/g)) {
+    if (inner) quoted.push(inner)
+  }
+  return quoted
+}
+
+/**
+ * Returns `str` with everything matched by `regexStripQuotes` removed.
+ */
+export function stripSurroundingQuotes(str: string): string {
+  const stripped = str.replace(regexStripQuotes, '')
+  return stripped
+}
diff --git a/src/tools/utils.ts b/src/tools/utils.ts
index 49b11f1..50fabba 100644
--- a/src/tools/utils.ts
+++ b/src/tools/utils.ts
@@ -1,55 +1,17 @@
import {
type CachedMetadata,
getAllTags,
- Notice,
parseFrontMatterAliases,
Platform,
} from 'obsidian'
-import {
- excerptAfter,
- excerptBefore,
- getChsSegmenter,
- getTextExtractor,
- highlightClass,
- isSearchMatch,
- regexLineSplit,
- regexStripQuotes,
- regexYaml,
- SPACE_OR_PUNCTUATION,
- type SearchMatch,
-} from '../globals'
+import { getTextExtractor, isSearchMatch, type SearchMatch } from '../globals'
import { canIndexUnsupportedFiles, settings } from '../settings'
import { type BinaryLike, createHash } from 'crypto'
import { md5 } from 'pure-md5'
-export function highlighter(str: string): string {
- return `${str}`
-}
-
-export function highlighterGroups(_substring: string, ...args: any[]) {
- // args[0] is the single char preceding args[1], which is the word we want to highlight
- if (!!args[1].trim())
- return `${args[0]}${args[1]}`
- return '<no content>'
-}
-
-export function escapeHTML(html: string): string {
- return html
- .replaceAll('&', '&')
- .replaceAll('<', '<')
- .replaceAll('>', '>')
- .replaceAll('"', '"')
- .replaceAll("'", ''')
-}
-
-export function splitLines(text: string): string[] {
- return text.split(regexLineSplit).filter(l => !!l && l.length > 2)
-}
-
-export function removeFrontMatter(text: string): string {
- // Regex to recognize YAML Front Matter (at beginning of file, 3 hyphens, than any charecter, including newlines, then 3 hyphens).
- return text.replace(regexYaml, '')
-}
+// export function highlighter(str: string): string {
+// return `${str}`
+// }
export function pathWithoutFilename(path: string): string {
const split = path.split('/')
@@ -82,32 +44,6 @@ export function getAllIndices(text: string, regex: RegExp): SearchMatch[] {
.filter(isSearchMatch)
}
-/**
- * Used to find excerpts in a note body, or select which words to highlight
- */
-export function stringsToRegex(strings: string[]): RegExp {
- if (!strings.length) return /^$/g
-
- // sort strings by decreasing length, so that longer strings are matched first
- strings.sort((a, b) => b.length - a.length)
-
- const joined =
- '(' +
- // Default word split is not applied if the user uses the cm-chs-patch plugin
- (getChsSegmenter()
- ? ''
- : // Split on start of line, spaces, punctuation, or capital letters (for camelCase)
- // We also add the hyphen to the list of characters that can split words
- settings.splitCamelCase
- ? `^|${SPACE_OR_PUNCTUATION.source}|\-|[A-Z]`
- : `^|${SPACE_OR_PUNCTUATION.source}|\-`) +
- ')' +
- `(${strings.map(s => escapeRegex(s)).join('|')})`
-
- const reg = new RegExp(`${joined}`, 'giu')
- return reg
-}
-
export function extractHeadingsFromCache(
cache: CachedMetadata,
level: number
@@ -121,69 +57,6 @@ export function loopIndex(index: number, nbItems: number): number {
return (index + nbItems) % nbItems
}
-export function makeExcerpt(content: string, offset: number): string {
- try {
- const pos = offset ?? -1
- const from = Math.max(0, pos - excerptBefore)
- const to = Math.min(content.length, pos + excerptAfter)
- if (pos > -1) {
- content =
- (from > 0 ? '…' : '') +
- content.slice(from, to).trim() +
- (to < content.length - 1 ? '…' : '')
- } else {
- content = content.slice(0, excerptAfter)
- }
- if (settings.renderLineReturnInExcerpts) {
- const lineReturn = new RegExp(/(?:\r\n|\r|\n)/g)
- // Remove multiple line returns
- content = content
- .split(lineReturn)
- .filter(l => l)
- .join('\n')
-
- const last = content.lastIndexOf('\n', pos - from)
-
- if (last > 0) {
- content = content.slice(last)
- }
- }
-
- content = escapeHTML(content)
-
- if (settings.renderLineReturnInExcerpts) {
- content = content.trim().replaceAll('\n', '<br>')
- }
-
- return content
- } catch (e) {
- new Notice(
- 'Omnisearch - Error while creating excerpt, see developer console'
- )
- console.error(`Omnisearch - Error while creating excerpt`)
- console.error(e)
- return ''
- }
-}
-
-/**
- * splits a string in words or "expressions in quotes"
- * @param str
- * @returns
- */
-export function splitQuotes(str: string): string[] {
- return (
- str
- .match(/"(.*?)"/g)
- ?.map(s => s.replace(/"/g, ''))
- .filter(q => !!q) ?? []
- )
-}
-
-export function stripSurroundingQuotes(str: string): string {
- return str.replace(regexStripQuotes, '')
-}
-
function mapAsync(
array: T[],
callbackfn: (value: T, index: number, array: T[]) => Promise
@@ -281,9 +154,7 @@ export function isFilenameIndexable(path: string): boolean {
canIndexUnsupportedFiles() ||
isFilePlaintext(path) ||
isFileCanvas(path) ||
- isFileFromDataloomPlugin(path) ||
- isFilePDF(path) ||
- isFileImage(path)
+ isFileFromDataloomPlugin(path)
)
}
@@ -344,14 +215,15 @@ export function chunkArray(arr: T[], len: number): T[][] {
* @param text
*/
export function splitCamelCase(text: string): string[] {
- const split = text
+  // No lowercase letter directly followed by an uppercase one: nothing to split
+  const hasCamelHump = /[a-z][A-Z]/.test(text)
+  if (!hasCamelHump) return []
+  // Put a space at every lower/upper boundary, then cut on spaces and drop empties
+  const words = text
.replace(/([a-z](?=[A-Z]))/g, '$1 ')
.split(' ')
.filter(t => t)
- if (split.length > 1) {
- return split
- }
- return []
+  return words
}
/**
@@ -360,11 +232,10 @@ export function splitCamelCase(text: string): string[] {
* @param text
*/
export function splitHyphens(text: string): string[] {
- const split = text.split('-').filter(t => t)
- if (split.length > 1) {
- return split
+  const hasHyphen = text.includes('-')
+  if (!hasHyphen) {
+    return [] // no separator, so there is nothing to split
}
- return []
+  // Cut on every hyphen; leading, trailing or doubled hyphens leave empty pieces, which are dropped
+  return text.split('-').filter(piece => piece !== '')
}
export function logDebug(...args: any[]): void {
diff --git a/versions.json b/versions.json
index f3cd249..069ad90 100644
--- a/versions.json
+++ b/versions.json
@@ -112,5 +112,8 @@
"1.14.2": "1.0.0",
"1.15.0-beta.1": "1.0.0",
"1.15.0-beta.2": "1.0.0",
- "1.15.0": "1.0.0"
+ "1.15.0": "1.0.0",
+ "1.15.1": "1.3.0",
+ "1.16.0-beta.1": "1.3.0",
+ "1.16.0": "1.3.0"
}
\ No newline at end of file