diff --git a/README.md b/README.md index 258ce4e..3f15d01 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,8 @@ ![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/scambier/obsidian-omnisearch) ![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/scambier/obsidian-omnisearch?include_prereleases&label=BRAT%20beta) -> **Omnisearch** is a search engine that "_just works_". It always instantly shows you the most relevant results, thanks to its smart weighting algorithm. +> **Omnisearch** is a search engine that "_just works_". It always instantly shows you the most relevant results, thanks +> to its smart weighting algorithm. Under the hood, it uses the excellent [MiniSearch](https://github.com/lucaong/minisearch) library. @@ -17,9 +18,11 @@ Under the hood, it uses the excellent [MiniSearch](https://github.com/lucaong/mi > Omnisearch's first goal is to _locate_ files instantly. You can see it as a _Quick Switcher_ on steroids. 
- Find your **📝notes, 📄PDFs, and 🖼images** faster than ever - - _Images OCR and PDF indexing are only available on desktop_ -- Automatic document scoring using the [BM25 algorithm](https://github.com/lucaong/minisearch/issues/129#issuecomment-1046257399) - - The relevance of a document against a query depends on the number of times the query terms appear in the document, its filename, and its headings + - _Images OCR and PDF indexing are only available on desktop_ +- Automatic document scoring using + the [BM25 algorithm](https://github.com/lucaong/minisearch/issues/129#issuecomment-1046257399) + - The relevance of a document against a query depends on the number of times the query terms appear in the document, + its filename, and its headings - Keyboard first: you never have to use your mouse - Workflow similar to the "Quick Switcher" core plugin - Resistance to typos @@ -28,12 +31,15 @@ Under the hood, it uses the excellent [MiniSearch](https://github.com/lucaong/mi - Directly Insert a `[[link]]` from the search results - Supports Vim navigation keys -**Note:** support of Chinese, Japanese, Korean, etc. depends on [this additional plugin](https://github.com/aidenlx/cm-chs-patch). Please read its documentation for more information. +**Note:** support of Chinese, Japanese, Korean, etc. depends +on [this additional plugin](https://github.com/aidenlx/cm-chs-patch). Please read its documentation for more +information. ## Installation - Omnisearch is available on [the official Community Plugins repository](https://obsidian.md/plugins?search=Omnisearch). -- Beta releases can be installed through [BRAT](https://github.com/TfTHacker/obsidian42-brat). **Be advised that those versions can be buggy and break things.** +- Beta releases can be installed through [BRAT](https://github.com/TfTHacker/obsidian42-brat). 
**Be advised that those + versions can be buggy and break things.** Only install beta versions if you're willing to You can check the [CHANGELOG](./CHANGELOG.md) for more information on the different versions. @@ -43,14 +49,15 @@ Omnisearch can be used within 2 different contexts: ### Vault Search -Omnisearch's core feature, accessible with the Command Palette "**_Omnisearch: Vault search_**". This modal searches through your vault and returns the most relevant notes. That's all you need to _find_ a note. +Omnisearch's core feature, accessible with the Command Palette "**_Omnisearch: Vault search_**". This modal searches +through your vault and returns the most relevant notes. That's all you need to _find_ a note. If you want to list all the search matches of a single note, you can do so by using `tab` to open the In-File Search. -### In-File Search - -Also accessible through the Command Palette "**_Omnisearch: In-file search_**". This modal searches through the active note's content and lists the matching results. Just press enter to automatically scroll to the right place. +### In-File Search +Also accessible through the Command Palette "**_Omnisearch: In-file search_**". This modal searches through the active +note's content and lists the matching results. Just press enter to automatically scroll to the right place. ## URL Scheme & Public API @@ -58,9 +65,11 @@ You can open Omnisearch with the following scheme: `obsidian://omnisearch?query= ---- -For plugin developers and Dataview users, Omnisearch is also accessible through the global object `omnisearch` (`window.omnisearch`) +For plugin developers and Dataview users, Omnisearch is also accessible through the global +object `omnisearch` (`window.omnisearch`) -> This API is an experimental feature, the `ResultNote` interface may change in the future. The `search()` function returns at most 50 results. +> This API is an experimental feature, the `ResultNote` interface may change in the future. 
The `search()` function +> returns at most 50 results. ```ts // API: @@ -110,12 +119,13 @@ There are several CSS classes you can use to customize the appearance of Omnisea .omnisearch-input-field ``` -For example, if you'd like the usual yellow highlight on search matches, you can add this code inside a CSS snippet file: +For example, if you'd like the usual yellow highlight on search matches, you can add this code inside a CSS snippet +file: ```css .omnisearch-highlight { - color: var(--text-normal); - background-color: var(--text-highlight-bg); + color: var(--text-normal); + background-color: var(--text-highlight-bg); } ``` @@ -123,14 +133,27 @@ See [styles.css](./assets/styles.css) for more information. ## Issues & Solutions -**Omnisearch makes Obsidian sluggish at startup.** +**Omnisearch makes Obsidian sluggish/freeze at startup.** -- While Omnisearch does its best to work smoothly in the background, bigger vaults can cause some hiccups at startup because of the search index size. +- While Omnisearch does its best to work smoothly in the background, bigger vaults and files can make Obsidian stutter + during indexing. +- If you have several thousand files, Obsidian may freeze for a few seconds at startup while the Omnisearch cache is + loaded in memory. + +**Omnisearch is slow to index my PDFs and images** + +- The first time Omnisearch indexes those files, it needs to extract their text. This can take a long time, but + will only happen once. This process is also resumable, so you can temporarily disable PDFs/images indexing, or close + Obsidian without losing data. + +**Can I index PDFs/images on mobile?** + +- Not at the moment. On mobile devices, text extraction either doesn't work or consumes too many resources. **Omnisearch gives inconsistent/invalid results, there are errors in the developer console** - Restart Obsidian to force a reindex of Omnisearch. 
-- The cache can be corrupted; you can clear it at the bottom of the settings page, then restart Obsidian. +- The cache could be corrupted; you can clear it at the bottom of the settings page, then restart Obsidian. **A query should return a result that does not appear.** @@ -144,8 +167,8 @@ See [here](https://github.com/scambier/obsidian-omnisearch#css-customization). **I'm still having an issue** -You can write your issue [here](https://github.com/scambier/obsidian-omnisearch/issues) with as much details as possible. - +You can write your issue [here](https://github.com/scambier/obsidian-omnisearch/issues) with as many details as +possible. ## LICENSE @@ -153,6 +176,7 @@ Omnisearch is licensed under [GPL-3](https://tldrlegal.com/license/gnu-general-p ## Thanks -To all people who donate through [Ko-Fi](https://ko-fi.com/scambier) or [Github Sponsors](https://github.com/sponsors/scambier) ❤ +To all people who donate through [Ko-Fi](https://ko-fi.com/scambier) +or [GitHub Sponsors](https://github.com/sponsors/scambier) ❤ ![JetBrains Logo (Main) logo](https://resources.jetbrains.com/storage/products/company/brand/logos/jb_beam.svg) diff --git a/package.json b/package.json index ceaab65..572555a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "scambier.obsidian-search", - "version": "1.8.1", + "version": "1.9.0-beta.1", "description": "A search engine for Obsidian", "main": "dist/main.js", "scripts": { diff --git a/src/cache-manager.ts b/src/cache-manager.ts index e1c21b0..3c805d6 100644 --- a/src/cache-manager.ts +++ b/src/cache-manager.ts @@ -1,5 +1,5 @@ import { Notice } from 'obsidian' -import type { IndexedDocument } from './globals' +import type { DocumentRef, IndexedDocument } from './globals' import { database } from './database' import type { AsPlainObject } from 'minisearch' import type MiniSearch from 'minisearch' @@ -53,7 +53,7 @@ class CacheManager { } public async getMinisearchCache(): Promise<{ - paths: { path: string; mtime: 
number }[] + paths: DocumentRef[] data: AsPlainObject } | null> { try { diff --git a/src/components/ModalVault.svelte b/src/components/ModalVault.svelte index e0244ef..ad240f0 100644 --- a/src/components/ModalVault.svelte +++ b/src/components/ModalVault.svelte @@ -38,17 +38,14 @@ case IndexingStepType.LoadingCache: indexingStepDesc = 'Loading cache...' break - case IndexingStepType.ReadingNotes: + case IndexingStepType.ReadingFiles: + indexingStepDesc = 'Reading files...' + break + case IndexingStepType.IndexingFiles: + indexingStepDesc = 'Indexing files...' + break + case IndexingStepType.WritingCache: updateResults() - indexingStepDesc = 'Reading notes...' - break - case IndexingStepType.ReadingPDFs: - indexingStepDesc = 'Reading PDFs...' - break - case IndexingStepType.ReadingImages: - indexingStepDesc = 'Reading images...' - break - case IndexingStepType.UpdatingCache: indexingStepDesc = 'Updating cache...' break default: diff --git a/src/database.ts b/src/database.ts index 05e10eb..e9ce767 100644 --- a/src/database.ts +++ b/src/database.ts @@ -1,6 +1,6 @@ import Dexie from 'dexie' import type { AsPlainObject } from 'minisearch' -import type { IndexedDocument } from './globals' +import type { DocumentRef, IndexedDocument } from './globals' export class OmnisearchCache extends Dexie { public static readonly dbVersion = 8 @@ -8,25 +8,11 @@ export class OmnisearchCache extends Dexie { private static instance: OmnisearchCache - //#region Table declarations - - /** - * @deprecated - */ - documents!: Dexie.Table< - { - path: string - mtime: number - document: IndexedDocument - }, - string - > - searchHistory!: Dexie.Table<{ id?: number; query: string }, number> minisearch!: Dexie.Table< { date: string - paths: Array<{ path: string; mtime: number }> + paths: DocumentRef[] data: AsPlainObject }, string @@ -37,7 +23,6 @@ export class OmnisearchCache extends Dexie { // Database structure this.version(OmnisearchCache.dbVersion).stores({ searchHistory: '++id', - documents: 
'path', minisearch: 'date', }) } @@ -58,7 +43,6 @@ export class OmnisearchCache extends Dexie { console.log('Omnisearch - Those IndexedDb databases will be deleted:') for (const db of toDelete) { if (db.name) { - console.log(db.name + ' ' + db.version) indexedDB.deleteDatabase(db.name) } } diff --git a/src/globals.ts b/src/globals.ts index c5abc17..3c00145 100644 --- a/src/globals.ts +++ b/src/globals.ts @@ -20,12 +20,13 @@ export const EventNames = { export const enum IndexingStepType { Done, LoadingCache, - ReadingNotes, - ReadingPDFs, - ReadingImages, - UpdatingCache, + ReadingFiles, + IndexingFiles, + WritingCache, } +export type DocumentRef = { path: string; mtime: number } + export type IndexedDocument = { path: string basename: string @@ -51,7 +52,7 @@ export const isSearchMatch = (o: { offset?: number }): o is SearchMatch => { return o.offset !== undefined } -export const indexingStep = writable(IndexingStepType.LoadingCache) +export const indexingStep = writable(IndexingStepType.Done) export type ResultNote = { score: number diff --git a/src/main.ts b/src/main.ts index 0a4be6f..86d90c0 100644 --- a/src/main.ts +++ b/src/main.ts @@ -6,7 +6,13 @@ import { import { loadSettings, settings, SettingsTab, showExcerpt } from './settings' import { eventBus, EventNames, indexingStep, IndexingStepType } from './globals' import api from './tools/api' -import { isFileImage, isFilePDF, isFilePlaintext } from './tools/utils' +import { + isFileImage, + isFileIndexable, + isFilePDF, + isFilePlaintext, + wait, +} from './tools/utils' import { OmnisearchCache } from './database' import * as NotesIndex from './notes-index' import { searchEngine } from './search/omnisearch' @@ -53,7 +59,7 @@ export default class OmnisearchPlugin extends Plugin { // Listeners to keep the search index up-to-date this.registerEvent( this.app.vault.on('create', file => { - searchEngine.addFromPaths([file.path]) + searchEngine.addFromPaths([file.path], false) }) ) this.registerEvent( @@ -70,7 +76,7 
@@ export default class OmnisearchPlugin extends Plugin { this.app.vault.on('rename', async (file, oldPath) => { if (file instanceof TFile && isFilePlaintext(file.path)) { searchEngine.removeFromPaths([oldPath]) - await searchEngine.addFromPaths([file.path]) + await searchEngine.addFromPaths([file.path], false) } }) ) @@ -78,7 +84,7 @@ export default class OmnisearchPlugin extends Plugin { await populateIndex() }) - showWelcomeNotice(this) + executeFirstLaunchTasks(this) } onunload(): void { @@ -104,61 +110,48 @@ export default class OmnisearchPlugin extends Plugin { */ async function populateIndex(): Promise { console.time('Omnisearch - Indexing total time') - - // // if not iOS, load data from cache - // if (!Platform.isIosApp) { - // engine = await SearchEngine.initFromCache() - // } - - // Load plaintext files - indexingStep.set(IndexingStepType.ReadingNotes) - console.log('Omnisearch - Reading notes') - const plainTextFiles = app.vault - .getFiles() - .filter(f => isFilePlaintext(f.path)) - .map(p => p.path) - await searchEngine.addFromPaths(plainTextFiles) - - let allFiles: string[] = [...plainTextFiles] - - // Load PDFs - if (settings.PDFIndexing) { - indexingStep.set(IndexingStepType.ReadingPDFs) - console.log('Omnisearch - Reading PDFs') - const pdfDocuments = app.vault - .getFiles() - .filter(f => isFilePDF(f.path)) - .map(p => p.path) - await searchEngine.addFromPaths(pdfDocuments) - // Add PDFs to the files list - allFiles = [...allFiles, ...pdfDocuments] - } - - // Load Images - if (settings.imagesIndexing) { - indexingStep.set(IndexingStepType.ReadingImages) - console.log('Omnisearch - Reading Images') - const imagesDocuments = app.vault - .getFiles() - .filter(f => isFileImage(f.path)) - .map(p => p.path) - await searchEngine.addFromPaths(imagesDocuments) - // Add Images to the files list - allFiles = [...allFiles, ...imagesDocuments] - } - - console.log('Omnisearch - Total number of files: ' + allFiles.length) - - // Load PDFs into the main search 
engine, and write cache - // SearchEngine.loadTmpDataIntoMain() - indexingStep.set(IndexingStepType.Done) - if (!Platform.isIosApp) { - console.log('Omnisearch - Writing cache...') + await searchEngine.loadCache() + } + + indexingStep.set(IndexingStepType.ReadingFiles) + const diff = searchEngine.getDiff( + app.vault + .getFiles() + .filter(f => isFileIndexable(f.path)) + .map(f => ({ path: f.path, mtime: f.stat.mtime })) + ) + + console.log( + 'Omnisearch - Total number of files to add/update: ' + diff.toAdd.length + ) + console.log( + 'Omnisearch - Total number of files to remove: ' + diff.toRemove.length + ) + + if (diff.toAdd.length >= 500) { + new Notice( + `Omnisearch - ${diff.toAdd.length} files need to be indexed. Obsidian may experience stutters and freezes during the process`, + 10_000 + ) + } + + indexingStep.set(IndexingStepType.IndexingFiles) + await searchEngine.removeFromPaths(diff.toRemove.map(o => o.path)) + await searchEngine.addFromPaths( + diff.toAdd.map(o => o.path), + true + ) + + if (diff.toRemove.length || diff.toAdd.length) { await searchEngine.writeToCache() } console.timeEnd('Omnisearch - Indexing total time') + if (diff.toAdd.length >= 500) { + new Notice(`Omnisearch - Your files have been indexed.`) + } + indexingStep.set(IndexingStepType.Done) } async function cleanOldCacheFiles() { @@ -179,7 +172,7 @@ async function cleanOldCacheFiles() { } } -function showWelcomeNotice(plugin: Plugin) { +function executeFirstLaunchTasks(plugin: Plugin) { const code = '1.8.0-beta.3' if (settings.welcomeMessage !== code) { const welcome = new DocumentFragment() diff --git a/src/notes-index.ts b/src/notes-index.ts index 524c488..6b2f243 100644 --- a/src/notes-index.ts +++ b/src/notes-index.ts @@ -43,7 +43,10 @@ export function markNoteForReindex(note: TAbstractFile): void { export async function refreshIndex(): Promise { const paths = [...notesToReindex].map(n => n.path) - searchEngine.removeFromPaths(paths) - searchEngine.addFromPaths(paths) - 
notesToReindex.clear() + if (paths.length) { + searchEngine.removeFromPaths(paths) + searchEngine.addFromPaths(paths, false) + notesToReindex.clear() + // console.log(`Omnisearch - Reindexed ${paths.length} file(s)`) + } } diff --git a/src/search/omnisearch.ts b/src/search/omnisearch.ts index 958e4db..e05505d 100644 --- a/src/search/omnisearch.ts +++ b/src/search/omnisearch.ts @@ -3,15 +3,26 @@ import MiniSearch, { type Options, type SearchResult, } from 'minisearch' -import type { IndexedDocument, ResultNote, SearchMatch } from '../globals' -import { chsRegex, SPACE_OR_PUNCTUATION } from '../globals' +import type { + DocumentRef, + IndexedDocument, + ResultNote, + SearchMatch, +} from '../globals' +import { + chsRegex, + indexingStep, + IndexingStepType, + SPACE_OR_PUNCTUATION, +} from '../globals' import { settings } from '../settings' import { + chunkArray, removeDiacritics, stringsToRegex, stripMarkdownCharacters, } from '../tools/utils' -import { Notice } from 'obsidian' +import { Notice, Platform } from 'obsidian' import { getIndexedDocument } from '../file-loader' import type { Query } from './query' import { cacheManager } from '../cache-manager' @@ -45,7 +56,8 @@ export class Omnisearch { logger(_level, _message, code) { if (code === 'version_conflict') { new Notice( - 'Omnisearch - Your index cache may be incorrect or corrupted. If this message keeps appearing, go to Settings to clear the cache.' + 'Omnisearch - Your index cache may be incorrect or corrupted. 
If this message keeps appearing, go to Settings to clear the cache.', + 5000 ) } }, @@ -59,6 +71,7 @@ export class Omnisearch { } async loadCache(): Promise { + indexingStep.set(IndexingStepType.LoadingCache) const cache = await cacheManager.getMinisearchCache() if (cache) { this.minisearch = MiniSearch.loadJS(cache.data, Omnisearch.options) @@ -66,11 +79,38 @@ export class Omnisearch { } } + /** + * Returns the list of documents that need to be reindexed + * @param docs + */ + getDiff(docs: DocumentRef[]): { + toAdd: DocumentRef[] + toRemove: DocumentRef[] + } { + const indexedArr = [...this.indexedDocuments] + const docsMap = new Map(docs.map(d => [d.path, d.mtime])) + + const toAdd = docs.filter( + d => + !this.indexedDocuments.has(d.path) || + this.indexedDocuments.get(d.path) !== d.mtime + ) + const toRemove = [...this.indexedDocuments] + .filter( + ([path, mtime]) => !docsMap.has(path) || docsMap.get(path) !== mtime + ) + .map(o => ({ path: o[0], mtime: o[1] })) + return { toAdd, toRemove } + } + /** * Add notes/PDFs/images to the search index * @param paths */ - public async addFromPaths(paths: string[]): Promise { + public async addFromPaths( + paths: string[], + writeToCache: boolean + ): Promise { let documents = await Promise.all( paths.map(async path => await getIndexedDocument(path)) ) @@ -80,8 +120,20 @@ export class Omnisearch { documents.filter(d => this.indexedDocuments.has(d.path)).map(d => d.path) ) - documents.forEach(doc => this.indexedDocuments.set(doc.path, doc.mtime)) - await this.minisearch.addAllAsync(documents) + // Split the documents in smaller chunks to regularly save the cache. 
+ // If the user shuts off Obsidian mid-indexing, we at least saved some progress + const chunkedDocs = chunkArray(documents, 500) + for (const docs of chunkedDocs) { + indexingStep.set(IndexingStepType.IndexingFiles) + // Update the list of indexed docs + docs.forEach(doc => this.indexedDocuments.set(doc.path, doc.mtime)) + // Add docs to minisearch + await this.minisearch.addAllAsync(docs) + // Save the index + if (writeToCache) { + await this.writeToCache() + } + } } /** @@ -292,6 +344,10 @@ export class Omnisearch { } public async writeToCache(): Promise { + if (Platform.isIosApp) { + return + } + indexingStep.set(IndexingStepType.WritingCache) await cacheManager.writeMinisearchCache( this.minisearch, this.indexedDocuments diff --git a/src/settings.ts b/src/settings.ts index fdd651d..8a17de5 100644 --- a/src/settings.ts +++ b/src/settings.ts @@ -188,8 +188,7 @@ export class SettingsTab extends PluginSettingTab { new Setting(containerEl) .setName('Simpler search') .setDesc( - `When enabled, Omnisearch is a bit more restrictive when using your query terms as prefixes. - May return less results, but will be quicker. You should enable this if Omnisearch makes Obsidian freeze while searching.` + `Enable this if Obsidian often freezes while making searches. 
This will return more strict results.` ) .addToggle(toggle => toggle.setValue(settings.simpleSearch).onChange(async v => { @@ -235,7 +234,7 @@ export class SettingsTab extends PluginSettingTab { // Keep line returns in excerpts new Setting(containerEl) .setName('Render line return in excerpts') - .setDesc('Activate this option render line returns in result excerpts.') + .setDesc('Activate this option to render line returns in result excerpts.') .addToggle(toggle => toggle .setValue(settings.renderLineReturnInExcerpts) diff --git a/src/tools/utils.ts b/src/tools/utils.ts index b3f6c5b..e49bd5b 100644 --- a/src/tools/utils.ts +++ b/src/tools/utils.ts @@ -244,3 +244,15 @@ export function makeMD5(data: BinaryLike): string { } return createHash('md5').update(data).digest('hex') } + +export function chunkArray(arr: T[], len: number): T[][] { + var chunks = [], + i = 0, + n = arr.length + + while (i < n) { + chunks.push(arr.slice(i, (i += len))) + } + + return chunks +}