From 684070e4a4c05e9d9dd0bcd229332ca4413f0e07 Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Mon, 31 Oct 2022 21:15:15 +0100 Subject: [PATCH 1/3] Do not load tmp data if no pdf indexing --- src/main.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main.ts b/src/main.ts index d14427a..bbc34bf 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,5 +1,5 @@ import { Notice, Plugin, TFile } from 'obsidian' -import {SearchEngine} from './search/search-engine' +import { SearchEngine } from './search/search-engine' import { OmnisearchInFileModal, OmnisearchVaultModal, @@ -126,10 +126,11 @@ async function populateIndex(): Promise { await tmpEngine.addAllToMinisearch(pdfs) console.log(`Omnisearch - Indexed ${pdfs.length} PDFs`) console.timeEnd('Omnisearch - Timing') - } - // Load PDFs into the main search engine, and write cache - SearchEngine.loadTmpDataIntoMain() + // Load PDFs into the main search engine, and write cache + SearchEngine.loadTmpDataIntoMain() + } + SearchEngine.isIndexing.set(false) await tmpEngine.writeToCache() From 777b172904eba7266c5576596653ddc040cdde05 Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Sat, 5 Nov 2022 14:30:08 +0100 Subject: [PATCH 2/3] 1.8.0-beta.1 --- manifest-beta.json | 2 +- package.json | 2 +- versions.json | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/manifest-beta.json b/manifest-beta.json index 768b303..8f9557a 100644 --- a/manifest-beta.json +++ b/manifest-beta.json @@ -1,7 +1,7 @@ { "id": "omnisearch", "name": "Omnisearch", - "version": "1.7.10", + "version": "1.8.0-beta.1", "minAppVersion": "1.0.0", "description": "A search engine that just works", "author": "Simon Cambier", diff --git a/package.json b/package.json index cc75b8d..695e3cb 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "scambier.obsidian-search", - "version": "1.7.10", + "version": "1.8.0-beta.1", "description": "A search engine for Obsidian", "main": "dist/main.js", "scripts": { diff --git a/versions.json b/versions.json index f46d8a8..de3d61c 100644 --- a/versions.json +++ b/versions.json @@ -68,5 +68,6 @@ "1.7.7": "1.0.0", "1.7.8": "1.0.0", "1.7.9": "1.0.0", - "1.7.10": "1.0.0" + "1.7.10": "1.0.0", + "1.8.0-beta.1": "1.0.0" } \ No newline at end of file From 087ec5cc990f6edf9797f3fd347281bd41d39b3b Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Sat, 5 Nov 2022 14:58:25 +0100 Subject: [PATCH 3/3] Squashed commit of the following: commit 3b229cad538ad88ef2d366964c4261bc0e02fb7c Author: Simon Cambier Date: Sat Nov 5 14:30:08 2022 +0100 1.8.0-beta.1 commit f43c369b2dd0a1083b171724e3f7466429505629 Author: Simon Cambier Date: Sat Nov 5 13:39:45 2022 +0100 Squashed commit of the following: commit 93508ee95046385baf62475e5bd835ed9fafe6d3 Author: Simon Cambier Date: Sat Nov 5 13:35:56 2022 +0100 Cleaning commit 205e6a7cce4c1939338820f366f7ae8a067ec7fb Author: Simon Cambier Date: Fri Nov 4 08:53:46 2022 +0100 Added logs commit ea19b94e164581829908ac71d09a60e230925a7f Author: Simon Cambier Date: Thu Nov 3 22:27:24 2022 +0100 Notices commit 53ff4e822b3c292a56da150b94a1cfe43e199d44 Author: Simon Cambier Date: Thu Nov 3 22:27:09 2022 +0100 Custom minisearch build + Notice when the cache could be corrupted commit 498408afd1c350dd68969318c3533fff8aa6c172 Author: Simon Cambier Date: Thu Nov 3 22:26:22 2022 +0100 Added a button to manually clear the cache commit 90afe5d3868989626ba4613b064e24ac7efa88be Author: Simon Cambier Date: Thu Nov 3 22:03:41 2022 +0100 Optimized loading minisearch from cache commit 719dcb9c82f09f56dabb828ac13c9c1db7f795bb Author: Simon Cambier Date: Thu Nov 3 21:43:49 2022 +0100 #92 - Refactored cache to make it behave like pre-indexedDb commit 2164ccfa39d83eef23231d01e8aa35ac30e0d31c Author: Simon Cambier Date: Wed Nov 2 23:13:59 2022 +0100 Removed cache & tmp engine commit 50eb33bbd4d074be9a9952eaf871cd8f58b327e6 Author: Simon Cambier Date: Wed Nov 2 22:56:04 2022 +0100 More efficient loading of PDFs commit a6342a675fb6726afdd45144afd33eead37bb04d Author: Simon Cambier Date: Wed Nov 2 10:34:02 2022 +0100 #120 - Cleaning of old cache databases commit b6890567f3b500cab518f4aaa0b5bb519588e08b Author: Simon Cambier Date: Mon Oct 31 17:28:17 2022 +0100 Updated Readme --- README.md | 19 ++-- package.json | 2 +- pnpm-lock.yaml | 14 +-- src/cache-manager.ts | 137 ++++++++++++++++++++++---- src/components/ResultItemVault.svelte | 2 +- src/database.ts | 67 +++++++++++-- src/file-loader.ts | 60 ++++++----- src/main.ts | 85 +++++++++++----- src/notes-index.ts | 14 +-- src/pdf/pdf-manager.ts | 4 +- src/search/search-engine.ts | 64 +++++------- src/settings.ts | 63 +++++++----- 12 files changed, 367 insertions(+), 164 deletions(-) diff --git a/README.md b/README.md index e1cdc17..826a593 100644 --- a/README.md +++ b/README.md @@ -14,17 +14,20 @@ Under the hood, it uses the excellent [MiniSearch](https://github.com/lucaong/mi ## Features +- Find your notes faster than ever + - Workflow similar to the "Quick Switcher" core plugin - Automatic document scoring using the [BM25 algorithm](https://github.com/lucaong/minisearch/issues/129#issuecomment-1046257399) - The relevance of a document against a query depends on the number of times the query terms appear in the document, its filename, and its headings -- Can search other plaintext files and PDFs (configurable in settings) -- Workflow similar to "Quick Switcher" plugins +- Can search other plaintext files and PDFs + - Opt-in in settings + - PDF indexing is disabled on iOS - Keyboard first: you never have to use your mouse - Resistance to typos - Switch between Vault and In-file search to quickly skim multiple results in a single note - Supports `"expressions in quotes"` and `-exclusions` - Directly Insert a `[[link]]` from the search results - Respects Obsidian's "Excluded Files" list - results are downranked, not hidden -- Optional support for Vim navigation keys (ctrl + j, k, n, p) +- Supports Vim navigation keys (ctrl + j, k, n, p) **Note:** support of Chinese, Japanese, Korean, etc. depends on [this additional plugin](https://github.com/aidenlx/cm-chs-patch). Please read its documentation for more information. @@ -121,17 +124,15 @@ See [styles.css](./assets/styles.css) for more information. **Omnisearch makes Obsidian sluggish at startup.** -- You may have _big_ documents. Huge notes (like novels) can freeze the interface for a short time when being indexed. Enabling the setting "_Persist cache on disk_" may help you in this case. +- You may have _big_ documents. Huge notes (like novels) can freeze the interface for a short time when being indexed. While Omnisearch uses a cache between sessions, it's still rebuilt at startup to keep it up-to-date. -**I have thousands of notes, and at startup I have to wait a few seconds before making a query, or else Omnisearch does not return all the expected results.** +**I have thousands of notes, and at startup I have to wait a few seconds before Omnisearch gives me the context of a result.** -- Enabling the setting "_Persist cache on disk_" may help you in this case. +- Omnisearch refreshes its index at startup. During this time, you can still find notes, but Omnisearch is not able to show you the excerpts. **Omnisearch gives inconsistent/invalid results, or there are errors in the developer console.** -- Go in Omnisearch settings. -- If applicable, disable and re-enable "*Persist cache on disk*". -- Restart Obsidian to clear the cache and force a reindex. +- Restart Obsidian to force a reindex of Omnisearch **A query should return a result that does not appear.** diff --git a/package.json b/package.json index 695e3cb..4c662c5 100644 --- a/package.json +++ b/package.json @@ -48,7 +48,7 @@ "@vanakat/plugin-api": "0.1.0", "dexie": "^3.2.2", "lodash-es": "4.17.21", - "minisearch": "5.0.0", + "minisearch": "github:scambier/minisearch#callback_desync", "p-limit": "^4.0.0", "pako": "^2.0.4", "pure-md5": "^0.1.14" diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 77da2fc..1110324 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -21,7 +21,7 @@ specifiers: dexie: ^3.2.2 jest: ^27.5.1 lodash-es: 4.17.21 - minisearch: 5.0.0 + minisearch: github:scambier/minisearch#callback_desync obsidian: latest p-limit: ^4.0.0 pako: ^2.0.4 @@ -45,7 +45,7 @@ dependencies: '@vanakat/plugin-api': 0.1.0 dexie: 3.2.2 lodash-es: 4.17.21 - minisearch: 5.0.0 + minisearch: github.com/scambier/minisearch/adf11cab46d851220a41c9ad95ed986b630f0f3c p-limit: 4.0.0 pako: 2.0.4 pure-md5: 0.1.14 @@ -3866,10 +3866,6 @@ packages: resolution: {integrity: sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==} dev: true - /minisearch/5.0.0: - resolution: {integrity: sha512-VEwBhl8aFtc2UG2XmP7a4XaZxVfNhe7GvB2W/ZRGbLL3P3LbBhkoOezBWsMqG8Mr5VonqXAMRWth79XXKja1bQ==} - dev: false - /mkdirp/0.5.6: resolution: {integrity: sha512-FP+p8RB8OWpF3YZBCrP5gtADmtXApB5AMLn+vdyA+PyxCjrCs00mjyUozssO33cwDeT3wNGdLxJ5M//YqtHAJw==} hasBin: true @@ -4951,3 +4947,9 @@ packages: resolution: {integrity: sha512-9bnSc/HEW2uRy67wc+T8UwauLuPJVn28jb+GtJY16iiKWyvmYJRXVT4UamsAEGQfPohgr2q4Tq0sQbQlxTfi1g==} engines: {node: '>=12.20'} dev: false + + github.com/scambier/minisearch/adf11cab46d851220a41c9ad95ed986b630f0f3c: + resolution: {tarball: https://codeload.github.com/scambier/minisearch/tar.gz/adf11cab46d851220a41c9ad95ed986b630f0f3c} + name: minisearch + version: 5.0.0 + dev: false diff --git a/src/cache-manager.ts b/src/cache-manager.ts index 51b2be3..18d1208 100644 --- a/src/cache-manager.ts +++ b/src/cache-manager.ts @@ -1,11 +1,13 @@ -import type { TFile } from 'obsidian' +import { Notice, type TFile } from 'obsidian' import type { IndexedDocument } from './globals' import { database } from './database' import MiniSearch from 'minisearch' import { minisearchOptions } from './search/search-engine' +import { makeMD5, wait } from './tools/utils' +import { settings } from './settings' class CacheManager { - private documentsCache: Map = new Map() + private liveDocuments: Map = new Map() /** * Show an empty input field next time the user opens Omnisearch modal */ @@ -35,48 +37,147 @@ class CacheManager { return data } - public async updateDocument(path: string, note: IndexedDocument) { - this.documentsCache.set(path, note) + /** + * Important: keep this method async for the day it _really_ becomes async. + * This will avoid a refactor. + * @param path + * @param note + */ + public async updateLiveDocument( + path: string, + note: IndexedDocument + ): Promise { + this.liveDocuments.set(path, note) } - public deleteDocument(key: string): void { - this.documentsCache.delete(key) + public deleteLiveDocument(key: string): void { + this.liveDocuments.delete(key) } - public getDocument(key: string): IndexedDocument | undefined { - return this.documentsCache.get(key) - } - - public getNonExistingNotesFromMemCache(): IndexedDocument[] { - return Object.values(this.documentsCache).filter(note => note.doesNotExist) + public getLiveDocument(key: string): IndexedDocument | undefined { + return this.liveDocuments.get(key) } public isDocumentOutdated(file: TFile): boolean { - const indexedNote = this.getDocument(file.path) + const indexedNote = this.getLiveDocument(file.path) return !indexedNote || indexedNote.mtime !== file.stat.mtime } //#region Minisearch + public getDocumentsChecksum(documents: IndexedDocument[]): string { + return makeMD5( + JSON.stringify( + documents.sort((a, b) => { + if (a.path < b.path) { + return -1 + } else if (a.path > b.path) { + return 1 + } + return 0 + }) + ) + ) + } + public async getMinisearchCache(): Promise { - const cache = (await database.minisearch.toArray())[0] - if (!cache) { + // Retrieve documents and make their checksum + const cachedDocs = await database.documents.toArray() + const checksum = this.getDocumentsChecksum(cachedDocs.map(d => d.document)) + + // Add those documents in the live cache + cachedDocs.forEach(doc => + cacheManager.updateLiveDocument(doc.path, doc.document) + ) + + // Retrieve the search cache, and verify the checksum + const cachedIndex = (await database.minisearch.toArray())[0] + if (cachedIndex?.checksum !== checksum) { + console.warn("Omnisearch - Cache - Checksums don't match, clearing cache") + // Invalid (or null) cache, clear everything + await database.minisearch.clear() + await database.documents.clear() return null } + try { - return MiniSearch.loadJSON(cache.data, minisearchOptions) + return MiniSearch.loadJS(cachedIndex.data, minisearchOptions) } catch (e) { + if (settings.showIndexingNotices) { + new Notice( + 'Omnisearch - Cache missing or invalid. Some freezes may occur while Omnisearch indexes your vault.' + ) + } console.error('Omnisearch - Error while loading Minisearch cache') console.error(e) return null } } - public async writeMinisearchCache(minisearch: MiniSearch): Promise { + /** + * Get a dict listing the deleted/added documents since last cache + * @param documents + */ + public async getDiffDocuments(documents: IndexedDocument[]): Promise<{ + toDelete: IndexedDocument[] + toAdd: IndexedDocument[] + toUpdate: { old: IndexedDocument; new: IndexedDocument }[] + }> { + let cachedDocs = await database.documents.toArray() + const toAdd = documents.filter( + d => !cachedDocs.find(c => c.path === d.path) + ) + const toDelete = cachedDocs + .filter(c => !documents.find(d => d.path === c.path)) + .map(d => d.document) + + const toUpdate = cachedDocs + .filter(c => + documents.find(d => d.path === c.path && d.mtime !== c.mtime) + ) + .map(c => ({ + old: c.document, + new: documents.find(d => d.path === c.path)!, + })) + + return { + toDelete, + toAdd, + toUpdate, + } + } + + public async writeMinisearchCache( + minisearch: MiniSearch, + documents: IndexedDocument[] + ): Promise { + const { toDelete, toAdd, toUpdate } = await this.getDiffDocuments(documents) + + // Delete + // console.log(`Omnisearch - Cache - Will delete ${toDelete.length} documents`) + await database.documents.bulkDelete(toDelete.map(o => o.path)) + + // Add + // console.log(`Omnisearch - Cache - Will add ${toAdd.length} documents`) + await database.documents.bulkAdd( + toAdd.map(o => ({ document: o, mtime: o.mtime, path: o.path })) + ) + + // Update + // console.log(`Omnisearch - Cache - Will update ${toUpdate.length} documents`) + await database.documents.bulkPut( + toUpdate.map(o => ({ + document: o.new, + mtime: o.new.mtime, + path: o.new.path, + })) + ) + await database.minisearch.clear() await database.minisearch.add({ date: new Date().toISOString(), - data: JSON.stringify(minisearch.toJSON()), + checksum: this.getDocumentsChecksum(documents), + data: minisearch.toJSON(), }) console.log('Omnisearch - Search cache written') } diff --git a/src/components/ResultItemVault.svelte b/src/components/ResultItemVault.svelte index e4230c3..6fdbb9c 100644 --- a/src/components/ResultItemVault.svelte +++ b/src/components/ResultItemVault.svelte @@ -10,7 +10,7 @@ $: reg = stringsToRegex(note.foundWords) $: cleanedContent = makeExcerpt(note.content, note.matches[0]?.offset ?? -1) - $: glyph = cacheManager.getDocument(note.path)?.doesNotExist + $: glyph = cacheManager.getLiveDocument(note.path)?.doesNotExist $: title = settings.showShortName ? note.basename : note.path diff --git a/src/database.ts b/src/database.ts index f26ee83..23b105d 100644 --- a/src/database.ts +++ b/src/database.ts @@ -1,21 +1,72 @@ import Dexie from 'dexie' +import type { AsPlainObject } from 'minisearch' +import type { IndexedDocument } from './globals' -class OmnisearchCache extends Dexie { - pdf!: Dexie.Table< - { path: string; hash: string; size: number; text: string }, +export class OmnisearchCache extends Dexie { + public static readonly dbVersion = 7 + public static readonly dbPrefix = 'omnisearch/cache/' + public static readonly dbName = OmnisearchCache.dbPrefix + app.appId + + private static instance: OmnisearchCache + + /** + * Deletes Omnisearch databases that have an older version than the current one + */ + public static async clearOldDatabases(): Promise { + const toDelete = (await indexedDB.databases()).filter( + db => + db.name?.startsWith(OmnisearchCache.dbPrefix) && + // version multiplied by 10 https://github.com/dexie/Dexie.js/issues/59 + db.version !== OmnisearchCache.dbVersion * 10 + ) + if (toDelete.length) { + console.log('Omnisearch - Those IndexedDb databases will be deleted:') + for (const db of toDelete) { + if (db.name) { + console.log(db.name + ' ' + db.version) + indexedDB.deleteDatabase(db.name) + } + } + } + } + + //#region Table declarations + + pdf!: Dexie.Table<{ path: string; hash: string; text: string }, string> + documents!: Dexie.Table< + { path: string; mtime: number; document: IndexedDocument }, string > searchHistory!: Dexie.Table<{ id?: number; query: string }, number> - minisearch!: Dexie.Table<{ date: string; data: string }, string> + minisearch!: Dexie.Table< + { date: string; checksum: string; data: AsPlainObject }, + string + > - constructor() { - super('omnisearch/cache/' + app.appId) - this.version(5).stores({ + //#endregion Table declarations + + public static getInstance() { + if (!OmnisearchCache.instance) { + OmnisearchCache.instance = new OmnisearchCache() + } + return OmnisearchCache.instance + } + + private constructor() { + super(OmnisearchCache.dbName) + // Database structure + this.version(OmnisearchCache.dbVersion).stores({ pdf: 'path, hash, size', searchHistory: '++id', + documents: 'path', minisearch: 'date', }) } + + public async clearCache() { + await this.minisearch.clear() + await this.documents.clear() + } } -export const database = new OmnisearchCache() +export const database = OmnisearchCache.getInstance() diff --git a/src/file-loader.ts b/src/file-loader.ts index 3b46ace..15451ab 100644 --- a/src/file-loader.ts +++ b/src/file-loader.ts @@ -11,6 +11,7 @@ import type { TFile } from 'obsidian' import type { IndexedDocument } from './globals' import { pdfManager } from './pdf/pdf-manager' import { getNonExistingNotes } from './tools/notes' +import { database } from './database' /** * Return all plaintext files as IndexedDocuments @@ -21,7 +22,7 @@ export async function getPlainTextFiles(): Promise { for (const file of allFiles) { const doc = await fileToIndexedDocument(file) data.push(doc) - await cacheManager.updateDocument(file.path, doc) + await cacheManager.updateLiveDocument(file.path, doc) } return data } @@ -31,15 +32,19 @@ export async function getPlainTextFiles(): Promise { * If a PDF isn't cached, it will be read from the disk and added to the IndexedDB */ export async function getPDFFiles(): Promise { - const allFiles = app.vault.getFiles().filter(f => f.path.endsWith('.pdf')) - const data: IndexedDocument[] = [] + const fromDisk = app.vault.getFiles().filter(f => f.path.endsWith('.pdf')) + const fromDb = await database.pdf.toArray() + const data: IndexedDocument[] = [] const input = [] - for (const file of allFiles) { + for (const file of fromDisk) { input.push( NotesIndex.processQueue(async () => { - const doc = await fileToIndexedDocument(file) - await cacheManager.updateDocument(file.path, doc) + const doc = await fileToIndexedDocument( + file, + fromDb.find(o => o.path === file.path)?.text + ) + await cacheManager.updateLiveDocument(file.path, doc) data.push(doc) }) ) @@ -52,38 +57,45 @@ export async function getPDFFiles(): Promise { * Convert a file into an IndexedDocument. * Will use the cache if possible. * @param file + * @param content If we give a text content, will skip the fetching part */ export async function fileToIndexedDocument( - file: TFile + file: TFile, + content?: string ): Promise { - let content: string - if (isFilePlaintext(file.path)) { - content = removeDiacritics(await app.vault.cachedRead(file)) - } else if (file.path.endsWith('.pdf')) { - content = removeDiacritics(await pdfManager.getPdfText(file)) - } else { - throw new Error('Invalid file: ' + file.path) + if (!content) { + if (isFilePlaintext(file.path)) { + content = await app.vault.cachedRead(file) + } else if (file.path.endsWith('.pdf')) { + content = await pdfManager.getPdfText(file) + } else { + throw new Error('Invalid file: ' + file.path) + } } content = removeDiacritics(content) const metadata = app.metadataCache.getFileCache(file) - // EXCALIDRAW - // Remove the json code - if (metadata?.frontmatter?.['excalidraw-plugin']) { - const comments = metadata.sections?.filter(s => s.type === 'comment') ?? [] - for (const { start, end } of comments.map(c => c.position)) { - content = content.substring(0, start.offset-1) + content.substring(end.offset) - } - } - // Look for links that lead to non-existing files, // and add them to the index. if (metadata) { const nonExisting = getNonExistingNotes(file, metadata) - for (const name of nonExisting.filter(o => !cacheManager.getDocument(o))) { + for (const name of nonExisting.filter( + o => !cacheManager.getLiveDocument(o) + )) { NotesIndex.addNonExistingToIndex(name, file.path) } + + // EXCALIDRAW + // Remove the json code + if (metadata.frontmatter?.['excalidraw-plugin']) { + const comments = + metadata.sections?.filter(s => s.type === 'comment') ?? [] + for (const { start, end } of comments.map(c => c.position)) { + content = + content.substring(0, start.offset - 1) + content.substring(end.offset) + } + } } return { diff --git a/src/main.ts b/src/main.ts index bbc34bf..58c80c7 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,4 +1,4 @@ -import { Notice, Plugin, TFile } from 'obsidian' +import { Notice, Platform, Plugin, TFile } from 'obsidian' import { SearchEngine } from './search/search-engine' import { OmnisearchInFileModal, @@ -11,17 +11,17 @@ import api from './tools/api' import { isFilePlaintext, wait } from './tools/utils' import * as NotesIndex from './notes-index' import * as FileLoader from './file-loader' +import { OmnisearchCache } from './database' +import { cacheManager } from './cache-manager' export default class OmnisearchPlugin extends Plugin { private ribbonButton?: HTMLElement async onload(): Promise { await cleanOldCacheFiles() + await OmnisearchCache.clearOldDatabases() await loadSettings(this) - // Initialize minisearch - await SearchEngine.initFromCache() - _registerAPI(this) if (settings.ribbonIcon) { @@ -105,37 +105,68 @@ export default class OmnisearchPlugin extends Plugin { * Read the files and feed them to Minisearch */ async function populateIndex(): Promise { - const tmpEngine = SearchEngine.getTmpEngine() + console.time('Omnisearch - Indexing duration') - // Load plain text files - console.time('Omnisearch - Timing') - const files = await FileLoader.getPlainTextFiles() - // Index them - await tmpEngine.addAllToMinisearch(files) - console.log(`Omnisearch - Indexed ${files.length} notes`) - console.timeEnd('Omnisearch - Timing') + // Initialize minisearch + let engine = SearchEngine.getEngine() - // Load normal notes into the main search engine - SearchEngine.loadTmpDataIntoMain() + // No cache for iOS + if (!Platform.isIosApp) { + engine = await SearchEngine.initFromCache() + } + + // Load plaintext files + const plainTextFiles = await FileLoader.getPlainTextFiles() + let allFiles = [...plainTextFiles] + // iOS: since there's no cache, directly index the documents + if (Platform.isIosApp) { + await wait(1000) + await engine.addAllToMinisearch(plainTextFiles) + } // Load PDFs if (settings.PDFIndexing) { - console.time('Omnisearch - Timing') const pdfs = await FileLoader.getPDFFiles() - // Index them - await tmpEngine.addAllToMinisearch(pdfs) - console.log(`Omnisearch - Indexed ${pdfs.length} PDFs`) - console.timeEnd('Omnisearch - Timing') - - // Load PDFs into the main search engine, and write cache - SearchEngine.loadTmpDataIntoMain() + // iOS: since there's no cache, just index the documents + if (Platform.isIosApp) { + await wait(1000) + await engine.addAllToMinisearch(pdfs) + } + // Add PDFs to the files list + allFiles = [...allFiles, ...pdfs] } - - SearchEngine.isIndexing.set(false) - await tmpEngine.writeToCache() - // Clear memory - SearchEngine.clearTmp() + // Other platforms: make a diff of what's to add/update/delete + if (!Platform.isIosApp) { + // Check which documents need to be removed/added/updated + const diffDocs = await cacheManager.getDiffDocuments(allFiles) + // Add + await engine.addAllToMinisearch(diffDocs.toAdd) + diffDocs.toAdd.forEach(doc => + cacheManager.updateLiveDocument(doc.path, doc) + ) + + // Delete + diffDocs.toDelete.forEach(d => engine.removeFromMinisearch(d)) + diffDocs.toDelete.forEach(doc => cacheManager.deleteLiveDocument(doc.path)) + + // Update (delete + add) + diffDocs.toUpdate + .map(d => d.old) + .forEach(d => { + engine.removeFromMinisearch(d) + cacheManager.updateLiveDocument(d.path, d) + }) + await engine.addAllToMinisearch(diffDocs.toUpdate.map(d => d.new)) + } + // Load PDFs into the main search engine, and write cache + // SearchEngine.loadTmpDataIntoMain() + SearchEngine.isIndexing.set(false) + if (!Platform.isIosApp) { + await SearchEngine.getEngine().writeToCache(allFiles) + } + + console.timeEnd('Omnisearch - Indexing duration') } async function cleanOldCacheFiles() { diff --git a/src/notes-index.ts b/src/notes-index.ts index cdfe781..28586ba 100644 --- a/src/notes-index.ts +++ b/src/notes-index.ts @@ -27,19 +27,19 @@ export async function addToIndexAndMemCache( // Check if the file was already indexed as non-existent. // If so, remove it from the index, and add it again as a real note. - if (cacheManager.getDocument(file.path)?.doesNotExist) { + if (cacheManager.getLiveDocument(file.path)?.doesNotExist) { removeFromIndex(file.path) } try { - if (cacheManager.getDocument(file.path)) { + if (cacheManager.getLiveDocument(file.path)) { throw new Error(`${file.basename} is already indexed`) } // Make the document and index it const note = await fileToIndexedDocument(file) SearchEngine.getEngine().addSingleToMinisearch(note) - await cacheManager.updateDocument(note.path, note) + await cacheManager.updateLiveDocument(note.path, note) } catch (e) { // console.trace('Error while indexing ' + file.basename) console.error(e) @@ -55,7 +55,7 @@ export async function addToIndexAndMemCache( export function addNonExistingToIndex(name: string, parent: string): void { name = removeAnchors(name) const filename = name + (name.endsWith('.md') ? '' : '.md') - if (cacheManager.getDocument(filename)) return + if (cacheManager.getLiveDocument(filename)) return const note: IndexedDocument = { path: filename, @@ -73,7 +73,7 @@ export function addNonExistingToIndex(name: string, parent: string): void { parent, } SearchEngine.getEngine().addSingleToMinisearch(note) - cacheManager.updateDocument(filename, note) + cacheManager.updateLiveDocument(filename, note) } /** @@ -84,10 +84,10 @@ export function removeFromIndex(path: string): void { console.info(`"${path}" is not an indexable file`) return } - const note = cacheManager.getDocument(path) + const note = cacheManager.getLiveDocument(path) if (note) { SearchEngine.getEngine().removeFromMinisearch(note) - cacheManager.deleteDocument(path) + cacheManager.deleteLiveDocument(path) // FIXME: only remove non-existing notes if they don't have another parent // cacheManager diff --git a/src/pdf/pdf-manager.ts b/src/pdf/pdf-manager.ts index aebd726..eacdd39 100644 --- a/src/pdf/pdf-manager.ts +++ b/src/pdf/pdf-manager.ts @@ -76,7 +76,7 @@ class PDFManager { // Add it to the cache database.pdf - .add({ hash, text, path: file.path, size: file.stat.size }) + .add({ hash, text, path: file.path }) .then(() => { resolve(text) }) @@ -84,7 +84,7 @@ class PDFManager { // In case of error (unreadable PDF or timeout) just add // an empty string to the cache database.pdf - .add({ hash, text: '', path: file.path, size: file.stat.size }) + .add({ hash, text: '', path: file.path }) .then(() => { resolve('') }) diff --git a/src/search/search-engine.ts b/src/search/search-engine.ts index de41ecd..e5319ce 100644 --- a/src/search/search-engine.ts +++ b/src/search/search-engine.ts @@ -1,8 +1,4 @@ -import MiniSearch, { - type AsPlainObject, - type Options, - type SearchResult, -} from 'minisearch' +import MiniSearch, { type Options, type SearchResult } from 'minisearch' import { chsRegex, type IndexedDocument, @@ -19,6 +15,7 @@ import type { Query } from './query' import { settings } from '../settings' import { cacheManager } from '../cache-manager' import { writable } from 'svelte/store' +import { Notice } from 'obsidian' const tokenize = (text: string): string[] => { const tokens = text.split(SPACE_OR_PUNCTUATION) @@ -45,11 +42,15 @@ export const minisearchOptions: Options = { 'headings3', ], storeFields: ['tags'], + callbackWhenDesync() { + new Notice( + 'Omnisearch - Your index cache may be incorrect or corrupted. If this message keeps appearing, go to Settings to clear the cache.' + ) + }, } export class SearchEngine { private static engine?: SearchEngine - private static tmpEngine?: SearchEngine public static isIndexing = writable(true) /** @@ -63,41 +64,23 @@ export class SearchEngine { return this.engine } - /** - * The secondary instance. This one is indexed in the background, - * while the main instance is quickly filled with cache data - */ - public static getTmpEngine(): SearchEngine { - if (!this.tmpEngine) { - this.tmpEngine = new SearchEngine() - } - return this.tmpEngine - } - /** * Instantiates the main instance with cache data (if it exists) */ - public static async initFromCache(): Promise { + public static async initFromCache(): Promise { try { const cache = await cacheManager.getMinisearchCache() if (cache) { this.getEngine().minisearch = cache } } catch (e) { + new Notice( + 'Omnisearch - Cache missing or invalid. Some freezes may occur while Omnisearch indexes your vault.' + ) + console.error('Omnisearch - Could not init engine from cache') console.error(e) } - } - - /** - * Loads the freshest indexed data into the main instance. - */ - public static loadTmpDataIntoMain(): void { - const tmpData = this.getTmpEngine().minisearch.toJSON() - this.getEngine().minisearch = MiniSearch.loadJS(tmpData, minisearchOptions) - } - - public static clearTmp(): void { - this.getTmpEngine().minisearch = new MiniSearch(minisearchOptions) + return this.getEngine() } private minisearch: MiniSearch @@ -147,9 +130,10 @@ export class SearchEngine { const exactTerms = query.getExactTerms() if (exactTerms.length) { results = results.filter(r => { - const title = cacheManager.getDocument(r.id)?.path.toLowerCase() ?? '' + const title = + cacheManager.getLiveDocument(r.id)?.path.toLowerCase() ?? '' const content = stripMarkdownCharacters( - cacheManager.getDocument(r.id)?.content ?? '' + cacheManager.getLiveDocument(r.id)?.content ?? '' ).toLowerCase() return exactTerms.every(q => content.includes(q) || title.includes(q)) }) @@ -160,7 +144,7 @@ export class SearchEngine { if (exclusions.length) { results = results.filter(r => { const content = stripMarkdownCharacters( - cacheManager.getDocument(r.id)?.content ?? '' + cacheManager.getLiveDocument(r.id)?.content ?? '' ).toLowerCase() return exclusions.every(q => !content.includes(q.value)) }) @@ -240,9 +224,10 @@ export class SearchEngine { // Map the raw results to get usable suggestions return results.map(result => { - let note = cacheManager.getDocument(result.id) + let note = cacheManager.getLiveDocument(result.id) if (!note) { // throw new Error(`Omnisearch - Note "${result.id}" not indexed`) + console.warn(`Omnisearch - Note "${result.id}" not in the live cache`) note = { content: '', basename: result.id, @@ -286,8 +271,11 @@ export class SearchEngine { // #region Read/write minisearch index - public async addAllToMinisearch(documents: IndexedDocument[]): Promise { - await this.minisearch.addAllAsync(documents) + public async addAllToMinisearch( + documents: IndexedDocument[], + chunkSize = 10 + ): Promise { + await this.minisearch.addAllAsync(documents, { chunkSize }) } public addSingleToMinisearch(document: IndexedDocument): void { @@ -300,7 +288,7 @@ export class SearchEngine { // #endregion - public async writeToCache(): Promise { - await cacheManager.writeMinisearchCache(this.minisearch) + public async writeToCache(documents: IndexedDocument[]): Promise { + await cacheManager.writeMinisearchCache(this.minisearch, documents) } } diff --git a/src/settings.ts b/src/settings.ts index 36a7b01..67619a1 100644 --- a/src/settings.ts +++ b/src/settings.ts @@ -1,4 +1,5 @@ import { + Notice, Platform, Plugin, PluginSettingTab, @@ -6,6 +7,7 @@ import { SliderComponent, } from 'obsidian' import { writable } from 'svelte/store' +import { database } from './database' import type OmnisearchPlugin from './main' interface WeightingSettings { @@ -143,11 +145,10 @@ export class SettingsTab extends PluginSettingTab { }) ) - // PDF Indexing - disabled on iOS - if (!Platform.isIosApp) { - const indexPDFsDesc = new DocumentFragment() - indexPDFsDesc.createSpan({}, span => { - span.innerHTML = `Omnisearch will include PDFs in search results. + // PDF Indexing + const indexPDFsDesc = new DocumentFragment() + indexPDFsDesc.createSpan({}, span => { + span.innerHTML = `Omnisearch will include PDFs in search results.
  • ⚠️ Depending on their size, PDFs can take anywhere from a few seconds to 2 minutes to be processed.
  • ⚠️ Texts extracted from PDFs may contain errors such as missing spaces, or spaces in the middle of words.
  • @@ -155,17 +156,17 @@ export class SettingsTab extends PluginSettingTab {
  • This feature is currently a work-in-progress, please report issues that you might experience.
Needs a restart to fully take effect.` - }) - new Setting(containerEl) - .setName('BETA - PDF Indexing') - .setDesc(indexPDFsDesc) - .addToggle(toggle => - toggle.setValue(settings.PDFIndexing).onChange(async v => { - settings.PDFIndexing = v - await saveSettings(this.plugin) - }) - ) - } + }) + new Setting(containerEl) + .setName('BETA - PDF Indexing') + .setDesc(indexPDFsDesc) + .addToggle(toggle => + toggle.setValue(settings.PDFIndexing).onChange(async v => { + settings.PDFIndexing = v + await saveSettings(this.plugin) + }) + ) + // #endregion Behavior // #region User Interface @@ -276,6 +277,29 @@ export class SettingsTab extends PluginSettingTab { .addSlider(cb => this.weightSlider(cb, 'weightH3')) // #endregion Results Weighting + + // #region Danger Zone + + new Setting(containerEl).setName('Danger Zone').setHeading() + + const resetCacheDesc = new DocumentFragment() + resetCacheDesc.createSpan({}, span => { + span.innerHTML = `Erase all Omnisearch cache data. + Use this if Omnisearch results are inconsistent, missing, or appear outdated.
+ Needs a restart to fully take effect.` + }) + new Setting(containerEl) + .setName('Clear cache data') + .setDesc(resetCacheDesc) + .addButton(cb => { + cb.setButtonText('Clear cache') + cb.onClick(async () => { + await database.clearCache() + new Notice('Omnisearch - Cache cleared. Please restart Obsidian.') + }) + }) + + //#endregion Danger Zone } weightSlider(cb: SliderComponent, key: keyof WeightingSettings): void { @@ -317,8 +341,6 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = { weightH2: 1.3, weightH3: 1.1, - // persistCache: false, - welcomeMessage: '', } as const @@ -327,11 +349,6 @@ export let settings = Object.assign({}, DEFAULT_SETTINGS) as OmnisearchSettings export async function loadSettings(plugin: Plugin): Promise { settings = Object.assign({}, DEFAULT_SETTINGS, await plugin.loadData()) - // Make sure that PDF indexing is disabled on iOS - if (Platform.isIosApp) { - settings.PDFIndexing = false - } - showExcerpt.set(settings.showExcerpt) }