diff --git a/README.md b/README.md index 8ab79c1..e1cdc17 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Under the hood, it uses the excellent [MiniSearch](https://github.com/lucaong/mi - Automatic document scoring using the [BM25 algorithm](https://github.com/lucaong/minisearch/issues/129#issuecomment-1046257399) - The relevance of a document against a query depends on the number of times the query terms appear in the document, its filename, and its headings -- Can search other plaintext files (configurable in settings) +- Can search other plaintext files and PDFs (configurable in settings) - Workflow similar to "Quick Switcher" plugins - Keyboard first: you never have to use your mouse - Resistance to typos @@ -117,6 +117,37 @@ For example, if you'd like the usual yellow highlight on search matches, you can See [styles.css](./assets/styles.css) for more information. +## Issues & Solutions + +**Omnisearch makes Obsidian sluggish at startup.** + +- You may have _big_ documents. Huge notes (like novels) can freeze the interface for a short time when being indexed. Enabling the setting "_Persist cache on disk_" may help you in this case. + +**I have thousands of notes, and at startup I have to wait a few seconds before making a query, or else Omnisearch does not return all the expected results.** + +- Enabling the setting "_Persist cache on disk_" may help you in this case. + +**Omnisearch gives inconsistent/invalid results, or there are errors in the developer console.** + +- Go in Omnisearch settings. +- If applicable, disable and re-enable "*Persist cache on disk*". +- Restart Obsidian to clear the cache and force a reindex. + +**A query should return a result that does not appear.** + +- If applicable, make sure that "*Ignore diacritics*" is enabled. +- If you have modified them, reset weightings to their original values. +- Rewrite your query and avoid numbers and common words. 
+ +**How do I highlight matches in search results?** + +See [here](https://github.com/scambier/obsidian-omnisearch#css-customization). + +**I'm still having an issue** + +You can write your issue [here](https://github.com/scambier/obsidian-omnisearch/issues) with as many details as possible. + + ## LICENSE Omnisearch is licensed under [GPL-3](https://tldrlegal.com/license/gnu-general-public-license-v3-(gpl-3)). diff --git a/package.json b/package.json index 840fa10..b9dc6fc 100644 --- a/package.json +++ b/package.json @@ -45,9 +45,10 @@ }, "dependencies": { "@vanakat/plugin-api": "0.1.0", + "dexie": "^3.2.2", "lodash-es": "4.17.21", "minisearch": "5.0.0", - "p-queue-compat": "1.0.187", + "p-limit": "^4.0.0", "pako": "^2.0.4", "pure-md5": "^0.1.14" }, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 27380d9..224c131 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1,4 +1,4 @@ -lockfileVersion: 5.3 +lockfileVersion: 5.4 overrides: moment@>=2.18.0 <2.29.4: '>=2.29.4' @@ -18,11 +18,12 @@ specifiers: '@vanakat/plugin-api': 0.1.0 babel-jest: ^27.5.1 builtin-modules: ^3.3.0 + dexie: ^3.2.2 jest: ^27.5.1 lodash-es: 4.17.21 minisearch: 5.0.0 obsidian: latest - p-queue-compat: 1.0.187 + p-limit: ^4.0.0 pako: ^2.0.4 prettier: ^2.7.1 prettier-plugin-svelte: ^2.8.0 @@ -41,9 +42,10 @@ specifiers: dependencies: '@vanakat/plugin-api': 0.1.0 + dexie: 3.2.2 lodash-es: 4.17.21 minisearch: 5.0.0 - p-queue-compat: 1.0.187 + p-limit: 4.0.0 pako: 2.0.4 pure-md5: 0.1.14 @@ -52,7 +54,7 @@ devDependencies: '@babel/preset-typescript': 7.18.6 '@rollup/plugin-commonjs': 23.0.0_rollup@2.79.1 '@rollup/plugin-node-resolve': 13.3.0_rollup@2.79.1 - '@rollup/plugin-typescript': 8.5.0_9b51dbafa6663640b0d4c612e14590fb + '@rollup/plugin-typescript': 8.5.0_tni5xl5gmy3ebmguyyjocrmq7m '@testing-library/jest-dom': 5.16.5 '@tsconfig/svelte': 3.0.0 '@types/jest': 27.5.2 @@ -64,16 +66,16 @@ devDependencies: jest: 27.5.1 obsidian: 0.16.3 prettier: 2.7.1 - prettier-plugin-svelte: 
2.8.0_prettier@2.7.1+svelte@3.51.0 + prettier-plugin-svelte: 2.8.0_ibge6ami6vq2q2j5g4rcvk62hq rollup: 2.79.1 rollup-plugin-base64: 1.0.1_rollup@2.79.1 rollup-plugin-copy: 3.4.0 - rollup-plugin-svelte: 7.1.0_rollup@2.79.1+svelte@3.51.0 + rollup-plugin-svelte: 7.1.0_yotnjasp5pi6hr7nvksbt64bqu rollup-plugin-terser: 7.0.2_rollup@2.79.1 rollup-plugin-web-worker-loader: 1.6.1_rollup@2.79.1 svelte: 3.51.0 svelte-jester: 2.3.2_jest@27.5.1+svelte@3.51.0 - svelte-preprocess: 4.10.7_svelte@3.51.0+typescript@4.8.4 + svelte-preprocess: 4.10.7_c36sqhgzdfaw4kmxogryyrffx4 tslib: 2.3.1 typescript: 4.8.4 @@ -385,6 +387,8 @@ packages: resolution: {integrity: sha512-qpVT7gtuOLjWeDTKLkJ6sryqLliBaFpAtGeqw5cs5giLldvh+Ch0plqnUMKoVAUS6ZEueQQiZV+p5pxtPitEsA==} engines: {node: '>=6.0.0'} hasBin: true + dependencies: + '@babel/types': 7.19.4 dev: true /@babel/plugin-bugfix-safari-id-destructuring-collision-in-function-expression/7.18.6: @@ -1676,7 +1680,7 @@ packages: rollup: 2.79.1 dev: true - /@rollup/plugin-typescript/8.5.0_9b51dbafa6663640b0d4c612e14590fb: + /@rollup/plugin-typescript/8.5.0_tni5xl5gmy3ebmguyyjocrmq7m: resolution: {integrity: sha512-wMv1/scv0m/rXx21wD2IsBbJFba8wGF3ErJIr6IKRfRj49S85Lszbxb4DCo8iILpluTjk2GAAu9CoZt4G3ppgQ==} engines: {node: '>=8.0.0'} peerDependencies: @@ -1904,7 +1908,7 @@ packages: '@types/yargs-parser': 21.0.0 dev: true - /@typescript-eslint/eslint-plugin/5.40.0_98a53d32d22651d8b7bb9311d3e58ca3: + /@typescript-eslint/eslint-plugin/5.40.0_tcst2mwsezi5rn53smi5hzmmum: resolution: {integrity: sha512-FIBZgS3DVJgqPwJzvZTuH4HNsZhHMa9SjxTKAZTlMsPw/UzpEjcf9f4dfgDJEHjK+HboUJo123Eshl6niwEm/Q==} engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} peerDependencies: @@ -2030,7 +2034,7 @@ packages: /@vanakat/plugin-api/0.1.0: resolution: {integrity: sha512-IxylWsxwkz1Knx/1/+8AkiKB7LWhGmiGX+j0rNiPMB5faw7AcSBBikxyq3gbOnbfwhaVPiQzJVIxKQcCPewMIw==} dependencies: - '@typescript-eslint/eslint-plugin': 5.40.0_98a53d32d22651d8b7bb9311d3e58ca3 + '@typescript-eslint/eslint-plugin': 
5.40.0_tcst2mwsezi5rn53smi5hzmmum '@typescript-eslint/parser': 5.40.0_typescript@4.4.4 obsidian: 0.14.8 typescript: 4.4.4 @@ -2575,6 +2579,11 @@ packages: engines: {node: '>=8'} dev: true + /dexie/3.2.2: + resolution: {integrity: sha512-q5dC3HPmir2DERlX+toCBbHQXW5MsyrFqPFcovkH9N2S/UW/H3H5AWAB6iEOExeraAu+j+zRDG+zg/D7YhH0qg==} + engines: {node: '>=6.0'} + dev: false + /diff-sequences/27.5.1: resolution: {integrity: sha512-k1gCAXAsNgLwEL+Y8Wvl+M6oEFj5bgazfZULpS5CneoPPXRaCCW7dm+q21Ky2VEE5X+VeRDBVg1Pcvvsr4TtNQ==} engines: {node: ^10.13.0 || ^12.13.0 || ^14.15.0 || >=15.0.0} @@ -2714,10 +2723,6 @@ packages: engines: {node: '>=0.10.0'} dev: true - /eventemitter3/4.0.7: - resolution: {integrity: sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==} - dev: false - /execa/5.1.1: resolution: {integrity: sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==} engines: {node: '>=10'} @@ -3935,6 +3940,13 @@ packages: p-try: 2.2.0 dev: true + /p-limit/4.0.0: + resolution: {integrity: sha512-5b0R4txpzjPWVw/cXXUResoD4hb6U/x9BH08L7nw+GN1sezDzPdxeRvpc9c433fZhBan/wusjbCsqwqm4EIBIQ==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + dependencies: + yocto-queue: 1.0.0 + dev: false + /p-locate/4.1.0: resolution: {integrity: sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==} engines: {node: '>=8'} @@ -3942,19 +3954,6 @@ packages: p-limit: 2.3.0 dev: true - /p-queue-compat/1.0.187: - resolution: {integrity: sha512-5cbNNLxU3IlYmVpSD3/qvgSzdj0zbLBoAVlEyX0FS6Atbb734jvpqKgGYTQcywEZBq1Xmm/fAUvco6gKhUMCcw==} - engines: {node: '>=12'} - dependencies: - eventemitter3: 4.0.7 - p-timeout-compat: 1.0.2 - dev: false - - /p-timeout-compat/1.0.2: - resolution: {integrity: sha512-64XPDh1d1D+isdMup2O+MWAmRpYYoQ8WtGY7aM7IgDWkQ5tftavm3qn+3TMswa+Nj7Mlltr0GCe3APVPdOw1Rw==} - engines: {node: '>=12'} - dev: false - /p-try/2.2.0: resolution: {integrity: 
sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==} engines: {node: '>=6'} @@ -4026,7 +4025,7 @@ packages: engines: {node: '>= 0.8.0'} dev: true - /prettier-plugin-svelte/2.8.0_prettier@2.7.1+svelte@3.51.0: + /prettier-plugin-svelte/2.8.0_ibge6ami6vq2q2j5g4rcvk62hq: resolution: {integrity: sha512-QlXv/U3bUszks3XYDPsk1fsaQC+fo2lshwKbcbO+lrSVdJ+40mB1BfL8OCAk1W9y4pJxpqO/4gqm6NtF3zNGCw==} peerDependencies: prettier: ^1.16.4 || ^2.0.0 @@ -4222,7 +4221,7 @@ packages: is-plain-object: 3.0.1 dev: true - /rollup-plugin-svelte/7.1.0_rollup@2.79.1+svelte@3.51.0: + /rollup-plugin-svelte/7.1.0_yotnjasp5pi6hr7nvksbt64bqu: resolution: {integrity: sha512-vopCUq3G+25sKjwF5VilIbiY6KCuMNHP1PFvx2Vr3REBNMDllKHFZN2B9jwwC+MqNc3UPKkjXnceLPEjTjXGXg==} engines: {node: '>=10'} peerDependencies: @@ -4481,7 +4480,7 @@ packages: svelte: 3.51.0 dev: true - /svelte-preprocess/4.10.7_svelte@3.51.0+typescript@4.8.4: + /svelte-preprocess/4.10.7_c36sqhgzdfaw4kmxogryyrffx4: resolution: {integrity: sha512-sNPBnqYD6FnmdBrUmBCaqS00RyCsCpj2BG58A1JBswNF7b0OKviwxqVrOL/CKyJrLSClrSeqQv5BXNg2RUbPOw==} engines: {node: '>= 9.11.2'} requiresBuild: true @@ -4851,3 +4850,8 @@ packages: y18n: 5.0.8 yargs-parser: 20.2.9 dev: true + + /yocto-queue/1.0.0: + resolution: {integrity: sha512-9bnSc/HEW2uRy67wc+T8UwauLuPJVn28jb+GtJY16iiKWyvmYJRXVT4UamsAEGQfPohgr2q4Tq0sQbQlxTfi1g==} + engines: {node: '>=12.20'} + dev: false diff --git a/src/cache-manager.ts b/src/cache-manager.ts index aa4cc10..2932084 100644 --- a/src/cache-manager.ts +++ b/src/cache-manager.ts @@ -5,12 +5,12 @@ import { deflate, inflate } from 'pako' import { notesCacheFilePath, minisearchCacheFilePath, - type IndexedNote, + type IndexedDocument, } from './globals' import { settings } from './settings' class CacheManager { - notesCache: Record = {} + notesCache: Record = {} compress = true writeInterval = 5_000 // In milliseconds @@ -94,7 +94,7 @@ class CacheManager { console.log('Omnisearch - Notes cache saved 
on disk') } - public addNoteToCache(path: string, note: IndexedNote) { + public addNoteToCache(path: string, note: IndexedDocument) { this.notesCache[path] = note this.saveNotesCache() } @@ -103,11 +103,11 @@ class CacheManager { delete this.notesCache[key] } - public getNoteFromCache(key: string): IndexedNote | undefined { + public getNoteFromCache(key: string): IndexedDocument | undefined { return this.notesCache[key] } - public getNonExistingNotesFromCache(): IndexedNote[] { + public getNonExistingNotesFromCache(): IndexedDocument[] { return Object.values(this.notesCache).filter(note => note.doesNotExist) } diff --git a/src/components/InputSearch.svelte b/src/components/InputSearch.svelte index ca6bcc7..370adce 100644 --- a/src/components/InputSearch.svelte +++ b/src/components/InputSearch.svelte @@ -20,7 +20,7 @@ const debouncedOnInput = debounce(() => { dispatch('input', value) - }, 100) + }, 250)
diff --git a/src/database.ts b/src/database.ts new file mode 100644 index 0000000..97cd728 --- /dev/null +++ b/src/database.ts @@ -0,0 +1,17 @@ +import Dexie from 'dexie' + +class OmnisearchCache extends Dexie { + pdf!: Dexie.Table< + { path: string; hash: string; size: number; text: string }, + string + > + + constructor() { + super(app.appId + '_omnisearch') + this.version(1).stores({ + pdf: 'path, hash, size, text', + }) + } +} + +export const database = new OmnisearchCache() diff --git a/src/globals.ts b/src/globals.ts index 0a19881..4679cfa 100644 --- a/src/globals.ts +++ b/src/globals.ts @@ -14,14 +14,13 @@ export const eventBus = new EventBus() export const minisearchCacheFilePath = `${app.vault.configDir}/plugins/omnisearch/searchIndex.data` export const notesCacheFilePath = `${app.vault.configDir}/plugins/omnisearch/notesCache.data` -export const pdfCacheFilePath = `${app.vault.configDir}/plugins/omnisearch/pdfCache.data` export const historyFilePath = `${app.vault.configDir}/plugins/omnisearch/historyCache.json` export const EventNames = { ToggleExcerpts: 'toggle-excerpts', } as const -export type IndexedNote = { +export type IndexedDocument = { path: string basename: string mtime: number diff --git a/src/main.ts b/src/main.ts index d068810..3a91805 100644 --- a/src/main.ts +++ b/src/main.ts @@ -9,7 +9,6 @@ import { loadSearchHistory } from './search-history' import { isFilePlaintext } from './utils' import * as NotesIndex from './notes-index' import { cacheManager } from './cache-manager' -import { pdfManager } from './pdf-manager' function _registerAPI(plugin: OmnisearchPlugin): void { registerAPI('omnisearch', api, plugin as any) @@ -25,7 +24,6 @@ export default class OmnisearchPlugin extends Plugin { await loadSettings(this) await loadSearchHistory() await cacheManager.loadNotesCache() - await pdfManager.loadPDFCache() _registerAPI(this) @@ -91,7 +89,7 @@ export default class OmnisearchPlugin extends Plugin { onunload(): void { console.log('Omnisearch 
- Interrupting PDF indexing') - NotesIndex.pdfQueue.pause() + NotesIndex.pdfQueue.clearQueue() } addRibbonButton(): void { @@ -102,17 +100,17 @@ export default class OmnisearchPlugin extends Plugin { } async function cleanOldCacheFiles() { - const oldSearchIndexFilePath = `${app.vault.configDir}/plugins/omnisearch/searchIndex.json` - if (await app.vault.adapter.exists(oldSearchIndexFilePath)) { - try { - await app.vault.adapter.remove(oldSearchIndexFilePath) - } catch (e) {} - } - const oldNnotesCacheFilePath = `${app.vault.configDir}/plugins/omnisearch/notesCache.json` - if (await app.vault.adapter.exists(oldNnotesCacheFilePath)) { - try { - await app.vault.adapter.remove(oldNnotesCacheFilePath) - } catch (e) {} + const toDelete = [ + `${app.vault.configDir}/plugins/omnisearch/searchIndex.json`, + `${app.vault.configDir}/plugins/omnisearch/notesCache.json`, + `${app.vault.configDir}/plugins/omnisearch/pdfCache.data` + ] + for (const item of toDelete) { + if (await app.vault.adapter.exists(item)) { + try { + await app.vault.adapter.remove(item) + } catch (e) {} + } } } diff --git a/src/notes-index.ts b/src/notes-index.ts index 87c16a1..419f00f 100644 --- a/src/notes-index.ts +++ b/src/notes-index.ts @@ -4,23 +4,19 @@ import { getAliasesFromMetadata, getTagsFromMetadata, isFileIndexable, - isFilePlaintext, removeDiacritics, wait, } from './utils' import { getNonExistingNotes, removeAnchors } from './notes' -import * as PDF from './pdf-manager' -import type { IndexedNote } from './globals' +import { pdfManager } from './pdf-manager' +import type { IndexedDocument } from './globals' import { settings } from './settings' import * as Search from './search' -import PQueue from 'p-queue-compat' +// import PQueue from 'p-queue-compat' +import pLimit from 'p-limit' import { cacheManager } from './cache-manager' -let isIndexChanged: boolean - -export const pdfQueue = new PQueue({ - concurrency: settings.backgroundProcesses, -}) +export const pdfQueue = 
pLimit(settings.backgroundProcesses) /** * Adds a file to the index @@ -59,14 +55,14 @@ export async function addToIndexAndCache(file: TAbstractFile): Promise { let content if (file.path.endsWith('.pdf')) { - content = removeDiacritics(await PDF.pdfManager.getPdfText(file as TFile)) + content = removeDiacritics(await pdfManager.getPdfText(file as TFile)) } else { // Fetch content from the cache to index it as-is content = removeDiacritics(await app.vault.cachedRead(file)) } // Make the document and index it - const note: IndexedNote = { + const note: IndexedDocument = { basename: removeDiacritics(file.basename), content, path: file.path, @@ -86,7 +82,6 @@ export async function addToIndexAndCache(file: TAbstractFile): Promise { } Search.minisearchInstance.add(note) - isIndexChanged = true cacheManager.addNoteToCache(note.path, note) } catch (e) { console.trace('Error while indexing ' + file.basename) @@ -105,12 +100,13 @@ export function addNonExistingToIndex(name: string, parent: string): void { const filename = name + (name.endsWith('.md') ? 
'' : '.md') if (cacheManager.getNoteFromCache(filename)) return - const note = { + const note: IndexedDocument = { path: filename, basename: name, mtime: 0, content: '', + tags: [], aliases: '', headings1: '', headings2: '', @@ -118,9 +114,8 @@ export function addNonExistingToIndex(name: string, parent: string): void { doesNotExist: true, parent, - } as IndexedNote + } Search.minisearchInstance.add(note) - isIndexChanged = true cacheManager.addNoteToCache(filename, note) } @@ -129,14 +124,13 @@ export function addNonExistingToIndex(name: string, parent: string): void { * @param path */ export function removeFromIndex(path: string): void { - if (!isFilePlaintext(path)) { + if (!isFileIndexable(path)) { console.info(`"${path}" is not an indexable file`) return } const note = cacheManager.getNoteFromCache(path) if (note) { Search.minisearchInstance.remove(note) - isIndexChanged = true cacheManager.removeNoteFromCache(path) cacheManager .getNonExistingNotesFromCache() @@ -175,21 +169,30 @@ export async function indexPDFs() { const files = app.vault.getFiles().filter(f => f.path.endsWith('.pdf')) console.time('PDF Indexing') console.log(`Omnisearch - Indexing ${files.length} PDFs`) + const input = [] for (const file of files) { if (cacheManager.getNoteFromCache(file.path)) { removeFromIndex(file.path) } - pdfQueue.add(async () => { - await addToIndexAndCache(file) - await cacheManager.writeMinisearchIndex(Search.minisearchInstance) - }) + input.push( + pdfQueue(async () => { + await addToIndexAndCache(file) + await cacheManager.writeMinisearchIndex(Search.minisearchInstance) + }) + ) + // pdfQueue.add(async () => { + // await addToIndexAndCache(file) + // await cacheManager.writeMinisearchIndex(Search.minisearchInstance) + // }) } - - await pdfQueue.onEmpty() + await Promise.all(input) + // await pdfQueue.onEmpty() console.timeEnd('PDF Indexing') if (settings.showIndexingNotices) { new Notice(`Omnisearch - Indexed ${files.length} PDFs`) } + + await 
pdfManager.cleanCache() } } diff --git a/src/pdf-manager.ts b/src/pdf-manager.ts index b1f296e..9e0f301 100644 --- a/src/pdf-manager.ts +++ b/src/pdf-manager.ts @@ -1,51 +1,58 @@ import type { TFile } from 'obsidian' -import PQueue from 'p-queue-compat' import PDFWorker from 'web-worker:./pdf-worker.ts' -import { pdfCacheFilePath } from './globals' -import { deflate, inflate } from 'pako' import { makeMD5 } from './utils' +import { database } from './database' class PDFManager { - private cache: Map = new Map() - private serializeQueue = new PQueue({ concurrency: 1 }) - - public async loadPDFCache(): Promise { - if (await app.vault.adapter.exists(pdfCacheFilePath)) { - try { - const data = await app.vault.adapter.readBinary(pdfCacheFilePath) - const json = new TextDecoder('utf8').decode(inflate(data)) - this.cache = new Map(JSON.parse(json)) - } catch (e) { - console.error(e) - this.cache = new Map() - } - } - } - public async getPdfText(file: TFile): Promise { + // 1) Check if we can find by path & size + const docByPath = await database.pdf.get({ + path: file.path, + size: file.stat.size, + }) + + if (docByPath) { + return docByPath.text + } + + // 2) Check by hash const data = new Uint8Array(await app.vault.readBinary(file)) const hash = makeMD5(data) - if (this.cache.has(hash)) { - return this.cache.get(hash)!.content + const docByHash = await database.pdf.get(hash) + if (docByHash) { + return docByHash.text } + // 3) The PDF is not cached, extract it const worker = new PDFWorker({ name: 'PDF Text Extractor' }) return new Promise(async (resolve, reject) => { // @ts-ignore + file.stat.size worker.postMessage({ data, name: file.basename }) worker.onmessage = (evt: any) => { - const txt = evt.data.text - this.updatePDFCache(hash, txt) - resolve(txt) + const text = (evt.data.text as string) + // Replace \n with spaces + .replace(/\n/g, ' ') + // Trim multiple spaces + .replace(/ +/g, ' ') + .trim() + database.pdf + .add({ hash, text, path: file.path, size: 
file.stat.size }) + .then(() => { + resolve(text) + }) } }) } - private async updatePDFCache(hash: string, content: string): Promise { - this.serializeQueue.add(() => { - this.cache.set(hash, { content }) - const data = deflate(JSON.stringify(Array.from(this.cache), null, 1)) - app.vault.adapter.writeBinary(pdfCacheFilePath, data as any) + /** + * Removes the outdated cache entries + */ + public async cleanCache(): Promise { + database.pdf.each(async item => { + if (!(await app.vault.adapter.exists(item.path))) { + console.log(item.path + ' does not exist') + } }) } } diff --git a/src/search.ts b/src/search.ts index a630c89..7f3d41b 100644 --- a/src/search.ts +++ b/src/search.ts @@ -2,33 +2,25 @@ import { Notice } from 'obsidian' import MiniSearch, { type Options, type SearchResult } from 'minisearch' import { chsRegex, - type IndexedNote, + type IndexedDocument, type ResultNote, minisearchCacheFilePath, type SearchMatch, SPACE_OR_PUNCTUATION, } from './globals' import { - isFileIndexable, isFilePlaintext, removeDiacritics, stringsToRegex, stripMarkdownCharacters, - wait, } from './utils' import type { Query } from './query' import { settings } from './settings' -// import { -// getNoteFromCache, -// isCacheOutdated, -// loadNotesCache, -// resetNotesCache, -// } from './notes' import * as NotesIndex from './notes-index' -import PQueue from 'p-queue-compat' +import pLimit from 'p-limit' import { cacheManager } from './cache-manager' -export let minisearchInstance: MiniSearch +export let minisearchInstance: MiniSearch const tokenize = (text: string): string[] => { const tokens = text.split(SPACE_OR_PUNCTUATION) @@ -46,7 +38,7 @@ const tokenize = (text: string): string[] => { * and adds all the notes to the index */ export async function initGlobalSearchIndex(): Promise { - const options: Options = { + const options: Options = { tokenize, processTerm: (term: string) => (settings.ignoreDiacritics ? 
removeDiacritics(term) : term).toLowerCase(), @@ -106,15 +98,16 @@ export async function initGlobalSearchIndex(): Promise { } // Read and index all the files into the search engine - const queue = new PQueue({ concurrency: 10 }) + const queue = pLimit(10) + const input = [] for (const file of files) { if (cacheManager.getNoteFromCache(file.path)) { NotesIndex.removeFromIndex(file.path) } - queue.add(() => NotesIndex.addToIndexAndCache(file)) + input.push(queue(() => NotesIndex.addToIndexAndCache(file))) } - await queue.onEmpty() + await Promise.all(input) if (files.length > 0) { const message = `Omnisearch - Indexed ${files.length} ${notesSuffix} in ${ diff --git a/src/settings.ts b/src/settings.ts index 5a25a5a..e7cc060 100644 --- a/src/settings.ts +++ b/src/settings.ts @@ -173,12 +173,15 @@ export class SettingsTab extends PluginSettingTab { }) ) - // PDF Indexing - not available on mobile + // PDF Indexing const indexPDFsDesc = new DocumentFragment() indexPDFsDesc.createSpan({}, span => { span.innerHTML = `Omnisearch will include PDFs in search results. - This feature is currently a work-in-progress, please report slowdowns or issues that you might experience.
- Each PDF can take a few seconds to be indexed, so it may not appear immediately in search results.
+
    +
  • ⚠️ Texts extracted from PDFs may contain errors such as missing spaces, or spaces in the middle of words.
  • +
  • This feature is currently a work-in-progress, please report issues that you might experience.
  • +
  • Each PDF can take a few seconds to be indexed, so it may not appear immediately in search results.
  • +
Needs a restart to fully take effect.` }) new Setting(containerEl) diff --git a/src/typings/types-obsidian.d.ts b/src/typings/types-obsidian.d.ts index 5cc7d8f..b6c9eee 100644 --- a/src/typings/types-obsidian.d.ts +++ b/src/typings/types-obsidian.d.ts @@ -19,6 +19,10 @@ declare module 'obsidian' { interface Vault { getConfig(string): unknown } + + interface App { + appId: string + } }