From c1b3c6d0ec11ef83cb820113d8837fc1a6cf006c Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Fri, 20 Jan 2023 22:56:02 +0100 Subject: [PATCH] Fixed potential indexing issues --- src/cache-manager.ts | 14 +++--- src/main.ts | 100 +++++++++++++++++++++------------------ src/search/omnisearch.ts | 14 +++++- src/tools/utils.ts | 5 +- 4 files changed, 78 insertions(+), 55 deletions(-) diff --git a/src/cache-manager.ts b/src/cache-manager.ts index cb9f7f7..7e48fda 100644 --- a/src/cache-manager.ts +++ b/src/cache-manager.ts @@ -60,12 +60,10 @@ async function getAndMapIndexedDocument( } // ** Image or PDF ** - if (extractor) { - if (extractor.canFileBeExtracted(path)) { - content = await extractor.extractText(file) - } else { - throw new Error('Invalid file format: ' + file.path) - } + else if (extractor?.canFileBeExtracted(path)) { + content = await extractor.extractText(file) + } else { + throw new Error(`Unsupported file type: "${path}"`) } if (content === null || content === undefined) { @@ -125,6 +123,10 @@ class CacheManager { */ private documents: Map = new Map() + /** + * Set or update the live cache with the content of the given file. + * @param path + */ public async addToLiveCache(path: string): Promise { try { const doc = await getAndMapIndexedDocument(path) diff --git a/src/main.ts b/src/main.ts index e089de0..524bb73 100644 --- a/src/main.ts +++ b/src/main.ts @@ -64,41 +64,41 @@ export default class OmnisearchPlugin extends Plugin { }, }) - // Listeners to keep the search index up-to-date - this.registerEvent( - this.app.vault.on('create', async file => { - if (isFileIndexable(file.path)) { - await cacheManager.addToLiveCache(file.path) - searchEngine.addFromPaths([file.path]) - } - }) - ) - this.registerEvent( - this.app.vault.on('delete', file => { - cacheManager.removeFromLiveCache(file.path) - searchEngine.removeFromPaths([file.path]) - }) - ) - this.registerEvent( - this.app.vault.on('modify', async file => { - if (isFileIndexable(file.path)) { - await cacheManager.addToLiveCache(file.path) - NotesIndex.markNoteForReindex(file) - } - }) - ) - this.registerEvent( - this.app.vault.on('rename', async (file, oldPath) => { - if (isFileIndexable(file.path)) { - cacheManager.removeFromLiveCache(oldPath) - cacheManager.addToLiveCache(file.path) - searchEngine.removeFromPaths([oldPath]) - await searchEngine.addFromPaths([file.path]) - } - }) - ) - app.workspace.onLayoutReady(async () => { + // Listeners to keep the search index up-to-date + this.registerEvent( + this.app.vault.on('create', file => { + if (isFileIndexable(file.path)) { + // await cacheManager.addToLiveCache(file.path) + searchEngine.addFromPaths([file.path]) + } + }) + ) + this.registerEvent( + this.app.vault.on('delete', file => { + cacheManager.removeFromLiveCache(file.path) + searchEngine.removeFromPaths([file.path]) + }) + ) + this.registerEvent( + this.app.vault.on('modify', async file => { + if (isFileIndexable(file.path)) { + await cacheManager.addToLiveCache(file.path) + NotesIndex.markNoteForReindex(file) + } + }) + ) + this.registerEvent( + this.app.vault.on('rename', async (file, oldPath) => { + if (isFileIndexable(file.path)) { + cacheManager.removeFromLiveCache(oldPath) + cacheManager.addToLiveCache(file.path) + searchEngine.removeFromPaths([oldPath]) + await searchEngine.addFromPaths([file.path]) + } + }) + ) + this.executeFirstLaunchTasks() await this.populateIndex() }) @@ -145,30 +145,38 @@ export default class OmnisearchPlugin extends Plugin { indexingStep.set(IndexingStepType.ReadingFiles) const files = app.vault.getFiles().filter(f => isFileIndexable(f.path)) console.log(`Omnisearch - ${files.length} files total`) - + console.log( + `Omnisearch - Cache is ${isCacheEnabled() ? 'enabled' : 'disabled'}` + ) // Map documents in the background // Promise.all(files.map(f => cacheManager.addToLiveCache(f.path))) if (isCacheEnabled()) { console.time('Omnisearch - Loading index from cache') indexingStep.set(IndexingStepType.LoadingCache) - await searchEngine.loadCache() - console.timeEnd('Omnisearch - Loading index from cache') + const hasCache = await searchEngine.loadCache() + if (hasCache) { + console.timeEnd('Omnisearch - Loading index from cache') + } } const diff = searchEngine.getDiff( files.map(f => ({ path: f.path, mtime: f.stat.mtime })) ) - if (diff.toAdd.length) { - console.log( - 'Omnisearch - Total number of files to add/update: ' + diff.toAdd.length - ) - } - if (diff.toRemove.length) { - console.log( - 'Omnisearch - Total number of files to remove: ' + diff.toRemove.length - ) + if (isCacheEnabled()) { + if (diff.toAdd.length) { + console.log( + 'Omnisearch - Total number of files to add/update: ' + + diff.toAdd.length + ) + } + if (diff.toRemove.length) { + console.log( + 'Omnisearch - Total number of files to remove: ' + + diff.toRemove.length + ) + } } if (diff.toAdd.length >= 1000 && isCacheEnabled()) { diff --git a/src/search/omnisearch.ts b/src/search/omnisearch.ts index 4fc0ad0..df88b74 100644 --- a/src/search/omnisearch.ts +++ b/src/search/omnisearch.ts @@ -16,6 +16,7 @@ import { import { Notice, Platform } from 'obsidian' import type { Query } from './query' import { cacheManager } from '../cache-manager' +import { sortBy } from 'lodash-es' const tokenize = (text: string): string[] => { const tokens = text.split(SPACE_OR_PUNCTUATION) @@ -59,12 +60,19 @@ export class Omnisearch { this.minisearch = new MiniSearch(Omnisearch.options) } - async loadCache(): Promise { + /** + * Return true if the cache is valid + */ + async loadCache(): Promise { const cache = await cacheManager.getMinisearchCache() if (cache) { + // console.log('Omnisearch - Cache', cache) this.minisearch = MiniSearch.loadJS(cache.data, Omnisearch.options) this.indexedDocuments = new Map(cache.paths.map(o => [o.path, o.mtime])) + return true } + console.log('Omnisearch - No cache found') + return false } /** @@ -77,11 +85,13 @@ export class Omnisearch { } { const docsMap = new Map(docs.map(d => [d.path, d.mtime])) + // console.log(this.indexedDocuments) const toAdd = docs.filter( d => !this.indexedDocuments.has(d.path) || this.indexedDocuments.get(d.path) !== d.mtime ) + // console.log(toAdd) const toRemove = [...this.indexedDocuments] .filter( ([path, mtime]) => !docsMap.has(path) || docsMap.get(path) !== mtime @@ -100,6 +110,8 @@ export class Omnisearch { paths.map(async path => await cacheManager.getDocument(path)) ) ).filter(d => !!d?.path) + // Index markdown files first + documents = sortBy(documents, d => (d.path.endsWith('.md') ? 0 : 1)) // If a document is already added, discard it this.removeFromPaths( diff --git a/src/tools/utils.ts b/src/tools/utils.ts index 02f20f9..fb392c9 100644 --- a/src/tools/utils.ts +++ b/src/tools/utils.ts @@ -237,8 +237,9 @@ export function getCtrlKeyLabel(): 'ctrl' | '⌘' { } export function isFileIndexable(path: string): boolean { - const canIndexPDF = !!getTextExtractor() && settings.PDFIndexing - const canIndexImages = !!getTextExtractor() && settings.imagesIndexing + const hasTextExtractor = !!getTextExtractor() + const canIndexPDF = hasTextExtractor && settings.PDFIndexing + const canIndexImages = hasTextExtractor && settings.imagesIndexing return ( isFilePlaintext(path) || isFileCanvas(path) ||