Fixed potential indexing issues

This commit is contained in:
Simon Cambier
2023-01-20 22:56:02 +01:00
parent e243c32a8d
commit c1b3c6d0ec
4 changed files with 78 additions and 55 deletions

View File

@@ -60,12 +60,10 @@ async function getAndMapIndexedDocument(
} }
// ** Image or PDF ** // ** Image or PDF **
if (extractor) { else if (extractor?.canFileBeExtracted(path)) {
if (extractor.canFileBeExtracted(path)) { content = await extractor.extractText(file)
content = await extractor.extractText(file) } else {
} else { throw new Error(`Unsupported file type: "${path}"`)
throw new Error('Invalid file format: ' + file.path)
}
} }
if (content === null || content === undefined) { if (content === null || content === undefined) {
@@ -125,6 +123,10 @@ class CacheManager {
*/ */
private documents: Map<string, IndexedDocument> = new Map() private documents: Map<string, IndexedDocument> = new Map()
/**
* Set or update the live cache with the content of the given file.
* @param path
*/
public async addToLiveCache(path: string): Promise<void> { public async addToLiveCache(path: string): Promise<void> {
try { try {
const doc = await getAndMapIndexedDocument(path) const doc = await getAndMapIndexedDocument(path)

View File

@@ -64,41 +64,41 @@ export default class OmnisearchPlugin extends Plugin {
}, },
}) })
// Listeners to keep the search index up-to-date
this.registerEvent(
this.app.vault.on('create', async file => {
if (isFileIndexable(file.path)) {
await cacheManager.addToLiveCache(file.path)
searchEngine.addFromPaths([file.path])
}
})
)
this.registerEvent(
this.app.vault.on('delete', file => {
cacheManager.removeFromLiveCache(file.path)
searchEngine.removeFromPaths([file.path])
})
)
this.registerEvent(
this.app.vault.on('modify', async file => {
if (isFileIndexable(file.path)) {
await cacheManager.addToLiveCache(file.path)
NotesIndex.markNoteForReindex(file)
}
})
)
this.registerEvent(
this.app.vault.on('rename', async (file, oldPath) => {
if (isFileIndexable(file.path)) {
cacheManager.removeFromLiveCache(oldPath)
cacheManager.addToLiveCache(file.path)
searchEngine.removeFromPaths([oldPath])
await searchEngine.addFromPaths([file.path])
}
})
)
app.workspace.onLayoutReady(async () => { app.workspace.onLayoutReady(async () => {
// Listeners to keep the search index up-to-date
this.registerEvent(
this.app.vault.on('create', file => {
if (isFileIndexable(file.path)) {
// await cacheManager.addToLiveCache(file.path)
searchEngine.addFromPaths([file.path])
}
})
)
this.registerEvent(
this.app.vault.on('delete', file => {
cacheManager.removeFromLiveCache(file.path)
searchEngine.removeFromPaths([file.path])
})
)
this.registerEvent(
this.app.vault.on('modify', async file => {
if (isFileIndexable(file.path)) {
await cacheManager.addToLiveCache(file.path)
NotesIndex.markNoteForReindex(file)
}
})
)
this.registerEvent(
this.app.vault.on('rename', async (file, oldPath) => {
if (isFileIndexable(file.path)) {
cacheManager.removeFromLiveCache(oldPath)
cacheManager.addToLiveCache(file.path)
searchEngine.removeFromPaths([oldPath])
await searchEngine.addFromPaths([file.path])
}
})
)
this.executeFirstLaunchTasks() this.executeFirstLaunchTasks()
await this.populateIndex() await this.populateIndex()
}) })
@@ -145,30 +145,38 @@ export default class OmnisearchPlugin extends Plugin {
indexingStep.set(IndexingStepType.ReadingFiles) indexingStep.set(IndexingStepType.ReadingFiles)
const files = app.vault.getFiles().filter(f => isFileIndexable(f.path)) const files = app.vault.getFiles().filter(f => isFileIndexable(f.path))
console.log(`Omnisearch - ${files.length} files total`) console.log(`Omnisearch - ${files.length} files total`)
console.log(
`Omnisearch - Cache is ${isCacheEnabled() ? 'enabled' : 'disabled'}`
)
// Map documents in the background // Map documents in the background
// Promise.all(files.map(f => cacheManager.addToLiveCache(f.path))) // Promise.all(files.map(f => cacheManager.addToLiveCache(f.path)))
if (isCacheEnabled()) { if (isCacheEnabled()) {
console.time('Omnisearch - Loading index from cache') console.time('Omnisearch - Loading index from cache')
indexingStep.set(IndexingStepType.LoadingCache) indexingStep.set(IndexingStepType.LoadingCache)
await searchEngine.loadCache() const hasCache = await searchEngine.loadCache()
console.timeEnd('Omnisearch - Loading index from cache') if (hasCache) {
console.timeEnd('Omnisearch - Loading index from cache')
}
} }
const diff = searchEngine.getDiff( const diff = searchEngine.getDiff(
files.map(f => ({ path: f.path, mtime: f.stat.mtime })) files.map(f => ({ path: f.path, mtime: f.stat.mtime }))
) )
if (diff.toAdd.length) { if (isCacheEnabled()) {
console.log( if (diff.toAdd.length) {
'Omnisearch - Total number of files to add/update: ' + diff.toAdd.length console.log(
) 'Omnisearch - Total number of files to add/update: ' +
} diff.toAdd.length
if (diff.toRemove.length) { )
console.log( }
'Omnisearch - Total number of files to remove: ' + diff.toRemove.length if (diff.toRemove.length) {
) console.log(
'Omnisearch - Total number of files to remove: ' +
diff.toRemove.length
)
}
} }
if (diff.toAdd.length >= 1000 && isCacheEnabled()) { if (diff.toAdd.length >= 1000 && isCacheEnabled()) {

View File

@@ -16,6 +16,7 @@ import {
import { Notice, Platform } from 'obsidian' import { Notice, Platform } from 'obsidian'
import type { Query } from './query' import type { Query } from './query'
import { cacheManager } from '../cache-manager' import { cacheManager } from '../cache-manager'
import { sortBy } from 'lodash-es'
const tokenize = (text: string): string[] => { const tokenize = (text: string): string[] => {
const tokens = text.split(SPACE_OR_PUNCTUATION) const tokens = text.split(SPACE_OR_PUNCTUATION)
@@ -59,12 +60,19 @@ export class Omnisearch {
this.minisearch = new MiniSearch(Omnisearch.options) this.minisearch = new MiniSearch(Omnisearch.options)
} }
async loadCache(): Promise<void> { /**
* Return true if the cache is valid
*/
async loadCache(): Promise<boolean> {
const cache = await cacheManager.getMinisearchCache() const cache = await cacheManager.getMinisearchCache()
if (cache) { if (cache) {
// console.log('Omnisearch - Cache', cache)
this.minisearch = MiniSearch.loadJS(cache.data, Omnisearch.options) this.minisearch = MiniSearch.loadJS(cache.data, Omnisearch.options)
this.indexedDocuments = new Map(cache.paths.map(o => [o.path, o.mtime])) this.indexedDocuments = new Map(cache.paths.map(o => [o.path, o.mtime]))
return true
} }
console.log('Omnisearch - No cache found')
return false
} }
/** /**
@@ -77,11 +85,13 @@ export class Omnisearch {
} { } {
const docsMap = new Map(docs.map(d => [d.path, d.mtime])) const docsMap = new Map(docs.map(d => [d.path, d.mtime]))
// console.log(this.indexedDocuments)
const toAdd = docs.filter( const toAdd = docs.filter(
d => d =>
!this.indexedDocuments.has(d.path) || !this.indexedDocuments.has(d.path) ||
this.indexedDocuments.get(d.path) !== d.mtime this.indexedDocuments.get(d.path) !== d.mtime
) )
// console.log(toAdd)
const toRemove = [...this.indexedDocuments] const toRemove = [...this.indexedDocuments]
.filter( .filter(
([path, mtime]) => !docsMap.has(path) || docsMap.get(path) !== mtime ([path, mtime]) => !docsMap.has(path) || docsMap.get(path) !== mtime
@@ -100,6 +110,8 @@ export class Omnisearch {
paths.map(async path => await cacheManager.getDocument(path)) paths.map(async path => await cacheManager.getDocument(path))
) )
).filter(d => !!d?.path) ).filter(d => !!d?.path)
// Index markdown files first
documents = sortBy(documents, d => (d.path.endsWith('.md') ? 0 : 1))
// If a document is already added, discard it // If a document is already added, discard it
this.removeFromPaths( this.removeFromPaths(

View File

@@ -237,8 +237,9 @@ export function getCtrlKeyLabel(): 'ctrl' | '⌘' {
} }
export function isFileIndexable(path: string): boolean { export function isFileIndexable(path: string): boolean {
const canIndexPDF = !!getTextExtractor() && settings.PDFIndexing const hasTextExtractor = !!getTextExtractor()
const canIndexImages = !!getTextExtractor() && settings.imagesIndexing const canIndexPDF = hasTextExtractor && settings.PDFIndexing
const canIndexImages = hasTextExtractor && settings.imagesIndexing
return ( return (
isFilePlaintext(path) || isFilePlaintext(path) ||
isFileCanvas(path) || isFileCanvas(path) ||