Minisearch 6.0 refactor ok

This commit is contained in:
Simon Cambier
2022-11-26 12:44:20 +01:00
parent e3ac5a4bac
commit c6cee62214
11 changed files with 194 additions and 125 deletions

View File

@@ -1,5 +1,5 @@
import { Notice } from 'obsidian'
import type { IndexedDocument } from './globals'
import type { DocumentRef, IndexedDocument } from './globals'
import { database } from './database'
import type { AsPlainObject } from 'minisearch'
import type MiniSearch from 'minisearch'
@@ -53,7 +53,7 @@ class CacheManager {
}
public async getMinisearchCache(): Promise<{
paths: { path: string; mtime: number }[]
paths: DocumentRef[]
data: AsPlainObject
} | null> {
try {

View File

@@ -38,17 +38,14 @@
case IndexingStepType.LoadingCache:
indexingStepDesc = 'Loading cache...'
break
case IndexingStepType.ReadingNotes:
case IndexingStepType.ReadingFiles:
indexingStepDesc = 'Reading files...'
break
case IndexingStepType.IndexingFiles:
indexingStepDesc = 'Indexing files...'
break
case IndexingStepType.WritingCache:
updateResults()
indexingStepDesc = 'Reading notes...'
break
case IndexingStepType.ReadingPDFs:
indexingStepDesc = 'Reading PDFs...'
break
case IndexingStepType.ReadingImages:
indexingStepDesc = 'Reading images...'
break
case IndexingStepType.UpdatingCache:
indexingStepDesc = 'Updating cache...'
break
default:

View File

@@ -1,6 +1,6 @@
import Dexie from 'dexie'
import type { AsPlainObject } from 'minisearch'
import type { IndexedDocument } from './globals'
import type { DocumentRef, IndexedDocument } from './globals'
export class OmnisearchCache extends Dexie {
public static readonly dbVersion = 8
@@ -8,25 +8,11 @@ export class OmnisearchCache extends Dexie {
private static instance: OmnisearchCache
//#region Table declarations
/**
* @deprecated
*/
documents!: Dexie.Table<
{
path: string
mtime: number
document: IndexedDocument
},
string
>
searchHistory!: Dexie.Table<{ id?: number; query: string }, number>
minisearch!: Dexie.Table<
{
date: string
paths: Array<{ path: string; mtime: number }>
paths: DocumentRef[]
data: AsPlainObject
},
string
@@ -37,7 +23,6 @@ export class OmnisearchCache extends Dexie {
// Database structure
this.version(OmnisearchCache.dbVersion).stores({
searchHistory: '++id',
documents: 'path',
minisearch: 'date',
})
}
@@ -58,7 +43,6 @@ export class OmnisearchCache extends Dexie {
console.log('Omnisearch - Those IndexedDb databases will be deleted:')
for (const db of toDelete) {
if (db.name) {
console.log(db.name + ' ' + db.version)
indexedDB.deleteDatabase(db.name)
}
}

View File

@@ -20,12 +20,13 @@ export const EventNames = {
export const enum IndexingStepType {
Done,
LoadingCache,
ReadingNotes,
ReadingPDFs,
ReadingImages,
UpdatingCache,
ReadingFiles,
IndexingFiles,
WritingCache,
}
/** Reference to a file by its `path`, paired with the `mtime` value recorded for it. */
export type DocumentRef = { path: string; mtime: number }
export type IndexedDocument = {
path: string
basename: string
@@ -51,7 +52,7 @@ export const isSearchMatch = (o: { offset?: number }): o is SearchMatch => {
return o.offset !== undefined
}
export const indexingStep = writable(IndexingStepType.LoadingCache)
export const indexingStep = writable(IndexingStepType.Done)
export type ResultNote = {
score: number

View File

@@ -6,7 +6,13 @@ import {
import { loadSettings, settings, SettingsTab, showExcerpt } from './settings'
import { eventBus, EventNames, indexingStep, IndexingStepType } from './globals'
import api from './tools/api'
import { isFileImage, isFilePDF, isFilePlaintext } from './tools/utils'
import {
isFileImage,
isFileIndexable,
isFilePDF,
isFilePlaintext,
wait,
} from './tools/utils'
import { OmnisearchCache } from './database'
import * as NotesIndex from './notes-index'
import { searchEngine } from './search/omnisearch'
@@ -53,7 +59,7 @@ export default class OmnisearchPlugin extends Plugin {
// Listeners to keep the search index up-to-date
this.registerEvent(
this.app.vault.on('create', file => {
searchEngine.addFromPaths([file.path])
searchEngine.addFromPaths([file.path], false)
})
)
this.registerEvent(
@@ -70,7 +76,7 @@ export default class OmnisearchPlugin extends Plugin {
this.app.vault.on('rename', async (file, oldPath) => {
if (file instanceof TFile && isFilePlaintext(file.path)) {
searchEngine.removeFromPaths([oldPath])
await searchEngine.addFromPaths([file.path])
await searchEngine.addFromPaths([file.path], false)
}
})
)
@@ -78,7 +84,7 @@ export default class OmnisearchPlugin extends Plugin {
await populateIndex()
})
showWelcomeNotice(this)
executeFirstLaunchTasks(this)
}
onunload(): void {
@@ -104,61 +110,48 @@ export default class OmnisearchPlugin extends Plugin {
*/
async function populateIndex(): Promise<void> {
console.time('Omnisearch - Indexing total time')
// // if not iOS, load data from cache
// if (!Platform.isIosApp) {
// engine = await SearchEngine.initFromCache()
// }
// Load plaintext files
indexingStep.set(IndexingStepType.ReadingNotes)
console.log('Omnisearch - Reading notes')
const plainTextFiles = app.vault
.getFiles()
.filter(f => isFilePlaintext(f.path))
.map(p => p.path)
await searchEngine.addFromPaths(plainTextFiles)
let allFiles: string[] = [...plainTextFiles]
// Load PDFs
if (settings.PDFIndexing) {
indexingStep.set(IndexingStepType.ReadingPDFs)
console.log('Omnisearch - Reading PDFs')
const pdfDocuments = app.vault
.getFiles()
.filter(f => isFilePDF(f.path))
.map(p => p.path)
await searchEngine.addFromPaths(pdfDocuments)
// Add PDFs to the files list
allFiles = [...allFiles, ...pdfDocuments]
}
// Load Images
if (settings.imagesIndexing) {
indexingStep.set(IndexingStepType.ReadingImages)
console.log('Omnisearch - Reading Images')
const imagesDocuments = app.vault
.getFiles()
.filter(f => isFileImage(f.path))
.map(p => p.path)
await searchEngine.addFromPaths(imagesDocuments)
// Add Images to the files list
allFiles = [...allFiles, ...imagesDocuments]
}
console.log('Omnisearch - Total number of files: ' + allFiles.length)
// Load PDFs into the main search engine, and write cache
// SearchEngine.loadTmpDataIntoMain()
indexingStep.set(IndexingStepType.Done)
if (!Platform.isIosApp) {
console.log('Omnisearch - Writing cache...')
await searchEngine.loadCache()
}
indexingStep.set(IndexingStepType.ReadingFiles)
const diff = searchEngine.getDiff(
app.vault
.getFiles()
.filter(f => isFileIndexable(f.path))
.map(f => ({ path: f.path, mtime: f.stat.mtime }))
)
console.log(
'Omnisearch - Total number of files to add/update: ' + diff.toAdd.length
)
console.log(
'Omnisearch - Total number of files to remove: ' + diff.toRemove.length
)
if (diff.toAdd.length >= 500) {
new Notice(
`Omnisearch - ${diff.toAdd.length} files need to be indexed. Obsidian may experience stutters and freezes during the process`,
10_000
)
}
indexingStep.set(IndexingStepType.IndexingFiles)
await searchEngine.removeFromPaths(diff.toRemove.map(o => o.path))
await searchEngine.addFromPaths(
diff.toAdd.map(o => o.path),
true
)
if (diff.toRemove.length || diff.toAdd.length) {
await searchEngine.writeToCache()
}
console.timeEnd('Omnisearch - Indexing total time')
if (diff.toAdd.length >= 500) {
new Notice(`Omnisearch - Your files have been indexed.`)
}
indexingStep.set(IndexingStepType.Done)
}
async function cleanOldCacheFiles() {
@@ -179,7 +172,7 @@ async function cleanOldCacheFiles() {
}
}
function showWelcomeNotice(plugin: Plugin) {
function executeFirstLaunchTasks(plugin: Plugin) {
const code = '1.8.0-beta.3'
if (settings.welcomeMessage !== code) {
const welcome = new DocumentFragment()

View File

@@ -43,7 +43,10 @@ export function markNoteForReindex(note: TAbstractFile): void {
export async function refreshIndex(): Promise<void> {
const paths = [...notesToReindex].map(n => n.path)
searchEngine.removeFromPaths(paths)
searchEngine.addFromPaths(paths)
notesToReindex.clear()
if (paths.length) {
searchEngine.removeFromPaths(paths)
searchEngine.addFromPaths(paths, false)
notesToReindex.clear()
// console.log(`Omnisearch - Reindexed ${paths.length} file(s)`)
}
}

View File

@@ -3,15 +3,26 @@ import MiniSearch, {
type Options,
type SearchResult,
} from 'minisearch'
import type { IndexedDocument, ResultNote, SearchMatch } from '../globals'
import { chsRegex, SPACE_OR_PUNCTUATION } from '../globals'
import type {
DocumentRef,
IndexedDocument,
ResultNote,
SearchMatch,
} from '../globals'
import {
chsRegex,
indexingStep,
IndexingStepType,
SPACE_OR_PUNCTUATION,
} from '../globals'
import { settings } from '../settings'
import {
chunkArray,
removeDiacritics,
stringsToRegex,
stripMarkdownCharacters,
} from '../tools/utils'
import { Notice } from 'obsidian'
import { Notice, Platform } from 'obsidian'
import { getIndexedDocument } from '../file-loader'
import type { Query } from './query'
import { cacheManager } from '../cache-manager'
@@ -45,7 +56,8 @@ export class Omnisearch {
logger(_level, _message, code) {
if (code === 'version_conflict') {
new Notice(
'Omnisearch - Your index cache may be incorrect or corrupted. If this message keeps appearing, go to Settings to clear the cache.'
'Omnisearch - Your index cache may be incorrect or corrupted. If this message keeps appearing, go to Settings to clear the cache.',
5000
)
}
},
@@ -59,6 +71,7 @@ export class Omnisearch {
}
async loadCache(): Promise<void> {
indexingStep.set(IndexingStepType.LoadingCache)
const cache = await cacheManager.getMinisearchCache()
if (cache) {
this.minisearch = MiniSearch.loadJS(cache.data, Omnisearch.options)
@@ -66,11 +79,38 @@ export class Omnisearch {
}
}
/**
 * Compares the given documents with the ones tracked in `this.indexedDocuments`
 * and returns what has to change for the index to match them.
 *
 * A document whose mtime differs from the indexed one ends up in BOTH lists,
 * which lets a caller remove the stale entry before adding the document again.
 *
 * @param docs the documents (path + mtime) to compare against the index
 * @returns `toAdd`: entries of `docs` that are not indexed, or are indexed with
 * a different mtime. `toRemove`: indexed entries (with their indexed mtime)
 * that are absent from `docs`, or present with a different mtime.
 */
getDiff(docs: DocumentRef[]): {
  toAdd: DocumentRef[]
  toRemove: DocumentRef[]
} {
  // Path -> mtime lookup, so each indexed entry is checked in constant time
  const docsMap = new Map<string, number>(docs.map(d => [d.path, d.mtime]))

  const toAdd = docs.filter(
    d =>
      !this.indexedDocuments.has(d.path) ||
      this.indexedDocuments.get(d.path) !== d.mtime
  )

  const toRemove = [...this.indexedDocuments]
    .filter(
      ([path, mtime]) => !docsMap.has(path) || docsMap.get(path) !== mtime
    )
    .map(([path, mtime]) => ({ path, mtime }))

  return { toAdd, toRemove }
}
/**
* Add notes/PDFs/images to the search index
* @param paths
*/
public async addFromPaths(paths: string[]): Promise<void> {
public async addFromPaths(
paths: string[],
writeToCache: boolean
): Promise<void> {
let documents = await Promise.all(
paths.map(async path => await getIndexedDocument(path))
)
@@ -80,8 +120,20 @@ export class Omnisearch {
documents.filter(d => this.indexedDocuments.has(d.path)).map(d => d.path)
)
documents.forEach(doc => this.indexedDocuments.set(doc.path, doc.mtime))
await this.minisearch.addAllAsync(documents)
// Split the documents into smaller chunks so that the cache is saved regularly.
// If the user closes Obsidian mid-indexing, at least part of the work is already saved.
const chunkedDocs = chunkArray(documents, 500)
for (const docs of chunkedDocs) {
indexingStep.set(IndexingStepType.IndexingFiles)
// Update the list of indexed docs
docs.forEach(doc => this.indexedDocuments.set(doc.path, doc.mtime))
// Add docs to minisearch
await this.minisearch.addAllAsync(docs)
// Save the index
if (writeToCache) {
await this.writeToCache()
}
}
}
/**
@@ -292,6 +344,10 @@ export class Omnisearch {
}
public async writeToCache(): Promise<void> {
if (Platform.isIosApp) {
return
}
indexingStep.set(IndexingStepType.WritingCache)
await cacheManager.writeMinisearchCache(
this.minisearch,
this.indexedDocuments

View File

@@ -188,8 +188,7 @@ export class SettingsTab extends PluginSettingTab {
new Setting(containerEl)
.setName('Simpler search')
.setDesc(
`When enabled, Omnisearch is a bit more restrictive when using your query terms as prefixes.
May return less results, but will be quicker. You should enable this if Omnisearch makes Obsidian freeze while searching.`
`Enable this if Obsidian often freezes while making searches. This will return more strict results.`
)
.addToggle(toggle =>
toggle.setValue(settings.simpleSearch).onChange(async v => {
@@ -235,7 +234,7 @@ export class SettingsTab extends PluginSettingTab {
// Keep line returns in excerpts
new Setting(containerEl)
.setName('Render line return in excerpts')
.setDesc('Activate this option render line returns in result excerpts.')
.setDesc('Activate this option to render line returns in result excerpts.')
.addToggle(toggle =>
toggle
.setValue(settings.renderLineReturnInExcerpts)

View File

@@ -244,3 +244,15 @@ export function makeMD5(data: BinaryLike): string {
}
return createHash('md5').update(data).digest('hex')
}
/**
 * Splits an array into consecutive chunks of at most `len` items.
 * The input array is not modified; the last chunk may be shorter than `len`.
 *
 * @param arr the array to split
 * @param len the maximum number of items per chunk; must be greater than 0
 * @returns the chunks, in the original order (an empty array for an empty input)
 * @throws RangeError if `len` is not greater than 0 (including `NaN`)
 */
export function chunkArray<T>(arr: T[], len: number): T[][] {
  // A zero or negative step would never advance past the end of the array,
  // so reject it up front instead of looping forever.
  if (!(len > 0)) {
    throw new RangeError(`chunkArray: len must be greater than 0, got ${len}`)
  }
  const chunks: T[][] = []
  for (let start = 0; start < arr.length; start += len) {
    chunks.push(arr.slice(start, start + len))
  }
  return chunks
}