Squashed commit of the following:

commit 603b9bbde4c6efc90c81032e4e765c64d3075e75
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Tue Oct 11 21:47:03 2022 +0200

    Basic PDF indexing ok

commit 200331bb5c5111493af1e1f6ef8cd4bbfbdbfd4f
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Tue Oct 11 20:56:44 2022 +0200

    Tweaks and comments

commit 434b9662d40c5fea9d8b28d43828b11916db8c94
Author: Simon Cambier <simon.cambier@ores.be>
Date:   Tue Oct 11 16:22:55 2022 +0200

    Refactoring notes & minisearch cache

commit 7253c676c8ed161782ba8e33f0c4c162880925ad
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Tue Oct 11 09:50:33 2022 +0200

    wip

commit 77736e6ef6f28ccfddb64fb768732927d43bbd77
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Mon Oct 10 20:49:02 2022 +0200

    Small rewrites & deps updates

commit 59845fdb89eb6a3ad3f3f9ad75b39e7a3e604c45
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Mon Oct 10 12:22:11 2022 +0200

    wasm + worker ok

commit 1cf3b506e56147586cd0ebcc003642c5230e04cc
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Sun Oct 2 20:04:49 2022 +0200

    no disk access, of course

commit eb3dd9dd4f616a479a53e10856f6c96c6725e911
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Sun Oct 2 19:08:48 2022 +0200

    Rollup build ok

commit 54f2b7e615456c0e1b1504691689d1ba2c72d9e8
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Sun Oct 2 16:03:31 2022 +0200

    Rollup build + wasm PoC
This commit is contained in:
Simon Cambier
2022-10-11 21:54:11 +02:00
parent cf7f6af257
commit 7ddae6dc08
28 changed files with 18437 additions and 923 deletions

View File

@@ -4,12 +4,12 @@ import {
chsRegex,
type IndexedNote,
type ResultNote,
searchIndexFilePath,
minisearchCacheFilePath,
type SearchMatch,
SPACE_OR_PUNCTUATION,
} from './globals'
import {
canIndexPDFs,
isFileIndexable,
isFilePlaintext,
removeDiacritics,
stringsToRegex,
@@ -18,13 +18,15 @@ import {
} from './utils'
import type { Query } from './query'
import { settings } from './settings'
import {
getNoteFromCache,
isCacheOutdated,
loadNotesCache,
resetNotesCache,
} from './notes'
import {addToIndex, indexPDFs, removeFromIndex, saveIndexToFile} from './notes-index'
// import {
// getNoteFromCache,
// isCacheOutdated,
// loadNotesCache,
// resetNotesCache,
// } from './notes'
import * as NotesIndex from './notes-index'
import PQueue from 'p-queue-compat'
import { cacheManager } from './cache-manager'
export let minisearchInstance: MiniSearch<IndexedNote>
@@ -60,15 +62,18 @@ export async function initGlobalSearchIndex(): Promise<void> {
storeFields: ['tags'],
}
if (
settings.storeIndexInFile &&
(await app.vault.adapter.exists(searchIndexFilePath))
) {
// Default instance
minisearchInstance = new MiniSearch(options)
// Load Minisearch cache, if it exists
if (await app.vault.adapter.exists(minisearchCacheFilePath)) {
try {
const json = await app.vault.adapter.read(searchIndexFilePath)
minisearchInstance = MiniSearch.loadJSON(json, options)
const json = await cacheManager.readMinisearchIndex()
if (json) {
// If we have cache data, reload it
minisearchInstance = MiniSearch.loadJSON(json, options)
}
console.log('Omnisearch - MiniSearch index loaded from the file')
await loadNotesCache()
} catch (e) {
console.trace(
'Omnisearch - Could not load MiniSearch index from the file'
@@ -77,10 +82,9 @@ export async function initGlobalSearchIndex(): Promise<void> {
}
}
if (!minisearchInstance) {
minisearchInstance = new MiniSearch(options)
resetNotesCache()
}
// if (!minisearchInstance) {
// resetNotesCache()
// }
// Index files that are already present
const start = new Date().getTime()
@@ -89,32 +93,28 @@ export async function initGlobalSearchIndex(): Promise<void> {
let files
let notesSuffix
if (settings.storeIndexInFile) {
files = allFiles.filter(file => isCacheOutdated(file))
if (settings.persistCache) {
files = allFiles.filter(file => cacheManager.isCacheOutdated(file))
notesSuffix = 'modified notes'
} else {
files = allFiles
notesSuffix = 'notes'
}
console.log(`Omnisearch - indexing ${files.length} ${notesSuffix}`)
// This is basically the same behavior as MiniSearch's `addAllAsync()`.
// We index markdown and plaintext files by batches of 10
let promises: Promise<void>[] = []
for (let i = 0; i < files.length; ++i) {
const file = files[i]
if (getNoteFromCache(file.path)) {
removeFromIndex(file.path)
}
promises.push(addToIndex(file))
if (i % 10 === 0) {
await wait(1)
await Promise.all(promises)
promises = []
}
if (files.length > 0) {
console.log(`Omnisearch - Indexing ${files.length} ${notesSuffix}`)
}
await Promise.all(promises)
// Read and index all the files into the search engine
const queue = new PQueue({ concurrency: 10 })
for (const file of files) {
if (cacheManager.getNoteFromCache(file.path)) {
NotesIndex.removeFromIndex(file.path)
}
queue.add(() => NotesIndex.addToIndexAndCache(file))
}
await queue.onEmpty()
if (files.length > 0) {
const message = `Omnisearch - Indexed ${files.length} ${notesSuffix} in ${
@@ -127,10 +127,10 @@ export async function initGlobalSearchIndex(): Promise<void> {
new Notice(message)
}
await saveIndexToFile()
await cacheManager.writeMinisearchIndex(minisearchInstance)
// PDFs are indexed later, since they're heavier
await indexPDFs()
await NotesIndex.indexPDFs()
}
}
@@ -172,9 +172,10 @@ async function search(query: Query): Promise<SearchResult[]> {
const exactTerms = query.getExactTerms()
if (exactTerms.length) {
results = results.filter(r => {
const title = getNoteFromCache(r.id)?.path.toLowerCase() ?? ''
const title =
cacheManager.getNoteFromCache(r.id)?.path.toLowerCase() ?? ''
const content = stripMarkdownCharacters(
getNoteFromCache(r.id)?.content ?? ''
cacheManager.getNoteFromCache(r.id)?.content ?? ''
).toLowerCase()
return exactTerms.every(q => content.includes(q) || title.includes(q))
})
@@ -185,7 +186,7 @@ async function search(query: Query): Promise<SearchResult[]> {
if (exclusions.length) {
results = results.filter(r => {
const content = stripMarkdownCharacters(
getNoteFromCache(r.id)?.content ?? ''
cacheManager.getNoteFromCache(r.id)?.content ?? ''
).toLowerCase()
return exclusions.every(q => !content.includes(q.value))
})
@@ -253,7 +254,7 @@ export async function getSuggestions(
// Map the raw results to get usable suggestions
return results.map(result => {
const note = getNoteFromCache(result.id)
const note = cacheManager.getNoteFromCache(result.id)
if (!note) {
throw new Error(`Note "${result.id}" not indexed`)
}