Minisearch 6.0 mostly ok

This commit is contained in:
Simon Cambier
2022-11-25 22:40:59 +01:00
parent dcef2d3719
commit e3ac5a4bac
16 changed files with 196 additions and 1058 deletions

608
Cargo.lock generated
View File

@@ -1,608 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "adobe-cmap-parser"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3aaf5066d68c8ec9656cfd3a96bc9de83d4883f183d6c6b8d742e36a4819dda"
dependencies = [
"pom 1.1.0",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "base-x"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270"
[[package]]
name = "bumpalo"
version = "3.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1ad822118d20d2c234f427000d5acc36eabe1e29a348c89b63dd60b13f28e5d"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "const_fn"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbdcdcb6d86f71c5e97409ad45898af11cbc995b4ee8112d59095a28d376c935"
[[package]]
name = "crc32fast"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
dependencies = [
"cfg-if",
]
[[package]]
name = "discard"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "212d0f5754cb6769937f4501cc0e67f4f4483c8d2c3e1e922ee9edbe4ab4c7c0"
[[package]]
name = "dtoa"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0"
[[package]]
name = "encoding"
version = "0.2.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec"
dependencies = [
"encoding-index-japanese",
"encoding-index-korean",
"encoding-index-simpchinese",
"encoding-index-singlebyte",
"encoding-index-tradchinese",
]
[[package]]
name = "encoding-index-japanese"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding-index-korean"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding-index-simpchinese"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding-index-singlebyte"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding-index-tradchinese"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding_index_tests"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569"
[[package]]
name = "euclid"
version = "0.20.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bb7ef65b3777a325d1eeefefab5b6d4959da54747e33bd6258e789640f307ad"
dependencies = [
"num-traits",
]
[[package]]
name = "flate2"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "itoa"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
[[package]]
name = "itoa"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754"
[[package]]
name = "js-sys"
version = "0.3.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47"
dependencies = [
"wasm-bindgen",
]
[[package]]
name = "libc"
version = "0.2.134"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "329c933548736bc49fd575ee68c89e8be4d260064184389a5b77517cddd99ffb"
[[package]]
name = "linked-hash-map"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8dd5a6d5999d9907cda8ed67bbd137d3af8085216c2ac62de5be860bd41f304a"
[[package]]
name = "log"
version = "0.4.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
dependencies = [
"cfg-if",
]
[[package]]
name = "lopdf"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b49a0272112719d0037ab63d4bb67f73ba659e1e90bc38f235f163a457ac16f3"
dependencies = [
"dtoa",
"encoding",
"flate2",
"itoa 0.4.8",
"linked-hash-map",
"log",
"lzw",
"pom 3.2.0",
"time",
]
[[package]]
name = "lzw"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d947cbb889ed21c2a84be6ffbaebf5b4e0f4340638cba0444907e38b56be084"
[[package]]
name = "miniz_oxide"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96590ba8f175222643a85693f33d26e9c8a015f599c216509b1a6894af675d34"
dependencies = [
"adler",
]
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
]
[[package]]
name = "obsidian-search"
version = "0.1.0"
dependencies = [
"js-sys",
"pdf-extract",
"wasm-bindgen",
]
[[package]]
name = "once_cell"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1"
[[package]]
name = "pdf-extract"
version = "0.6.5-alpha.0"
source = "git+https://github.com/scambier/pdf-extract#8f01969a0bb49bd71195dd4fd5c87a4a0b5f4b48"
dependencies = [
"adobe-cmap-parser",
"encoding",
"euclid",
"linked-hash-map",
"lopdf",
"postscript",
"type1-encoding-parser",
"unicode-normalization",
]
[[package]]
name = "pom"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60f6ce597ecdcc9a098e7fddacb1065093a3d66446fa16c675e7e71d1b5c28e6"
[[package]]
name = "pom"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e2192780e9f8e282049ff9bffcaa28171e1cb0844f49ed5374e518ae6024ec"
[[package]]
name = "postscript"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac1825c05c4f9e2f781202d1a02fff5e5f722bbafca542d818364e1b1ea22575"
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]]
name = "proc-macro2"
version = "1.0.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94e2ef8dbfc347b10c094890f778ee2e36ca9bb4262e86dc99cd217e35f3470b"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rustc_version"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
dependencies = [
"semver",
]
[[package]]
name = "ryu"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09"
[[package]]
name = "semver"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
dependencies = [
"semver-parser",
]
[[package]]
name = "semver-parser"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
[[package]]
name = "serde"
version = "1.0.145"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "728eb6351430bccb993660dfffc5a72f91ccc1295abaa8ce19b27ebe4f75568b"
[[package]]
name = "serde_derive"
version = "1.0.145"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81fa1584d3d1bcacd84c277a0dfe21f5b0f6accf4a23d04d4c6d61f1af522b4c"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44"
dependencies = [
"itoa 1.0.3",
"ryu",
"serde",
]
[[package]]
name = "sha1"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1da05c97445caa12d05e848c4a4fcbbea29e748ac28f7e80e9b010392063770"
dependencies = [
"sha1_smol",
]
[[package]]
name = "sha1_smol"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012"
[[package]]
name = "standback"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e113fb6f3de07a243d434a56ec6f186dfd51cb08448239fe7bcae73f87ff28ff"
dependencies = [
"version_check",
]
[[package]]
name = "stdweb"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d022496b16281348b52d0e30ae99e01a73d737b2f45d38fed4edf79f9325a1d5"
dependencies = [
"discard",
"rustc_version",
"stdweb-derive",
"stdweb-internal-macros",
"stdweb-internal-runtime",
"wasm-bindgen",
]
[[package]]
name = "stdweb-derive"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c87a60a40fccc84bef0652345bbbbbe20a605bf5d0ce81719fc476f5c03b50ef"
dependencies = [
"proc-macro2",
"quote",
"serde",
"serde_derive",
"syn",
]
[[package]]
name = "stdweb-internal-macros"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58fa5ff6ad0d98d1ffa8cb115892b6e69d67799f6763e162a1c9db421dc22e11"
dependencies = [
"base-x",
"proc-macro2",
"quote",
"serde",
"serde_derive",
"serde_json",
"sha1",
"syn",
]
[[package]]
name = "stdweb-internal-runtime"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "213701ba3370744dcd1a12960caa4843b3d68b4d1c0a5d575e0d65b2ee9d16c0"
[[package]]
name = "syn"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "time"
version = "0.2.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4752a97f8eebd6854ff91f1c1824cd6160626ac4bd44287f7f4ea2035a02a242"
dependencies = [
"const_fn",
"libc",
"standback",
"stdweb",
"time-macros",
"version_check",
"winapi",
]
[[package]]
name = "time-macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "957e9c6e26f12cb6d0dd7fc776bb67a706312e7299aed74c8dd5b17ebb27e2f1"
dependencies = [
"proc-macro-hack",
"time-macros-impl",
]
[[package]]
name = "time-macros-impl"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd3c141a1b43194f3f56a1411225df8646c55781d5f26db825b3d98507eb482f"
dependencies = [
"proc-macro-hack",
"proc-macro2",
"quote",
"standback",
"syn",
]
[[package]]
name = "tinyvec"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "type1-encoding-parser"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3d6cc09e1a99c7e01f2afe4953789311a1c50baebbdac5b477ecf78e2e92a5b"
dependencies = [
"pom 1.1.0",
]
[[package]]
name = "unicode-ident"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
[[package]]
name = "unicode-normalization"
version = "0.1.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921"
dependencies = [
"tinyvec",
]
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasm-bindgen"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View File

@@ -1,18 +0,0 @@
[package]
name = "obsidian-search"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
crate-type = ["cdylib"]
[dependencies]
wasm-bindgen = "0.2"
js-sys = "0.3.49"
pdf-extract = { git = "https://github.com/scambier/pdf-extract" }
[profile.release]
lto = true
opt-level = 'z'

View File

@@ -1,16 +1,11 @@
import { Notice, type TFile } from 'obsidian' import { Notice } from 'obsidian'
import type { IndexedDocument } from './globals' import type { IndexedDocument } from './globals'
import { database } from './database' import { database } from './database'
import MiniSearch from 'minisearch' import type { AsPlainObject } from 'minisearch'
import { minisearchOptions } from './search/search-engine' import type MiniSearch from 'minisearch'
import { makeMD5 } from './tools/utils' import { makeMD5 } from './tools/utils'
class CacheManager { class CacheManager {
/**
* @deprecated
* @private
*/
private liveDocuments: Map<string, IndexedDocument> = new Map()
/** /**
* Show an empty input field next time the user opens Omnisearch modal * Show an empty input field next time the user opens Omnisearch modal
*/ */
@@ -40,36 +35,6 @@ class CacheManager {
return data return data
} }
/**
* Important: keep this method async for the day it _really_ becomes async.
* This will avoid a refactor.
* @deprecated
* @param path
* @param note
*/
public async updateLiveDocument(
path: string,
note: IndexedDocument
): Promise<void> {
this.liveDocuments.set(path, note)
}
/**
* @deprecated
* @param key
*/
public deleteLiveDocument(key: string): void {
this.liveDocuments.delete(key)
}
/**
* @deprecated
* @param key
*/
public getLiveDocument(key: string): IndexedDocument | undefined {
return this.liveDocuments.get(key)
}
//#region Minisearch //#region Minisearch
public getDocumentsChecksum(documents: IndexedDocument[]): string { public getDocumentsChecksum(documents: IndexedDocument[]): string {
@@ -87,20 +52,13 @@ class CacheManager {
) )
} }
public async getMinisearchCache(): Promise<MiniSearch | null> { public async getMinisearchCache(): Promise<{
// Retrieve documents and make their checksum paths: { path: string; mtime: number }[]
const cachedDocs = await database.documents.toArray() data: AsPlainObject
} | null> {
// Add those documents in the live cache
cachedDocs.forEach(doc =>
cacheManager.updateLiveDocument(doc.path, doc.document)
)
// Retrieve the search cache, and verify the checksum
const cachedIndex = (await database.minisearch.toArray())[0]
try { try {
return MiniSearch.loadJS(cachedIndex.data, minisearchOptions) const cachedIndex = (await database.minisearch.toArray())[0]
return cachedIndex
} catch (e) { } catch (e) {
new Notice( new Notice(
'Omnisearch - Cache missing or invalid. Some freezes may occur while Omnisearch indexes your vault.' 'Omnisearch - Cache missing or invalid. Some freezes may occur while Omnisearch indexes your vault.'
@@ -111,75 +69,15 @@ class CacheManager {
} }
} }
/**
* Get a dict listing the deleted/added documents since last cache
* @param documents
*/
public async getDiffDocuments(documents: IndexedDocument[]): Promise<{
toDelete: string[]
toAdd: IndexedDocument[]
toUpdate: { oldDoc: IndexedDocument; newDoc: IndexedDocument }[]
}> {
let cachedDocs = await database.documents.toArray()
// present in `documents` but not in `cachedDocs`
const toAdd = documents.filter(
d => !cachedDocs.find(c => c.path === d.path)
)
// present in `cachedDocs` but not in `documents`
const toDelete = cachedDocs
.filter(c => !documents.find(d => d.path === c.path))
.map(d => d.path)
// toUpdate: same path, but different mtime
const toUpdate = cachedDocs
.filter(({ mtime: cMtime, path: cPath }) =>
documents.some(
({ mtime: dMtime, path: dPath }) =>
cPath === dPath && dMtime !== cMtime
)
)
.map(c => ({
oldDoc: c.document,
newDoc: documents.find(d => d.path === c.path)!,
}))
return {
toAdd,
toDelete,
toUpdate,
}
}
public async writeMinisearchCache( public async writeMinisearchCache(
minisearch: MiniSearch, minisearch: MiniSearch,
documents: IndexedDocument[] indexed: Map<string, number>
): Promise<void> { ): Promise<void> {
const { toDelete, toAdd, toUpdate } = await this.getDiffDocuments(documents) const paths = Array.from(indexed).map(([k, v]) => ({ path: k, mtime: v }))
// Delete
// console.log(`Omnisearch - Cache - Will delete ${toDelete.length} documents`)
await database.documents.bulkDelete(toDelete)
// Add
// console.log(`Omnisearch - Cache - Will add ${toAdd.length} documents`)
await database.documents.bulkAdd(
toAdd.map(o => ({ document: o, mtime: o.mtime, path: o.path }))
)
// Update
// console.log(`Omnisearch - Cache - Will update ${toUpdate.length} documents`)
await database.documents.bulkPut(
toUpdate.map(o => ({
document: o.newDoc,
mtime: o.newDoc.mtime,
path: o.newDoc.path,
}))
)
await database.minisearch.clear() await database.minisearch.clear()
await database.minisearch.add({ await database.minisearch.add({
date: new Date().toISOString(), date: new Date().toISOString(),
checksum: this.getDocumentsChecksum(documents), paths,
data: minisearch.toJSON(), data: minisearch.toJSON(),
}) })
console.log('Omnisearch - Search cache written') console.log('Omnisearch - Search cache written')

View File

@@ -9,7 +9,6 @@
import { loopIndex } from 'src/tools/utils' import { loopIndex } from 'src/tools/utils'
import { onDestroy, onMount, tick } from 'svelte' import { onDestroy, onMount, tick } from 'svelte'
import { MarkdownView } from 'obsidian' import { MarkdownView } from 'obsidian'
import { SearchEngine } from 'src/search/search-engine'
import ModalContainer from './ModalContainer.svelte' import ModalContainer from './ModalContainer.svelte'
import { import {
OmnisearchInFileModal, OmnisearchInFileModal,
@@ -18,6 +17,7 @@
import ResultItemInFile from './ResultItemInFile.svelte' import ResultItemInFile from './ResultItemInFile.svelte'
import { Query } from 'src/search/query' import { Query } from 'src/search/query'
import { openNote } from 'src/tools/notes' import { openNote } from 'src/tools/notes'
import { searchEngine } from 'src/search/omnisearch'
export let modal: OmnisearchInFileModal export let modal: OmnisearchInFileModal
export let parent: OmnisearchVaultModal | null = null export let parent: OmnisearchVaultModal | null = null
@@ -50,7 +50,7 @@
query = new Query(searchQuery) query = new Query(searchQuery)
note = note =
( (
await SearchEngine.getEngine().getSuggestions(query, { await searchEngine.getSuggestions(query, {
singleFilePath, singleFilePath,
}) })
)[0] ?? null )[0] ?? null

View File

@@ -3,9 +3,8 @@
import { onDestroy, onMount, tick } from 'svelte' import { onDestroy, onMount, tick } from 'svelte'
import InputSearch from './InputSearch.svelte' import InputSearch from './InputSearch.svelte'
import ModalContainer from './ModalContainer.svelte' import ModalContainer from './ModalContainer.svelte'
import { eventBus, IndexingStep, type ResultNote } from 'src/globals' import { eventBus, indexingStep, IndexingStepType, type ResultNote } from 'src/globals'
import { createNote, openNote } from 'src/tools/notes' import { createNote, openNote } from 'src/tools/notes'
import { SearchEngine } from 'src/search/search-engine'
import { getCtrlKeyLabel, getExtension, isFilePDF, loopIndex } from 'src/tools/utils' import { getCtrlKeyLabel, getExtension, isFilePDF, loopIndex } from 'src/tools/utils'
import { import {
OmnisearchInFileModal, OmnisearchInFileModal,
@@ -16,6 +15,7 @@
import { settings } from '../settings' import { settings } from '../settings'
import * as NotesIndex from '../notes-index' import * as NotesIndex from '../notes-index'
import { cacheManager } from '../cache-manager' import { cacheManager } from '../cache-manager'
import { searchEngine } from 'src/search/omnisearch'
export let modal: OmnisearchVaultModal export let modal: OmnisearchVaultModal
export let previousQuery: string | undefined export let previousQuery: string | undefined
@@ -24,7 +24,6 @@
let searchQuery: string | undefined let searchQuery: string | undefined
let resultNotes: ResultNote[] = [] let resultNotes: ResultNote[] = []
let query: Query let query: Query
let { indexingStep } = SearchEngine
let indexingStepDesc = '' let indexingStepDesc = ''
$: selectedNote = resultNotes[selectedIndex] $: selectedNote = resultNotes[selectedIndex]
@@ -36,20 +35,20 @@
} }
$: { $: {
switch ($indexingStep) { switch ($indexingStep) {
case IndexingStep.LoadingCache: case IndexingStepType.LoadingCache:
indexingStepDesc = 'Loading cache...' indexingStepDesc = 'Loading cache...'
break break
case IndexingStep.ReadingNotes: case IndexingStepType.ReadingNotes:
updateResults() updateResults()
indexingStepDesc = 'Reading notes...' indexingStepDesc = 'Reading notes...'
break break
case IndexingStep.ReadingPDFs: case IndexingStepType.ReadingPDFs:
indexingStepDesc = 'Reading PDFs...' indexingStepDesc = 'Reading PDFs...'
break break
case IndexingStep.ReadingImages: case IndexingStepType.ReadingImages:
indexingStepDesc = 'Reading images...' indexingStepDesc = 'Reading images...'
break break
case IndexingStep.UpdatingCache: case IndexingStepType.UpdatingCache:
indexingStepDesc = 'Updating cache...' indexingStepDesc = 'Updating cache...'
break break
default: default:
@@ -99,7 +98,7 @@
async function updateResults() { async function updateResults() {
query = new Query(searchQuery) query = new Query(searchQuery)
resultNotes = (await SearchEngine.getEngine().getSuggestions(query)).sort( resultNotes = (await searchEngine.getSuggestions(query)).sort(
(a, b) => b.score - a.score (a, b) => b.score - a.score
) )
selectedIndex = 0 selectedIndex = 0
@@ -139,7 +138,7 @@
openNote(note, newPane) openNote(note, newPane)
} }
async function onClickCreateNote(e: MouseEvent) { async function onClickCreateNote(_e: MouseEvent) {
await createNoteAndCloseModal() await createNoteAndCloseModal()
} }

View File

@@ -1,5 +1,4 @@
<script lang="ts"> <script lang="ts">
import { cacheManager } from 'src/cache-manager'
import { settings, showExcerpt } from 'src/settings' import { settings, showExcerpt } from 'src/settings'
import type { ResultNote } from '../globals' import type { ResultNote } from '../globals'
import { import {
@@ -28,7 +27,7 @@
} }
$: reg = stringsToRegex(note.foundWords) $: reg = stringsToRegex(note.foundWords)
$: cleanedContent = makeExcerpt(note.content, note.matches[0]?.offset ?? -1) $: cleanedContent = makeExcerpt(note.content, note.matches[0]?.offset ?? -1)
$: glyph = cacheManager.getLiveDocument(note.path)?.doesNotExist $: glyph = false //cacheManager.getLiveDocument(note.path)?.doesNotExist
$: title = settings.showShortName ? note.basename : note.path $: title = settings.showShortName ? note.basename : note.path
</script> </script>

View File

@@ -35,7 +35,7 @@ abstract class OmnisearchModal extends Modal {
{ k: 'K', dir: 'up' }, { k: 'K', dir: 'up' },
] as const) { ] as const) {
for (const modifier of ['Ctrl', 'Mod'] as const) { for (const modifier of ['Ctrl', 'Mod'] as const) {
this.scope.register([modifier], key.k, e => { this.scope.register([modifier], key.k, _e => {
if (this.app.vault.getConfig('vimMode')) { if (this.app.vault.getConfig('vimMode')) {
// e.preventDefault() // e.preventDefault()
eventBus.emit('arrow-' + key.dir) eventBus.emit('arrow-' + key.dir)
@@ -50,7 +50,7 @@ abstract class OmnisearchModal extends Modal {
{ k: 'P', dir: 'up' }, { k: 'P', dir: 'up' },
] as const) { ] as const) {
for (const modifier of ['Ctrl', 'Mod'] as const) { for (const modifier of ['Ctrl', 'Mod'] as const) {
this.scope.register([modifier], key.k, e => { this.scope.register([modifier], key.k, _e => {
if (this.app.vault.getConfig('vimMode')) { if (this.app.vault.getConfig('vimMode')) {
// e.preventDefault() // e.preventDefault()
eventBus.emit('arrow-' + key.dir) eventBus.emit('arrow-' + key.dir)
@@ -108,7 +108,7 @@ abstract class OmnisearchModal extends Modal {
}) })
// Context // Context
this.scope.register(['Ctrl'], 'H', e => { this.scope.register(['Ctrl'], 'H', _e => {
eventBus.emit(EventNames.ToggleExcerpts) eventBus.emit(EventNames.ToggleExcerpts)
}) })
} }

View File

@@ -3,11 +3,47 @@ import type { AsPlainObject } from 'minisearch'
import type { IndexedDocument } from './globals' import type { IndexedDocument } from './globals'
export class OmnisearchCache extends Dexie { export class OmnisearchCache extends Dexie {
public static readonly dbVersion = 7 public static readonly dbVersion = 8
public static readonly dbName = 'omnisearch/cache/' + app.appId public static readonly dbName = 'omnisearch/cache/' + app.appId
private static instance: OmnisearchCache private static instance: OmnisearchCache
//#region Table declarations
/**
* @deprecated
*/
documents!: Dexie.Table<
{
path: string
mtime: number
document: IndexedDocument
},
string
>
searchHistory!: Dexie.Table<{ id?: number; query: string }, number>
minisearch!: Dexie.Table<
{
date: string
paths: Array<{ path: string; mtime: number }>
data: AsPlainObject
},
string
>
private constructor() {
super(OmnisearchCache.dbName)
// Database structure
this.version(OmnisearchCache.dbVersion).stores({
searchHistory: '++id',
documents: 'path',
minisearch: 'date',
})
}
//#endregion Table declarations
/** /**
* Deletes Omnisearch databases that have an older version than the current one * Deletes Omnisearch databases that have an older version than the current one
*/ */
@@ -29,34 +65,6 @@ export class OmnisearchCache extends Dexie {
} }
} }
//#region Table declarations
documents!: Dexie.Table<
{
path: string
mtime: number
/**
* @deprecated
*/
document: IndexedDocument
},
string
>
searchHistory!: Dexie.Table<{ id?: number; query: string }, number>
minisearch!: Dexie.Table<
{
date: string
/**
* @deprecated
*/
checksum: string
data: AsPlainObject
},
string
>
//#endregion Table declarations
public static getInstance() { public static getInstance() {
if (!OmnisearchCache.instance) { if (!OmnisearchCache.instance) {
OmnisearchCache.instance = new OmnisearchCache() OmnisearchCache.instance = new OmnisearchCache()
@@ -64,19 +72,8 @@ export class OmnisearchCache extends Dexie {
return OmnisearchCache.instance return OmnisearchCache.instance
} }
private constructor() {
super(OmnisearchCache.dbName)
// Database structure
this.version(OmnisearchCache.dbVersion).stores({
searchHistory: '++id',
documents: 'path',
minisearch: 'date',
})
}
public async clearCache() { public async clearCache() {
await this.minisearch.clear() await this.minisearch.clear()
await this.documents.clear()
} }
} }

View File

@@ -1,4 +1,3 @@
import { cacheManager } from './cache-manager'
import { import {
extractHeadingsFromCache, extractHeadingsFromCache,
getAliasesFromMetadata, getAliasesFromMetadata,
@@ -47,9 +46,8 @@ async function getBinaryFiles(files: TFile[]): Promise<IndexedDocument[]> {
const input = [] const input = []
for (const file of files) { for (const file of files) {
input.push( input.push(
new Promise(async (resolve, reject) => { new Promise(async (resolve, _reject) => {
const doc = await getIndexedDocument(file.path) const doc = await getIndexedDocument(file.path)
// await cacheManager.updateLiveDocument(file.path, doc)
data.push(doc) data.push(doc)
return resolve(null) return resolve(null)
}) })

View File

@@ -1,4 +1,5 @@
import { EventBus } from './tools/event-bus' import { EventBus } from './tools/event-bus'
import { writable } from 'svelte/store'
export const regexLineSplit = /\r?\n|\r|((\.|\?|!)( |\r?\n|\r))/g export const regexLineSplit = /\r?\n|\r|((\.|\?|!)( |\r?\n|\r))/g
export const regexYaml = /^---\s*\n(.*?)\n?^---\s?/ms export const regexYaml = /^---\s*\n(.*?)\n?^---\s?/ms
@@ -16,7 +17,7 @@ export const EventNames = {
ToggleExcerpts: 'toggle-excerpts', ToggleExcerpts: 'toggle-excerpts',
} as const } as const
export const enum IndexingStep { export const enum IndexingStepType {
Done, Done,
LoadingCache, LoadingCache,
ReadingNotes, ReadingNotes,
@@ -50,6 +51,8 @@ export const isSearchMatch = (o: { offset?: number }): o is SearchMatch => {
return o.offset !== undefined return o.offset !== undefined
} }
export const indexingStep = writable(IndexingStepType.LoadingCache)
export type ResultNote = { export type ResultNote = {
score: number score: number
path: string path: string

View File

@@ -1,18 +1,15 @@
import { Notice, Platform, Plugin, TFile } from 'obsidian' import { Notice, Platform, Plugin, TFile } from 'obsidian'
import { SearchEngine } from './search/search-engine'
import { import {
OmnisearchInFileModal, OmnisearchInFileModal,
OmnisearchVaultModal, OmnisearchVaultModal,
} from './components/modals' } from './components/modals'
import { loadSettings, settings, SettingsTab, showExcerpt } from './settings' import { loadSettings, settings, SettingsTab, showExcerpt } from './settings'
import { eventBus, EventNames, IndexingStep } from './globals' import { eventBus, EventNames, indexingStep, IndexingStepType } from './globals'
import api from './tools/api' import api from './tools/api'
import { isFilePlaintext, wait } from './tools/utils' import { isFileImage, isFilePDF, isFilePlaintext } from './tools/utils'
import * as FileLoader from './file-loader'
import { OmnisearchCache } from './database' import { OmnisearchCache } from './database'
import { cacheManager } from './cache-manager'
import * as NotesIndex from './notes-index' import * as NotesIndex from './notes-index'
import { addToIndexAndMemCache } from "./notes-index"; import { searchEngine } from './search/omnisearch'
export default class OmnisearchPlugin extends Plugin { export default class OmnisearchPlugin extends Plugin {
private ribbonButton?: HTMLElement private ribbonButton?: HTMLElement
@@ -56,12 +53,12 @@ export default class OmnisearchPlugin extends Plugin {
// Listeners to keep the search index up-to-date // Listeners to keep the search index up-to-date
this.registerEvent( this.registerEvent(
this.app.vault.on('create', file => { this.app.vault.on('create', file => {
NotesIndex.addToIndexAndMemCache(file) searchEngine.addFromPaths([file.path])
}) })
) )
this.registerEvent( this.registerEvent(
this.app.vault.on('delete', file => { this.app.vault.on('delete', file => {
NotesIndex.removeFromIndex(file.path) searchEngine.removeFromPaths([file.path])
}) })
) )
this.registerEvent( this.registerEvent(
@@ -72,8 +69,8 @@ export default class OmnisearchPlugin extends Plugin {
this.registerEvent( this.registerEvent(
this.app.vault.on('rename', async (file, oldPath) => { this.app.vault.on('rename', async (file, oldPath) => {
if (file instanceof TFile && isFilePlaintext(file.path)) { if (file instanceof TFile && isFilePlaintext(file.path)) {
NotesIndex.removeFromIndex(oldPath) searchEngine.removeFromPaths([oldPath])
await NotesIndex.addToIndexAndMemCache(file) await searchEngine.addFromPaths([file.path])
} }
}) })
) )
@@ -108,105 +105,57 @@ export default class OmnisearchPlugin extends Plugin {
async function populateIndex(): Promise<void> { async function populateIndex(): Promise<void> {
console.time('Omnisearch - Indexing total time') console.time('Omnisearch - Indexing total time')
// Initialize minisearch // // if not iOS, load data from cache
let engine = SearchEngine.getEngine() // if (!Platform.isIosApp) {
// engine = await SearchEngine.initFromCache()
// if not iOS, load data from cache // }
if (!Platform.isIosApp) {
engine = await SearchEngine.initFromCache()
}
// Load plaintext files // Load plaintext files
SearchEngine.indexingStep.set(IndexingStep.ReadingNotes) indexingStep.set(IndexingStepType.ReadingNotes)
console.log('Omnisearch - Reading notes') console.log('Omnisearch - Reading notes')
const plainTextFiles = await FileLoader.getPlainTextFiles() const plainTextFiles = app.vault
let allFiles = [...plainTextFiles] .getFiles()
// iOS: since there's no cache, directly index the documents .filter(f => isFilePlaintext(f.path))
if (Platform.isIosApp) { .map(p => p.path)
await wait(1000) await searchEngine.addFromPaths(plainTextFiles)
await engine.addAllToMinisearch(plainTextFiles)
} let allFiles: string[] = [...plainTextFiles]
// Load PDFs // Load PDFs
if (settings.PDFIndexing) { if (settings.PDFIndexing) {
SearchEngine.indexingStep.set(IndexingStep.ReadingPDFs) indexingStep.set(IndexingStepType.ReadingPDFs)
console.log('Omnisearch - Reading PDFs') console.log('Omnisearch - Reading PDFs')
const pdfDocuments = await FileLoader.getPDFAsDocuments() const pdfDocuments = app.vault
// iOS: since there's no cache, just index the documents .getFiles()
if (Platform.isIosApp) { .filter(f => isFilePDF(f.path))
await wait(1000) .map(p => p.path)
await engine.addAllToMinisearch(pdfDocuments) await searchEngine.addFromPaths(pdfDocuments)
}
// Add PDFs to the files list // Add PDFs to the files list
allFiles = [...allFiles, ...pdfDocuments] allFiles = [...allFiles, ...pdfDocuments]
} }
// Load Images // Load Images
if (settings.imagesIndexing) { if (settings.imagesIndexing) {
SearchEngine.indexingStep.set(IndexingStep.ReadingImages) indexingStep.set(IndexingStepType.ReadingImages)
console.log('Omnisearch - Reading Images') console.log('Omnisearch - Reading Images')
const imagesDocuments = await FileLoader.getImagesAsDocuments() const imagesDocuments = app.vault
// iOS: since there's no cache, just index the documents .getFiles()
if (Platform.isIosApp) { .filter(f => isFileImage(f.path))
await wait(1000) .map(p => p.path)
await engine.addAllToMinisearch(imagesDocuments) await searchEngine.addFromPaths(imagesDocuments)
}
// Add Images to the files list // Add Images to the files list
allFiles = [...allFiles, ...imagesDocuments] allFiles = [...allFiles, ...imagesDocuments]
} }
console.log('Omnisearch - Total number of files: ' + allFiles.length) console.log('Omnisearch - Total number of files: ' + allFiles.length)
let needToUpdateCache = false
// Other platforms: make a diff of what's to add/update/delete
if (!Platform.isIosApp) {
SearchEngine.indexingStep.set(IndexingStep.UpdatingCache)
console.log('Omnisearch - Checking index cache diff...')
// Check which documents need to be removed/added/updated
const diffDocs = await cacheManager.getDiffDocuments(allFiles)
console.log(
`Omnisearch - Files to add/remove/update: ${diffDocs.toAdd.length}/${diffDocs.toDelete.length}/${diffDocs.toUpdate.length}`
)
if (
diffDocs.toAdd.length +
diffDocs.toDelete.length +
diffDocs.toUpdate.length >
100
) {
new Notice(
`Omnisearch - A great number of files need to be added/updated/cleaned. This process may make cause slowdowns.`
)
}
needToUpdateCache = !!(
diffDocs.toAdd.length ||
diffDocs.toDelete.length ||
diffDocs.toUpdate.length
)
// Add
await engine.addAllToMinisearch(diffDocs.toAdd)
// Delete
for (const pathToDel of diffDocs.toDelete) {
NotesIndex.removeFromIndex(pathToDel)
}
// Update (delete + add)
diffDocs.toUpdate.forEach(({ oldDoc, newDoc }) => {
NotesIndex.removeFromIndex(oldDoc.path)
})
await engine.addAllToMinisearch(diffDocs.toUpdate.map(d => d.newDoc))
}
// Load PDFs into the main search engine, and write cache // Load PDFs into the main search engine, and write cache
// SearchEngine.loadTmpDataIntoMain() // SearchEngine.loadTmpDataIntoMain()
SearchEngine.indexingStep.set(IndexingStep.Done) indexingStep.set(IndexingStepType.Done)
if (!Platform.isIosApp && needToUpdateCache) { if (!Platform.isIosApp) {
console.log('Omnisearch - Writing cache...') console.log('Omnisearch - Writing cache...')
await SearchEngine.getEngine().writeToCache(allFiles) await searchEngine.writeToCache()
} }
console.timeEnd('Omnisearch - Indexing total time') console.timeEnd('Omnisearch - Indexing total time')

View File

@@ -1,38 +1,7 @@
import { Notice, TAbstractFile, TFile } from 'obsidian' import type { TAbstractFile } from 'obsidian'
import { isFileIndexable, wait } from './tools/utils'
import { removeAnchors } from './tools/notes' import { removeAnchors } from './tools/notes'
import { SearchEngine } from './search/search-engine'
import { cacheManager } from './cache-manager'
import type { IndexedDocument } from './globals' import type { IndexedDocument } from './globals'
import { getIndexedDocument } from "./file-loader"; import { searchEngine } from './search/omnisearch'
const indexedList: Set<string> = new Set()
/**
* Adds a file to the search index
* @param file
* @returns
*/
export async function addToIndexAndMemCache(
file: TAbstractFile
): Promise<void> {
if (!(file instanceof TFile) || !isFileIndexable(file.path)) {
return
}
try {
if (indexedList.has(file.path)) {
throw new Error(`${file.basename} is already indexed`)
}
// Make the document and index it
SearchEngine.getEngine().addSingleToMinisearch(file.path)
indexedList.add(file.path)
} catch (e) {
// console.trace('Error while indexing ' + file.basename)
console.error(e)
}
}
/** /**
* Index a non-existing note. * Index a non-existing note.
@@ -43,7 +12,6 @@ export async function addToIndexAndMemCache(
export function addNonExistingToIndex(name: string, parent: string): void { export function addNonExistingToIndex(name: string, parent: string): void {
name = removeAnchors(name) name = removeAnchors(name)
const filename = name + (name.endsWith('.md') ? '' : '.md') const filename = name + (name.endsWith('.md') ? '' : '.md')
if (cacheManager.getLiveDocument(filename)) return
const note: IndexedDocument = { const note: IndexedDocument = {
path: filename, path: filename,
@@ -60,30 +28,7 @@ export function addNonExistingToIndex(name: string, parent: string): void {
doesNotExist: true, doesNotExist: true,
parent, parent,
} }
SearchEngine.getEngine().addSingleToMinisearch(note.path) // searchEngine.addDocuments([note])
}
/**
* Removes a file from the index, by its path.
*/
export function removeFromIndex(path: string): void {
if (!isFileIndexable(path)) {
console.info(`"${path}" is not an indexable file`)
return
}
if (indexedList.has(path)) {
SearchEngine.getEngine().removeFromMinisearch(path)
// FIXME: only remove non-existing notes if they don't have another parent
// cacheManager
// .getNonExistingNotesFromMemCache()
// .filter(n => n.parent === path)
// .forEach(n => {
// removeFromIndex(n.path)
// })
} else {
console.warn(`Omnisearch - Note not found under path ${path}`)
}
} }
const notesToReindex = new Set<TAbstractFile>() const notesToReindex = new Set<TAbstractFile>()
@@ -97,13 +42,8 @@ export function markNoteForReindex(note: TAbstractFile): void {
} }
export async function refreshIndex(): Promise<void> { export async function refreshIndex(): Promise<void> {
if (notesToReindex.size > 0) { const paths = [...notesToReindex].map(n => n.path)
console.info(`Omnisearch - Reindexing ${notesToReindex.size} notes`) searchEngine.removeFromPaths(paths)
for (const note of notesToReindex) { searchEngine.addFromPaths(paths)
removeFromIndex(note.path)
await addToIndexAndMemCache(note)
await wait(0)
}
notesToReindex.clear() notesToReindex.clear()
} }
}

View File

@@ -1,25 +1,20 @@
import MiniSearch, { type Options, type SearchResult } from 'minisearch' import MiniSearch, {
import { type AsPlainObject,
chsRegex, type Options,
type IndexedDocument, type SearchResult,
IndexingStep, } from 'minisearch'
type ResultNote, import type { IndexedDocument, ResultNote, SearchMatch } from '../globals'
type SearchMatch, import { chsRegex, SPACE_OR_PUNCTUATION } from '../globals'
SPACE_OR_PUNCTUATION, import { settings } from '../settings'
} from '../globals'
import { import {
removeDiacritics, removeDiacritics,
stringsToRegex, stringsToRegex,
stripMarkdownCharacters, stripMarkdownCharacters,
} from '../tools/utils' } from '../tools/utils'
import type { Query } from './query'
import { settings } from '../settings'
import { cacheManager } from '../cache-manager'
import { writable } from 'svelte/store'
import { Notice } from 'obsidian' import { Notice } from 'obsidian'
import { getIndexedDocument } from '../file-loader' import { getIndexedDocument } from '../file-loader'
import type { Query } from './query'
let previousResults: SearchResult[] = [] import { cacheManager } from '../cache-manager'
const tokenize = (text: string): string[] => { const tokenize = (text: string): string[] => {
const tokens = text.split(SPACE_OR_PUNCTUATION) const tokens = text.split(SPACE_OR_PUNCTUATION)
@@ -32,7 +27,8 @@ const tokenize = (text: string): string[] => {
} else return tokens } else return tokens
} }
export const minisearchOptions: Options<IndexedDocument> = { export class Omnisearch {
public static readonly options: Options<IndexedDocument> = {
tokenize, tokenize,
processTerm: (term: string) => processTerm: (term: string) =>
(settings.ignoreDiacritics ? removeDiacritics(term) : term).toLowerCase(), (settings.ignoreDiacritics ? removeDiacritics(term) : term).toLowerCase(),
@@ -46,7 +42,7 @@ export const minisearchOptions: Options<IndexedDocument> = {
'headings3', 'headings3',
], ],
storeFields: ['tags'], storeFields: ['tags'],
logger(level, message, code) { logger(_level, _message, code) {
if (code === 'version_conflict') { if (code === 'version_conflict') {
new Notice( new Notice(
'Omnisearch - Your index cache may be incorrect or corrupted. If this message keeps appearing, go to Settings to clear the cache.' 'Omnisearch - Your index cache may be incorrect or corrupted. If this message keeps appearing, go to Settings to clear the cache.'
@@ -54,45 +50,47 @@ export const minisearchOptions: Options<IndexedDocument> = {
} }
}, },
} }
private minisearch: MiniSearch
private indexedDocuments: Map<string, number> = new Map()
private previousResults: SearchResult[] = []
export class SearchEngine { constructor() {
private static engine?: SearchEngine this.minisearch = new MiniSearch(Omnisearch.options)
public static indexingStep = writable(IndexingStep.LoadingCache)
/**
* The main singleton SearchEngine instance.
* Should be used for all queries
*/
public static getEngine(): SearchEngine {
if (!this.engine) {
this.engine = new SearchEngine()
}
return this.engine
} }
/** async loadCache(): Promise<void> {
* Instantiates the main instance with cache data (if it exists)
*/
public static async initFromCache(): Promise<SearchEngine> {
try {
const cache = await cacheManager.getMinisearchCache() const cache = await cacheManager.getMinisearchCache()
if (cache) { if (cache) {
this.getEngine().minisearch = cache this.minisearch = MiniSearch.loadJS(cache.data, Omnisearch.options)
this.indexedDocuments = new Map(cache.paths.map(o => [o.path, o.mtime]))
} }
} catch (e) { }
new Notice(
'Omnisearch - Cache missing or invalid. Some freezes may occur while Omnisearch indexes your vault.' /**
* Add notes/PDFs/images to the search index
* @param paths
*/
public async addFromPaths(paths: string[]): Promise<void> {
let documents = await Promise.all(
paths.map(async path => await getIndexedDocument(path))
) )
console.error('Omnisearch - Could not init engine from cache')
console.error(e) // If a document is already added, discard it
} this.removeFromPaths(
return this.getEngine() documents.filter(d => this.indexedDocuments.has(d.path)).map(d => d.path)
)
documents.forEach(doc => this.indexedDocuments.set(doc.path, doc.mtime))
await this.minisearch.addAllAsync(documents)
} }
private minisearch: MiniSearch /**
* Discard a document from minisearch
private constructor() { * @param paths
this.minisearch = new MiniSearch(minisearchOptions) */
public removeFromPaths(paths: string[]): void {
paths.forEach(p => this.indexedDocuments.delete(p))
this.minisearch.discardAll(paths)
} }
/** /**
@@ -104,7 +102,7 @@ export class SearchEngine {
options: { prefixLength: number } options: { prefixLength: number }
): Promise<SearchResult[]> { ): Promise<SearchResult[]> {
if (query.isEmpty()) { if (query.isEmpty()) {
previousResults = [] this.previousResults = []
return [] return []
} }
@@ -120,7 +118,7 @@ export class SearchEngine {
headings3: settings.weightH3, headings3: settings.weightH3,
}, },
}) })
if (!results.length) return previousResults if (!results.length) return this.previousResults
// Downrank files that are in Obsidian's excluded list // Downrank files that are in Obsidian's excluded list
if (settings.respectExcluded) { if (settings.respectExcluded) {
@@ -169,14 +167,11 @@ export class SearchEngine {
) )
.slice(0, 50) .slice(0, 50)
previousResults = results this.previousResults = results
return results return results
} }
/**
* Parses a text against a regex, and returns the { string, offset } matches
*/
public getMatches(text: string, reg: RegExp, query: Query): SearchMatch[] { public getMatches(text: string, reg: RegExp, query: Query): SearchMatch[] {
let match: RegExpExecArray | null = null let match: RegExpExecArray | null = null
const matches: SearchMatch[] = [] const matches: SearchMatch[] = []
@@ -296,26 +291,12 @@ export class SearchEngine {
return resultNotes return resultNotes
} }
// #region Read/write minisearch index public async writeToCache(): Promise<void> {
await cacheManager.writeMinisearchCache(
public async addAllToMinisearch( this.minisearch,
documents: IndexedDocument[], this.indexedDocuments
chunkSize = 10 )
): Promise<void> {
await this.minisearch.addAllAsync(documents, { chunkSize })
}
public addSingleToMinisearch(path: string): void {
getIndexedDocument(path).then(doc => this.minisearch.add(doc))
}
public removeFromMinisearch(path: string): void {
this.minisearch.discard(path)
}
// #endregion
public async writeToCache(documents: IndexedDocument[]): Promise<void> {
await cacheManager.writeMinisearchCache(this.minisearch, documents)
} }
} }
export const searchEngine = new Omnisearch()

View File

@@ -1,6 +1,6 @@
import type { ResultNote } from '../globals' import type { ResultNote } from '../globals'
import { Query } from '../search/query' import { Query } from '../search/query'
import { SearchEngine } from '../search/search-engine' import { searchEngine } from '../search/omnisearch'
type ResultNoteApi = { type ResultNoteApi = {
score: number score: number
@@ -35,7 +35,7 @@ function mapResults(results: ResultNote[]): ResultNoteApi[] {
async function search(q: string): Promise<ResultNoteApi[]> { async function search(q: string): Promise<ResultNoteApi[]> {
const query = new Query(q) const query = new Query(q)
const raw = await SearchEngine.getEngine().getSuggestions(query) const raw = await searchEngine.getSuggestions(query)
return mapResults(raw) return mapResults(raw)
} }

View File

@@ -170,7 +170,7 @@ export async function filterAsync<T>(
callbackfn: (value: T, index: number, array: T[]) => Promise<boolean> callbackfn: (value: T, index: number, array: T[]) => Promise<boolean>
): Promise<T[]> { ): Promise<T[]> {
const filterMap = await mapAsync(array, callbackfn) const filterMap = await mapAsync(array, callbackfn)
return array.filter((value, index) => filterMap[index]) return array.filter((_value, index) => filterMap[index])
} }
/** /**

View File

@@ -86,7 +86,7 @@ export function parseQuery(
let val = term.slice(sepIndex + 1) let val = term.slice(sepIndex + 1)
// Strip backslashes respecting escapes // Strip backslashes respecting escapes
val = (val + '').replace(/\\(.?)/g, function (s, n1) { val = (val + '').replace(/\\(.?)/g, function (_s, n1) {
switch (n1) { switch (n1) {
case '\\': case '\\':
return '\\' return '\\'
@@ -115,7 +115,7 @@ export function parseQuery(
} }
// Strip backslashes respecting escapes // Strip backslashes respecting escapes
term = (term + '').replace(/\\(.?)/g, function (s, n1) { term = (term + '').replace(/\\(.?)/g, function (_s, n1) {
switch (n1) { switch (n1) {
case '\\': case '\\':
return '\\' return '\\'