Merge branch 'develop'

This commit is contained in:
Simon Cambier
2022-12-08 21:01:03 +01:00
23 changed files with 1123 additions and 1817 deletions

608
Cargo.lock generated
View File

@@ -1,608 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "adobe-cmap-parser"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3aaf5066d68c8ec9656cfd3a96bc9de83d4883f183d6c6b8d742e36a4819dda"
dependencies = [
"pom 1.1.0",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "base-x"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270"
[[package]]
name = "bumpalo"
version = "3.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1ad822118d20d2c234f427000d5acc36eabe1e29a348c89b63dd60b13f28e5d"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "const_fn"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbdcdcb6d86f71c5e97409ad45898af11cbc995b4ee8112d59095a28d376c935"
[[package]]
name = "crc32fast"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
dependencies = [
"cfg-if",
]
[[package]]
name = "discard"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "212d0f5754cb6769937f4501cc0e67f4f4483c8d2c3e1e922ee9edbe4ab4c7c0"
[[package]]
name = "dtoa"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0"
[[package]]
name = "encoding"
version = "0.2.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec"
dependencies = [
"encoding-index-japanese",
"encoding-index-korean",
"encoding-index-simpchinese",
"encoding-index-singlebyte",
"encoding-index-tradchinese",
]
[[package]]
name = "encoding-index-japanese"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding-index-korean"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding-index-simpchinese"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding-index-singlebyte"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding-index-tradchinese"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding_index_tests"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569"
[[package]]
name = "euclid"
version = "0.20.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bb7ef65b3777a325d1eeefefab5b6d4959da54747e33bd6258e789640f307ad"
dependencies = [
"num-traits",
]
[[package]]
name = "flate2"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "itoa"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
[[package]]
name = "itoa"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754"
[[package]]
name = "js-sys"
version = "0.3.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47"
dependencies = [
"wasm-bindgen",
]
[[package]]
name = "libc"
version = "0.2.134"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "329c933548736bc49fd575ee68c89e8be4d260064184389a5b77517cddd99ffb"
[[package]]
name = "linked-hash-map"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8dd5a6d5999d9907cda8ed67bbd137d3af8085216c2ac62de5be860bd41f304a"
[[package]]
name = "log"
version = "0.4.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
dependencies = [
"cfg-if",
]
[[package]]
name = "lopdf"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b49a0272112719d0037ab63d4bb67f73ba659e1e90bc38f235f163a457ac16f3"
dependencies = [
"dtoa",
"encoding",
"flate2",
"itoa 0.4.8",
"linked-hash-map",
"log",
"lzw",
"pom 3.2.0",
"time",
]
[[package]]
name = "lzw"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d947cbb889ed21c2a84be6ffbaebf5b4e0f4340638cba0444907e38b56be084"
[[package]]
name = "miniz_oxide"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96590ba8f175222643a85693f33d26e9c8a015f599c216509b1a6894af675d34"
dependencies = [
"adler",
]
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
]
[[package]]
name = "obsidian-search"
version = "0.1.0"
dependencies = [
"js-sys",
"pdf-extract",
"wasm-bindgen",
]
[[package]]
name = "once_cell"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1"
[[package]]
name = "pdf-extract"
version = "0.6.5-alpha.0"
source = "git+https://github.com/scambier/pdf-extract#8f01969a0bb49bd71195dd4fd5c87a4a0b5f4b48"
dependencies = [
"adobe-cmap-parser",
"encoding",
"euclid",
"linked-hash-map",
"lopdf",
"postscript",
"type1-encoding-parser",
"unicode-normalization",
]
[[package]]
name = "pom"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60f6ce597ecdcc9a098e7fddacb1065093a3d66446fa16c675e7e71d1b5c28e6"
[[package]]
name = "pom"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e2192780e9f8e282049ff9bffcaa28171e1cb0844f49ed5374e518ae6024ec"
[[package]]
name = "postscript"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac1825c05c4f9e2f781202d1a02fff5e5f722bbafca542d818364e1b1ea22575"
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]]
name = "proc-macro2"
version = "1.0.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94e2ef8dbfc347b10c094890f778ee2e36ca9bb4262e86dc99cd217e35f3470b"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rustc_version"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
dependencies = [
"semver",
]
[[package]]
name = "ryu"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09"
[[package]]
name = "semver"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
dependencies = [
"semver-parser",
]
[[package]]
name = "semver-parser"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
[[package]]
name = "serde"
version = "1.0.145"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "728eb6351430bccb993660dfffc5a72f91ccc1295abaa8ce19b27ebe4f75568b"
[[package]]
name = "serde_derive"
version = "1.0.145"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81fa1584d3d1bcacd84c277a0dfe21f5b0f6accf4a23d04d4c6d61f1af522b4c"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44"
dependencies = [
"itoa 1.0.3",
"ryu",
"serde",
]
[[package]]
name = "sha1"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1da05c97445caa12d05e848c4a4fcbbea29e748ac28f7e80e9b010392063770"
dependencies = [
"sha1_smol",
]
[[package]]
name = "sha1_smol"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012"
[[package]]
name = "standback"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e113fb6f3de07a243d434a56ec6f186dfd51cb08448239fe7bcae73f87ff28ff"
dependencies = [
"version_check",
]
[[package]]
name = "stdweb"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d022496b16281348b52d0e30ae99e01a73d737b2f45d38fed4edf79f9325a1d5"
dependencies = [
"discard",
"rustc_version",
"stdweb-derive",
"stdweb-internal-macros",
"stdweb-internal-runtime",
"wasm-bindgen",
]
[[package]]
name = "stdweb-derive"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c87a60a40fccc84bef0652345bbbbbe20a605bf5d0ce81719fc476f5c03b50ef"
dependencies = [
"proc-macro2",
"quote",
"serde",
"serde_derive",
"syn",
]
[[package]]
name = "stdweb-internal-macros"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58fa5ff6ad0d98d1ffa8cb115892b6e69d67799f6763e162a1c9db421dc22e11"
dependencies = [
"base-x",
"proc-macro2",
"quote",
"serde",
"serde_derive",
"serde_json",
"sha1",
"syn",
]
[[package]]
name = "stdweb-internal-runtime"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "213701ba3370744dcd1a12960caa4843b3d68b4d1c0a5d575e0d65b2ee9d16c0"
[[package]]
name = "syn"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "time"
version = "0.2.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4752a97f8eebd6854ff91f1c1824cd6160626ac4bd44287f7f4ea2035a02a242"
dependencies = [
"const_fn",
"libc",
"standback",
"stdweb",
"time-macros",
"version_check",
"winapi",
]
[[package]]
name = "time-macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "957e9c6e26f12cb6d0dd7fc776bb67a706312e7299aed74c8dd5b17ebb27e2f1"
dependencies = [
"proc-macro-hack",
"time-macros-impl",
]
[[package]]
name = "time-macros-impl"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd3c141a1b43194f3f56a1411225df8646c55781d5f26db825b3d98507eb482f"
dependencies = [
"proc-macro-hack",
"proc-macro2",
"quote",
"standback",
"syn",
]
[[package]]
name = "tinyvec"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "type1-encoding-parser"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3d6cc09e1a99c7e01f2afe4953789311a1c50baebbdac5b477ecf78e2e92a5b"
dependencies = [
"pom 1.1.0",
]
[[package]]
name = "unicode-ident"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
[[package]]
name = "unicode-normalization"
version = "0.1.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921"
dependencies = [
"tinyvec",
]
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasm-bindgen"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View File

@@ -1,18 +0,0 @@
[package]
name = "obsidian-search"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
crate-type = ["cdylib"]
[dependencies]
wasm-bindgen = "0.2"
js-sys = "0.3.49"
pdf-extract = { git = "https://github.com/scambier/pdf-extract" }
[profile.release]
lto = true
opt-level = 'z'

View File

@@ -6,9 +6,11 @@
![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/scambier/obsidian-omnisearch)
![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/scambier/obsidian-omnisearch?include_prereleases&label=BRAT%20beta)
> **Omnisearch** is a search engine that "_just works_". It always instantly shows you the most relevant results, thanks to its smart weighting algorithm.
> **Omnisearch** is a search engine that "_just works_". It always instantly shows you the most relevant results, thanks
> to its smart weighting algorithm.
Under the hood, it uses the excellent [MiniSearch](https://github.com/lucaong/minisearch) library.
Under the hood, it uses the excellent [MiniSearch](https://github.com/lucaong/minisearch) library,
and [obsidian-text-extract](https://github.com/scambier/obsidian-text-extract) for PDFs and images.
![](https://raw.githubusercontent.com/scambier/obsidian-omnisearch/master/images/omnisearch.gif)
@@ -17,9 +19,11 @@ Under the hood, it uses the excellent [MiniSearch](https://github.com/lucaong/mi
> Omnisearch's first goal is to _locate_ files instantly. You can see it as a _Quick Switcher_ on steroids.
- Find your **📝notes, 📄PDFs, and 🖼images** faster than ever
- _Images OCR and PDF indexing are only available on desktop_
- Automatic document scoring using the [BM25 algorithm](https://github.com/lucaong/minisearch/issues/129#issuecomment-1046257399)
- The relevance of a document against a query depends on the number of times the query terms appear in the document, its filename, and its headings
- _Images OCR and PDF indexing are only available on desktop_
- Automatic document scoring using
the [BM25 algorithm](https://github.com/lucaong/minisearch/issues/129#issuecomment-1046257399)
- The relevance of a document against a query depends on the number of times the query terms appear in the document,
its filename, and its headings
- Keyboard first: you never have to use your mouse
- Workflow similar to the "Quick Switcher" core plugin
- Resistance to typos
@@ -28,12 +32,15 @@ Under the hood, it uses the excellent [MiniSearch](https://github.com/lucaong/mi
- Directly Insert a `[[link]]` from the search results
- Supports Vim navigation keys
**Note:** support of Chinese, Japanese, Korean, etc. depends on [this additional plugin](https://github.com/aidenlx/cm-chs-patch). Please read its documentation for more information.
**Note:** support of Chinese, Japanese, Korean, etc. depends
on [this additional plugin](https://github.com/aidenlx/cm-chs-patch). Please read its documentation for more
information.
## Installation
- Omnisearch is available on [the official Community Plugins repository](https://obsidian.md/plugins?search=Omnisearch).
- Beta releases can be installed through [BRAT](https://github.com/TfTHacker/obsidian42-brat). **Be advised that those versions can be buggy and break things.**
- Beta releases can be installed through [BRAT](https://github.com/TfTHacker/obsidian42-brat). **Be advised that those
versions can be buggy and break things.** Only install beta versions if you're willing to take that risk.
You can check the [CHANGELOG](./CHANGELOG.md) for more information on the different versions.
@@ -43,14 +50,15 @@ Omnisearch can be used within 2 different contexts:
### Vault Search
Omnisearch's core feature, accessible with the Command Palette "**_Omnisearch: Vault search_**". This modal searches through your vault and returns the most relevant notes. That's all you need to _find_ a note.
Omnisearch's core feature, accessible with the Command Palette "**_Omnisearch: Vault search_**". This modal searches
through your vault and returns the most relevant notes. That's all you need to _find_ a note.
If you want to list all the search matches of a single note, you can do so by using `tab` to open the In-File Search.
### In-File Search
Also accessible through the Command Palette "**_Omnisearch: In-file search_**". This modal searches through the active note's content and lists the matching results. Just press enter to automatically scroll to the right place.
Also accessible through the Command Palette "**_Omnisearch: In-file search_**". This modal searches through the active
note's content and lists the matching results. Just press enter to automatically scroll to the right place.
## URL Scheme & Public API
@@ -58,9 +66,11 @@ You can open Omnisearch with the following scheme: `obsidian://omnisearch?query=
----
For plugin developers and Dataview users, Omnisearch is also accessible through the global object `omnisearch` (`window.omnisearch`)
For plugin developers and Dataview users, Omnisearch is also accessible through the global
object `omnisearch` (`window.omnisearch`)
> This API is an experimental feature, the `ResultNote` interface may change in the future. The `search()` function returns at most 50 results.
> This API is an experimental feature, the `ResultNote` interface may change in the future. The `search()` function
> returns at most 50 results.
```ts
// API:
@@ -110,12 +120,13 @@ There are several CSS classes you can use to customize the appearance of Omnisea
.omnisearch-input-field
```
For example, if you'd like the usual yellow highlight on search matches, you can add this code inside a CSS snippet file:
For example, if you'd like the usual yellow highlight on search matches, you can add this code inside a CSS snippet
file:
```css
.omnisearch-highlight {
color: var(--text-normal);
background-color: var(--text-highlight-bg);
color: var(--text-normal);
background-color: var(--text-highlight-bg);
}
```
@@ -123,14 +134,27 @@ See [styles.css](./assets/styles.css) for more information.
## Issues & Solutions
**Omnisearch makes Obsidian sluggish at startup.**
**Omnisearch makes Obsidian sluggish/freeze at startup.**
- While Omnisearch does its best to work smoothly in the background, bigger vaults can cause some hiccups at startup because of the search index size.
- While Omnisearch does its best to work smoothly in the background, bigger vaults and files can make Obsidian stutter
during indexing.
- If you have several thousand files, Obsidian may freeze for a few seconds at startup while the Omnisearch cache is
loaded in memory.
**Omnisearch is slow to index my PDFs and images**
- The first time Omnisearch indexes those files, it needs to extract their text. This can take a long time, but
will only happen once. This process is also resumable, so you can temporarily disable PDFs/images indexing, or close
Obsidian without losing data.
**Can I index PDFs/images on mobile?**
- Not at the moment. On mobile devices, text extraction either doesn't work or consumes too many resources.
**Omnisearch gives inconsistent/invalid results, there are errors in the developer console**
- Restart Obsidian to force a reindex of Omnisearch.
- The cache can be corrupted; you can clear it at the bottom of the settings page, then restart Obsidian.
- The cache could be corrupted; you can clear it at the bottom of the settings page, then restart Obsidian.
**A query should return a result that does not appear.**
@@ -144,8 +168,8 @@ See [here](https://github.com/scambier/obsidian-omnisearch#css-customization).
**I'm still having an issue**
You can write your issue [here](https://github.com/scambier/obsidian-omnisearch/issues) with as much details as possible.
You can write your issue [here](https://github.com/scambier/obsidian-omnisearch/issues) with as many details as
possible.
## LICENSE
@@ -153,6 +177,7 @@ Omnisearch is licensed under [GPL-3](https://tldrlegal.com/license/gnu-general-p
## Thanks
To all people who donate through [Ko-Fi](https://ko-fi.com/scambier) or [Github Sponsors](https://github.com/sponsors/scambier) ❤
To all people who donate through [Ko-Fi](https://ko-fi.com/scambier)
or [Github Sponsors](https://github.com/sponsors/scambier) ❤
![JetBrains Logo (Main) logo](https://resources.jetbrains.com/storage/products/company/brand/logos/jb_beam.svg)

View File

@@ -30,6 +30,11 @@
.omnisearch-highlight {
}
.omnisearch-default-highlight {
color: var(--text-normal);
background-color: var(--text-highlight-bg);
}
.omnisearch-input-container {
display: flex;
flex-direction: row;

View File

@@ -1,6 +1,6 @@
{
"name": "scambier.obsidian-search",
"version": "1.8.1",
"version": "1.9.0-beta.8",
"description": "A search engine for Obsidian",
"main": "dist/main.js",
"scripts": {
@@ -14,13 +14,13 @@
"author": "Simon Cambier",
"license": "GPL-3",
"devDependencies": {
"@babel/preset-env": "^7.19.4",
"@babel/preset-env": "^7.20.2",
"@babel/preset-typescript": "^7.18.6",
"@testing-library/jest-dom": "^5.16.5",
"@tsconfig/svelte": "^3.0.0",
"@types/jest": "^27.5.2",
"@types/lodash-es": "^4.17.6",
"@types/node": "^16.11.64",
"@types/node": "^16.18.4",
"@types/pako": "^2.0.0",
"babel-jest": "^27.5.1",
"builtin-modules": "^3.3.0",
@@ -29,21 +29,21 @@
"esbuild-svelte": "0.7.1",
"jest": "^27.5.1",
"obsidian": "latest",
"prettier": "^2.7.1",
"prettier-plugin-svelte": "^2.8.0",
"svelte": "^3.51.0",
"svelte-check": "^2.9.2",
"prettier": "^2.8.0",
"prettier-plugin-svelte": "^2.8.1",
"svelte": "^3.53.1",
"svelte-check": "^2.10.0",
"svelte-jester": "^2.3.2",
"svelte-preprocess": "^4.10.7",
"tslib": "2.3.1",
"typescript": "^4.8.4",
"vite": "^3.2.1"
"typescript": "^4.9.3",
"vite": "^3.2.4"
},
"dependencies": {
"dexie": "^3.2.2",
"lodash-es": "4.17.21",
"minisearch": "github:scambier/minisearch#callback_desync",
"obsidian-text-extract": "1.0.3",
"minisearch": "6.0.0-beta.1",
"obsidian-text-extract": "1.0.4",
"pure-md5": "^0.1.14"
},
"pnpm": {

723
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,17 +1,102 @@
import { Notice, type TFile } from 'obsidian'
import type { IndexedDocument } from './globals'
import { Notice } from 'obsidian'
import type { DocumentRef, IndexedDocument } from './globals'
import { database } from './database'
import MiniSearch from 'minisearch'
import { minisearchOptions } from './search/search-engine'
import { makeMD5 } from './tools/utils'
import type { AsPlainObject } from 'minisearch'
import type MiniSearch from 'minisearch'
import {
extractHeadingsFromCache,
getAliasesFromMetadata,
getTagsFromMetadata,
isFileImage,
isFilePDF,
isFilePlaintext,
makeMD5,
removeDiacritics,
} from './tools/utils'
import { getImageText, getPdfText } from 'obsidian-text-extract'
/**
 * Builds the IndexedDocument for the vault file located at `path`.
 *
 * The text is obtained with `cachedRead` for plaintext files, `getPdfText`
 * for PDFs and `getImageText` for images. Excalidraw drawings (detected
 * through the `excalidraw-plugin` frontmatter key) get their comment sections
 * cut out, then the text is passed through `removeDiacritics`.
 *
 * @param path - Vault path of the file to index
 * @returns The document to store in the live cache and the search index
 * @throws Error if no vault file matches `path`, or if the file is neither
 *   plaintext, PDF nor image
 */
async function getIndexedDocument(path: string): Promise<IndexedDocument> {
  const file = app.vault.getFiles().find(f => f.path === path)
  if (!file) throw new Error(`Invalid file path: "${path}"`)

  let content: string
  if (isFilePlaintext(path)) {
    content = await app.vault.cachedRead(file)
  } else if (isFilePDF(path)) {
    content = await getPdfText(file)
  } else if (isFileImage(file.path)) {
    content = await getImageText(file)
  } else {
    throw new Error('Invalid file format: ' + file.path)
  }

  const metadata = app.metadataCache.getFileCache(file)

  // Look for links that lead to non-existing files,
  // and add them to the index.
  if (metadata) {
    // // FIXME: https://github.com/scambier/obsidian-omnisearch/issues/129
    // const nonExisting = getNonExistingNotes(file, metadata)
    // for (const name of nonExisting.filter(
    //   o => !cacheManager.getLiveDocument(o)
    // )) {
    //   NotesIndex.addNonExistingToIndex(name, file.path)
    // }

    // EXCALIDRAW
    // Remove the json code
    if (metadata.frontmatter?.['excalidraw-plugin']) {
      const comments =
        metadata.sections?.filter(s => s.type === 'comment') ?? []
      // Cut from the end of the file towards the beginning: removing a section
      // only shifts the text located after it, so the offsets of the sections
      // that are still to be removed stay valid.
      const positions = comments
        .map(c => c.position)
        .sort((a, b) => b.start.offset - a.start.offset)
      for (const { start, end } of positions) {
        content =
          content.substring(0, start.offset - 1) + content.substring(end.offset)
      }
    }
  }

  // Normalize only once the offset-based cuts are done: the section offsets
  // refer to the text as read from the file, and would no longer line up if
  // removeDiacritics changed the length of the string.
  content = removeDiacritics(content)

  return {
    basename: removeDiacritics(file.basename),
    content,
    path: file.path,
    mtime: file.stat.mtime,
    tags: getTagsFromMetadata(metadata),
    // Space-separated, like the headings, so that each alias stays a
    // distinct word instead of being glued to its neighbours
    aliases: getAliasesFromMetadata(metadata).join(' '),
    headings1: metadata ? extractHeadingsFromCache(metadata, 1).join(' ') : '',
    headings2: metadata ? extractHeadingsFromCache(metadata, 2).join(' ') : '',
    headings3: metadata ? extractHeadingsFromCache(metadata, 3).join(' ') : '',
  }
}
class CacheManager {
private liveDocuments: Map<string, IndexedDocument> = new Map()
/**
* Show an empty input field next time the user opens Omnisearch modal
*/
private nextQueryIsEmpty = false
/**
* The "live cache", containing all indexed vault files
* in the form of IndexedDocuments
*/
private documents: Map<string, IndexedDocument> = new Map()
/**
 * Builds the IndexedDocument for `path` and stores it in the live cache,
 * replacing any entry already stored under that path.
 * Rejects if `getIndexedDocument` rejects.
 */
public async addToLiveCache(path: string): Promise<void> {
  const indexed: IndexedDocument = await getIndexedDocument(path)
  this.documents.set(path, indexed)
}
/**
 * Drops the entry stored under `path` from the live cache.
 * Does nothing if no such entry exists.
 */
public removeFromLiveCache(path: string): void {
  this.documents.delete(path)
}
/**
 * Returns the IndexedDocument stored under `path` in the live cache.
 * On a cache miss, the document is first built and cached through
 * `addToLiveCache`.
 */
public async getDocument(path: string): Promise<IndexedDocument> {
  const isCached = this.documents.has(path)
  if (!isCached) {
    await this.addToLiveCache(path)
  }
  return this.documents.get(path)!
}
public async addToSearchHistory(query: string): Promise<void> {
if (!query) {
this.nextQueryIsEmpty = true
@@ -36,32 +121,6 @@ class CacheManager {
return data
}
/**
* Important: keep this method async for the day it _really_ becomes async.
* This will avoid a refactor.
* @param path
* @param note
*/
public async updateLiveDocument(
path: string,
note: IndexedDocument
): Promise<void> {
this.liveDocuments.set(path, note)
}
public deleteLiveDocument(key: string): void {
this.liveDocuments.delete(key)
}
public getLiveDocument(key: string): IndexedDocument | undefined {
return this.liveDocuments.get(key)
}
public isDocumentOutdated(file: TFile): boolean {
const indexedNote = this.getLiveDocument(file.path)
return !indexedNote || indexedNote.mtime !== file.stat.mtime
}
//#region Minisearch
public getDocumentsChecksum(documents: IndexedDocument[]): string {
@@ -79,28 +138,13 @@ class CacheManager {
)
}
public async getMinisearchCache(): Promise<MiniSearch | null> {
// Retrieve documents and make their checksum
const cachedDocs = await database.documents.toArray()
const checksum = this.getDocumentsChecksum(cachedDocs.map(d => d.document))
// Add those documents in the live cache
cachedDocs.forEach(doc =>
cacheManager.updateLiveDocument(doc.path, doc.document)
)
// Retrieve the search cache, and verify the checksum
const cachedIndex = (await database.minisearch.toArray())[0]
if (cachedIndex?.checksum !== checksum) {
console.warn("Omnisearch - Cache - Checksums don't match, clearing cache")
// Invalid (or null) cache, clear everything
await database.minisearch.clear()
await database.documents.clear()
return null
}
public async getMinisearchCache(): Promise<{
paths: DocumentRef[]
data: AsPlainObject
} | null> {
try {
return MiniSearch.loadJS(cachedIndex.data, minisearchOptions)
const cachedIndex = (await database.minisearch.toArray())[0]
return cachedIndex
} catch (e) {
new Notice(
'Omnisearch - Cache missing or invalid. Some freezes may occur while Omnisearch indexes your vault.'
@@ -111,75 +155,15 @@ class CacheManager {
}
}
/**
 * Compares `documents` with the rows stored in `database.documents` and
 * returns what was added, deleted or updated since the cache was written.
 *
 * - `toAdd`: documents whose path has no cached row.
 * - `toDelete`: cached documents whose path is absent from `documents`.
 * - `toUpdate`: cached documents for which a document with the same path but
 *   a different `mtime` exists; `newDoc` is the first document with that path.
 *
 * @param documents The documents currently known to the caller.
 */
public async getDiffDocuments(documents: IndexedDocument[]): Promise<{
  toDelete: IndexedDocument[]
  toAdd: IndexedDocument[]
  toUpdate: { oldDoc: IndexedDocument; newDoc: IndexedDocument }[]
}> {
  const cachedDocs = await database.documents.toArray()

  // Index both sides by path once, so each membership test is a constant-time
  // lookup instead of a linear scan of the other list.
  const cachedPaths = new Set(cachedDocs.map(c => c.path))
  const documentsByPath = new Map<string, IndexedDocument[]>()
  for (const doc of documents) {
    const samePath = documentsByPath.get(doc.path)
    if (samePath) {
      samePath.push(doc)
    } else {
      documentsByPath.set(doc.path, [doc])
    }
  }

  // present in `documents` but not in `cachedDocs`
  const toAdd = documents.filter(d => !cachedPaths.has(d.path))

  // present in `cachedDocs` but not in `documents`
  const toDelete = cachedDocs
    .filter(c => !documentsByPath.has(c.path))
    .map(c => c.document)

  // toUpdate: same path, but different mtime
  const toUpdate: { oldDoc: IndexedDocument; newDoc: IndexedDocument }[] = []
  for (const cached of cachedDocs) {
    const candidates = documentsByPath.get(cached.path)
    if (candidates?.some(d => d.mtime !== cached.mtime)) {
      // Groups are built in input order, so index 0 is the first document
      // with this path.
      toUpdate.push({ oldDoc: cached.document, newDoc: candidates[0] })
    }
  }

  return {
    toAdd,
    toDelete,
    toUpdate,
  }
}
public async writeMinisearchCache(
minisearch: MiniSearch,
documents: IndexedDocument[]
indexed: Map<string, number>
): Promise<void> {
const { toDelete, toAdd, toUpdate } = await this.getDiffDocuments(documents)
// Delete
// console.log(`Omnisearch - Cache - Will delete ${toDelete.length} documents`)
await database.documents.bulkDelete(toDelete.map(o => o.path))
// Add
// console.log(`Omnisearch - Cache - Will add ${toAdd.length} documents`)
await database.documents.bulkAdd(
toAdd.map(o => ({ document: o, mtime: o.mtime, path: o.path }))
)
// Update
// console.log(`Omnisearch - Cache - Will update ${toUpdate.length} documents`)
await database.documents.bulkPut(
toUpdate.map(o => ({
document: o.newDoc,
mtime: o.newDoc.mtime,
path: o.newDoc.path,
}))
)
const paths = Array.from(indexed).map(([k, v]) => ({ path: k, mtime: v }))
await database.minisearch.clear()
await database.minisearch.add({
date: new Date().toISOString(),
checksum: this.getDocumentsChecksum(documents),
paths,
data: minisearch.toJSON(),
})
console.log('Omnisearch - Search cache written')

View File

@@ -32,7 +32,7 @@
// the next time we open the modal, the search field will be empty
cacheManager.addToSearchHistory('')
dispatch('input', value)
}, 200)
}, 500)
</script>
<div class="omnisearch-input-container">

View File

@@ -9,7 +9,6 @@
import { loopIndex } from 'src/tools/utils'
import { onDestroy, onMount, tick } from 'svelte'
import { MarkdownView } from 'obsidian'
import { SearchEngine } from 'src/search/search-engine'
import ModalContainer from './ModalContainer.svelte'
import {
OmnisearchInFileModal,
@@ -18,6 +17,7 @@
import ResultItemInFile from './ResultItemInFile.svelte'
import { Query } from 'src/search/query'
import { openNote } from 'src/tools/notes'
import { searchEngine } from 'src/search/omnisearch'
export let modal: OmnisearchInFileModal
export let parent: OmnisearchVaultModal | null = null
@@ -50,7 +50,7 @@
query = new Query(searchQuery)
note =
(
await SearchEngine.getEngine().getSuggestions(query, {
await searchEngine.getSuggestions(query, {
singleFilePath,
})
)[0] ?? null

View File

@@ -3,19 +3,16 @@
import { onDestroy, onMount, tick } from 'svelte'
import InputSearch from './InputSearch.svelte'
import ModalContainer from './ModalContainer.svelte'
import { eventBus, IndexingStep, type ResultNote } from 'src/globals'
import { eventBus, indexingStep, IndexingStepType, type ResultNote, } from 'src/globals'
import { createNote, openNote } from 'src/tools/notes'
import { SearchEngine } from 'src/search/search-engine'
import { getCtrlKeyLabel, getExtension, loopIndex } from 'src/tools/utils'
import {
OmnisearchInFileModal,
type OmnisearchVaultModal,
} from 'src/components/modals'
import { getCtrlKeyLabel, getExtension, isFilePDF, loopIndex, } from 'src/tools/utils'
import { OmnisearchInFileModal, type OmnisearchVaultModal, } from 'src/components/modals'
import ResultItemVault from './ResultItemVault.svelte'
import { Query } from 'src/search/query'
import { settings } from '../settings'
import * as NotesIndex from '../notes-index'
import { cacheManager } from '../cache-manager'
import { searchEngine } from 'src/search/omnisearch'
export let modal: OmnisearchVaultModal
export let previousQuery: string | undefined
@@ -24,32 +21,34 @@
let searchQuery: string | undefined
let resultNotes: ResultNote[] = []
let query: Query
let { indexingStep } = SearchEngine
let indexingStepDesc = ''
let searching = true
$: selectedNote = resultNotes[selectedIndex]
$: searchQuery = searchQuery ?? previousQuery
$: if (searchQuery) {
updateResults()
resultNotes = []
searching = true
updateResults().then(() => {
searching = false
})
} else {
searching = false
resultNotes = []
}
$: {
switch ($indexingStep) {
case IndexingStep.LoadingCache:
case IndexingStepType.LoadingCache:
indexingStepDesc = 'Loading cache...'
break
case IndexingStep.ReadingNotes:
case IndexingStepType.ReadingFiles:
indexingStepDesc = 'Reading files...'
break
case IndexingStepType.IndexingFiles:
indexingStepDesc = 'Indexing files...'
break
case IndexingStepType.WritingCache:
updateResults()
indexingStepDesc = 'Reading notes...'
break
case IndexingStep.ReadingPDFs:
indexingStepDesc = 'Reading PDFs...'
break
case IndexingStep.ReadingImages:
indexingStepDesc = 'Reading images...'
break
case IndexingStep.UpdatingCache:
indexingStepDesc = 'Updating cache...'
break
default:
@@ -99,7 +98,7 @@
async function updateResults() {
query = new Query(searchQuery)
resultNotes = (await SearchEngine.getEngine().getSuggestions(query)).sort(
resultNotes = (await searchEngine.getSuggestions(query)).sort(
(a, b) => b.score - a.score
)
selectedIndex = 0
@@ -139,7 +138,7 @@
openNote(note, newPane)
}
async function onClickCreateNote(e: MouseEvent) {
async function onClickCreateNote(_e: MouseEvent) {
await createNoteAndCloseModal()
}
@@ -189,7 +188,7 @@
function switchToInFileModal(): void {
// Do nothing if the selectedNote is a PDF,
// or if there is 0 match (e.g indexing in progress)
if (selectedNote?.path.endsWith('.pdf') || !selectedNote?.matches.length) {
if (selectedNote && (isFilePDF(selectedNote?.path) || !selectedNote?.matches.length)) {
return
}
@@ -250,11 +249,13 @@
on:mousemove="{_ => (selectedIndex = i)}"
on:click="{onClick}" />
{/each}
{#if !resultNotes.length && searchQuery}
<div style="text-align: center;">
<div style="text-align: center;">
{#if !resultNotes.length && searchQuery && !searching}
We found 0 result for your search here.
</div>
{/if}
{:else if searching}
Searching...
{/if}
</div>
</ModalContainer>
<div class="prompt-instructions">

View File

@@ -1,5 +1,4 @@
<script lang="ts">
import { cacheManager } from 'src/cache-manager'
import { settings, showExcerpt } from 'src/settings'
import type { ResultNote } from '../globals'
import {
@@ -21,13 +20,14 @@
// @ts-ignore
const file = app.vault.getFiles().find(f => f.path === note.path)
if (file) {
// @ts-ignore
imagePath = app.vault.getResourcePath(file)
}
}
}
$: reg = stringsToRegex(note.foundWords)
$: cleanedContent = makeExcerpt(note.content, note.matches[0]?.offset ?? -1)
$: glyph = cacheManager.getLiveDocument(note.path)?.doesNotExist
$: glyph = false //cacheManager.getLiveDocument(note.path)?.doesNotExist
$: title = settings.showShortName ? note.basename : note.path
</script>

View File

@@ -35,7 +35,7 @@ abstract class OmnisearchModal extends Modal {
{ k: 'K', dir: 'up' },
] as const) {
for (const modifier of ['Ctrl', 'Mod'] as const) {
this.scope.register([modifier], key.k, e => {
this.scope.register([modifier], key.k, _e => {
if (this.app.vault.getConfig('vimMode')) {
// e.preventDefault()
eventBus.emit('arrow-' + key.dir)
@@ -50,7 +50,7 @@ abstract class OmnisearchModal extends Modal {
{ k: 'P', dir: 'up' },
] as const) {
for (const modifier of ['Ctrl', 'Mod'] as const) {
this.scope.register([modifier], key.k, e => {
this.scope.register([modifier], key.k, _e => {
if (this.app.vault.getConfig('vimMode')) {
// e.preventDefault()
eventBus.emit('arrow-' + key.dir)
@@ -108,7 +108,7 @@ abstract class OmnisearchModal extends Modal {
})
// Context
this.scope.register(['Ctrl'], 'H', e => {
this.scope.register(['Ctrl'], 'H', _e => {
eventBus.emit(EventNames.ToggleExcerpts)
})
}

View File

@@ -1,13 +1,34 @@
import Dexie from 'dexie'
import type { AsPlainObject } from 'minisearch'
import type { IndexedDocument } from './globals'
import type { DocumentRef, IndexedDocument } from './globals'
export class OmnisearchCache extends Dexie {
public static readonly dbVersion = 7
public static readonly dbVersion = 8
public static readonly dbName = 'omnisearch/cache/' + app.appId
private static instance: OmnisearchCache
searchHistory!: Dexie.Table<{ id?: number; query: string }, number>
minisearch!: Dexie.Table<
{
date: string
paths: DocumentRef[]
data: AsPlainObject
},
string
>
private constructor() {
super(OmnisearchCache.dbName)
// Database structure
this.version(OmnisearchCache.dbVersion).stores({
searchHistory: '++id',
minisearch: 'date',
})
}
//#endregion Table declarations
/**
* Deletes Omnisearch databases that have an older version than the current one
*/
@@ -22,27 +43,12 @@ export class OmnisearchCache extends Dexie {
console.log('Omnisearch - Those IndexedDb databases will be deleted:')
for (const db of toDelete) {
if (db.name) {
console.log(db.name + ' ' + db.version)
indexedDB.deleteDatabase(db.name)
}
}
}
}
//#region Table declarations
documents!: Dexie.Table<
{ path: string; mtime: number; document: IndexedDocument },
string
>
searchHistory!: Dexie.Table<{ id?: number; query: string }, number>
minisearch!: Dexie.Table<
{ date: string; checksum: string; data: AsPlainObject },
string
>
//#endregion Table declarations
public static getInstance() {
if (!OmnisearchCache.instance) {
OmnisearchCache.instance = new OmnisearchCache()
@@ -50,19 +56,8 @@ export class OmnisearchCache extends Dexie {
return OmnisearchCache.instance
}
private constructor() {
super(OmnisearchCache.dbName)
// Database structure
this.version(OmnisearchCache.dbVersion).stores({
searchHistory: '++id',
documents: 'path',
minisearch: 'date',
})
}
public async clearCache() {
await this.minisearch.clear()
await this.documents.clear()
}
}

View File

@@ -1,17 +1,7 @@
import { cacheManager } from './cache-manager'
import {
extractHeadingsFromCache,
getAliasesFromMetadata,
getTagsFromMetadata,
isFileImage,
isFilePlaintext,
removeDiacritics,
} from './tools/utils'
import * as NotesIndex from './notes-index'
import { isFileImage, isFilePDF, isFilePlaintext } from './tools/utils'
import type { TFile } from 'obsidian'
import type { IndexedDocument } from './globals'
import { getNonExistingNotes } from './tools/notes'
import { getPdfText, getImageText } from 'obsidian-text-extract'
import { cacheManager } from './cache-manager'
/**
* Return all plaintext files as IndexedDocuments
@@ -20,9 +10,9 @@ export async function getPlainTextFiles(): Promise<IndexedDocument[]> {
const allFiles = app.vault.getFiles().filter(f => isFilePlaintext(f.path))
const data: IndexedDocument[] = []
for (const file of allFiles) {
const doc = await fileToIndexedDocument(file)
const doc = await cacheManager.getDocument(file.path)
data.push(doc)
await cacheManager.updateLiveDocument(file.path, doc)
// await cacheManager.updateLiveDocument(file.path, doc)
}
return data
}
@@ -31,7 +21,7 @@ export async function getPlainTextFiles(): Promise<IndexedDocument[]> {
* Return all PDFs as IndexedDocuments.
*/
export async function getPDFAsDocuments(): Promise<IndexedDocument[]> {
const files = app.vault.getFiles().filter(f => f.path.endsWith('.pdf'))
const files = app.vault.getFiles().filter(f => isFilePDF(f.path))
return await getBinaryFiles(files)
}
@@ -48,9 +38,8 @@ async function getBinaryFiles(files: TFile[]): Promise<IndexedDocument[]> {
const input = []
for (const file of files) {
input.push(
new Promise(async (resolve, reject) => {
const doc = await fileToIndexedDocument(file)
await cacheManager.updateLiveDocument(file.path, doc)
new Promise(async (resolve, _reject) => {
const doc = await cacheManager.getDocument(file.path)
data.push(doc)
return resolve(null)
})
@@ -59,61 +48,3 @@ async function getBinaryFiles(files: TFile[]): Promise<IndexedDocument[]> {
await Promise.all(input)
return data
}
/**
 * Convert a file into an IndexedDocument.
 *
 * The text comes from `app.vault.cachedRead` for plaintext files, from
 * `getPdfText` for paths ending in `.pdf`, and from `getImageText` for images.
 * As a side effect, links to non-existing notes found in the file's metadata
 * are registered through `NotesIndex.addNonExistingToIndex`.
 *
 * @param file The vault file to convert.
 * @returns The document built from the file's text and metadata.
 * @throws Error when the file is neither plaintext, PDF, nor image.
 */
export async function fileToIndexedDocument(
  file: TFile
): Promise<IndexedDocument> {
  let content: string
  if (isFilePlaintext(file.path)) {
    content = await app.vault.cachedRead(file)
  } else if (file.path.endsWith('.pdf')) {
    content = await getPdfText(file)
  } else if (isFileImage(file.path)) {
    content = await getImageText(file)
  } else {
    throw new Error('Invalid file: ' + file.path)
  }

  // NOTE(review): the section offsets used below come from the metadata cache;
  // presumably they refer to the raw file text. If removeDiacritics can change
  // the string length, they may be slightly off - confirm.
  content = removeDiacritics(content)
  const metadata = app.metadataCache.getFileCache(file)

  // Look for links that lead to non-existing files,
  // and add them to the index.
  if (metadata) {
    // FIXME: https://github.com/scambier/obsidian-omnisearch/issues/129
    const nonExisting = getNonExistingNotes(file, metadata)
    for (const name of nonExisting.filter(
      o => !cacheManager.getLiveDocument(o)
    )) {
      NotesIndex.addNonExistingToIndex(name, file.path)
    }

    // EXCALIDRAW
    // Remove the json code
    if (metadata.frontmatter?.['excalidraw-plugin']) {
      // Cut from the last comment section to the first: every cut shortens
      // `content`, so going front-to-back would leave the offsets of the
      // remaining sections pointing at the wrong place.
      const commentPositions = (metadata.sections ?? [])
        .filter(s => s.type === 'comment')
        .map(s => s.position)
        .sort((a, b) => b.start.offset - a.start.offset)
      for (const { start, end } of commentPositions) {
        content =
          content.substring(0, start.offset - 1) + content.substring(end.offset)
      }
    }
  }

  return {
    basename: removeDiacritics(file.basename),
    content,
    path: file.path,
    mtime: file.stat.mtime,

    tags: getTagsFromMetadata(metadata),
    // NOTE(review): aliases are concatenated without a separator, unlike the
    // headings below - confirm this is intended.
    aliases: getAliasesFromMetadata(metadata).join(''),
    headings1: metadata ? extractHeadingsFromCache(metadata, 1).join(' ') : '',
    headings2: metadata ? extractHeadingsFromCache(metadata, 2).join(' ') : '',
    headings3: metadata ? extractHeadingsFromCache(metadata, 3).join(' ') : '',
  }
}

View File

@@ -1,14 +1,19 @@
import { EventBus } from './tools/event-bus'
import { writable } from 'svelte/store'
import { settings } from './settings'
export const regexLineSplit = /\r?\n|\r|((\.|\?|!)( |\r?\n|\r))/g
export const regexYaml = /^---\s*\n(.*?)\n?^---\s?/ms
export const regexStripQuotes = /^"|"$|^'|'$/g
export const chsRegex = /[\u4e00-\u9fa5]/
export const chsSegmenter = (app as any).plugins.plugins['cm-chs-patch']
export const excerptBefore = 100
export const excerptAfter = 300
export const highlightClass = 'suggestion-highlight omnisearch-highlight'
export const highlightClass = `suggestion-highlight omnisearch-highlight ${
settings.hightlight ? 'omnisearch-default-highlight' : ''
}`
export const eventBus = new EventBus()
@@ -16,15 +21,16 @@ export const EventNames = {
ToggleExcerpts: 'toggle-excerpts',
} as const
export const enum IndexingStep {
export const enum IndexingStepType {
Done,
LoadingCache,
ReadingNotes,
ReadingPDFs,
ReadingImages,
UpdatingCache,
ReadingFiles,
IndexingFiles,
WritingCache,
}
export type DocumentRef = { path: string; mtime: number }
export type IndexedDocument = {
path: string
basename: string
@@ -37,6 +43,7 @@ export type IndexedDocument = {
headings2: string
headings3: string
// TODO: reimplement this
doesNotExist?: boolean
parent?: string
}
@@ -49,6 +56,8 @@ export const isSearchMatch = (o: { offset?: number }): o is SearchMatch => {
return o.offset !== undefined
}
export const indexingStep = writable(IndexingStepType.Done)
export type ResultNote = {
score: number
path: string

View File

@@ -1,25 +1,24 @@
import { Notice, Platform, Plugin, TFile } from 'obsidian'
import { SearchEngine } from './search/search-engine'
import { Notice, Platform, Plugin } from 'obsidian'
import {
OmnisearchInFileModal,
OmnisearchVaultModal,
} from './components/modals'
import { loadSettings, settings, SettingsTab, showExcerpt } from './settings'
import { eventBus, EventNames, IndexingStep } from './globals'
import { eventBus, EventNames, indexingStep, IndexingStepType } from './globals'
import api from './tools/api'
import { isFilePlaintext, wait } from './tools/utils'
import { isFileIndexable } from './tools/utils'
import { database, OmnisearchCache } from './database'
import * as NotesIndex from './notes-index'
import * as FileLoader from './file-loader'
import { OmnisearchCache } from './database'
import { searchEngine } from './search/omnisearch'
import { cacheManager } from './cache-manager'
export default class OmnisearchPlugin extends Plugin {
private ribbonButton?: HTMLElement
async onload(): Promise<void> {
await loadSettings(this)
await cleanOldCacheFiles()
await OmnisearchCache.clearOldDatabases()
await loadSettings(this)
registerAPI(this)
@@ -54,25 +53,34 @@ export default class OmnisearchPlugin extends Plugin {
app.workspace.onLayoutReady(async () => {
// Listeners to keep the search index up-to-date
this.registerEvent(
this.app.vault.on('create', file => {
NotesIndex.addToIndexAndMemCache(file)
this.app.vault.on('create', async file => {
if (isFileIndexable(file.path)) {
await cacheManager.addToLiveCache(file.path)
searchEngine.addFromPaths([file.path])
}
})
)
this.registerEvent(
this.app.vault.on('delete', file => {
NotesIndex.removeFromIndex(file.path)
cacheManager.removeFromLiveCache(file.path)
searchEngine.removeFromPaths([file.path])
})
)
this.registerEvent(
this.app.vault.on('modify', async file => {
NotesIndex.markNoteForReindex(file)
if (isFileIndexable(file.path)) {
await cacheManager.addToLiveCache(file.path)
NotesIndex.markNoteForReindex(file)
}
})
)
this.registerEvent(
this.app.vault.on('rename', async (file, oldPath) => {
if (file instanceof TFile && isFilePlaintext(file.path)) {
NotesIndex.removeFromIndex(oldPath)
await NotesIndex.addToIndexAndMemCache(file)
if (isFileIndexable(file.path)) {
cacheManager.removeFromLiveCache(oldPath)
cacheManager.addToLiveCache(file.path)
searchEngine.removeFromPaths([oldPath])
await searchEngine.addFromPaths([file.path])
}
})
)
@@ -80,12 +88,17 @@ export default class OmnisearchPlugin extends Plugin {
await populateIndex()
})
showWelcomeNotice(this)
executeFirstLaunchTasks(this)
}
onunload(): void {
async onunload(): Promise<void> {
// @ts-ignore
delete globalThis['omnisearch']
// Clear cache when disabling Omnisearch
if (process.env.NODE_ENV === 'production') {
await database.clearCache()
}
}
addRibbonButton(): void {
@@ -106,115 +119,56 @@ export default class OmnisearchPlugin extends Plugin {
*/
async function populateIndex(): Promise<void> {
console.time('Omnisearch - Indexing total time')
indexingStep.set(IndexingStepType.ReadingFiles)
const files = app.vault.getFiles().filter(f => isFileIndexable(f.path))
console.log(`Omnisearch - ${files.length} files total`)
// Initialize minisearch
let engine = SearchEngine.getEngine()
// Map documents in the background
// Promise.all(files.map(f => cacheManager.addToLiveCache(f.path)))
// if not iOS, load data from cache
if (!Platform.isIosApp) {
engine = await SearchEngine.initFromCache()
console.time('Omnisearch - Loading index from cache')
indexingStep.set(IndexingStepType.LoadingCache)
await searchEngine.loadCache()
console.timeEnd('Omnisearch - Loading index from cache')
}
// Load plaintext files
SearchEngine.indexingStep.set(IndexingStep.ReadingNotes)
console.log('Omnisearch - Reading notes')
const plainTextFiles = await FileLoader.getPlainTextFiles()
let allFiles = [...plainTextFiles]
// iOS: since there's no cache, directly index the documents
if (Platform.isIosApp) {
await wait(1000)
await engine.addAllToMinisearch(plainTextFiles)
}
const diff = searchEngine.getDiff(
files.map(f => ({ path: f.path, mtime: f.stat.mtime }))
)
// Load PDFs
if (settings.PDFIndexing) {
SearchEngine.indexingStep.set(IndexingStep.ReadingPDFs)
console.log('Omnisearch - Reading PDFs')
const pdfDocuments = await FileLoader.getPDFAsDocuments()
// iOS: since there's no cache, just index the documents
if (Platform.isIosApp) {
await wait(1000)
await engine.addAllToMinisearch(pdfDocuments)
}
// Add PDFs to the files list
allFiles = [...allFiles, ...pdfDocuments]
}
// Load Images
if (settings.imagesIndexing) {
SearchEngine.indexingStep.set(IndexingStep.ReadingImages)
console.log('Omnisearch - Reading Images')
const imagesDocuments = await FileLoader.getImagesAsDocuments()
// iOS: since there's no cache, just index the documents
if (Platform.isIosApp) {
await wait(1000)
await engine.addAllToMinisearch(imagesDocuments)
}
// Add Images to the files list
allFiles = [...allFiles, ...imagesDocuments]
}
console.log('Omnisearch - Total number of files: ' + allFiles.length)
let needToUpdateCache = false
// Other platforms: make a diff of what's to add/update/delete
if (!Platform.isIosApp) {
SearchEngine.indexingStep.set(IndexingStep.UpdatingCache)
console.log('Omnisearch - Checking index cache diff...')
// Check which documents need to be removed/added/updated
const diffDocs = await cacheManager.getDiffDocuments(allFiles)
if (diff.toAdd.length) {
console.log(
`Omnisearch - Files to add/remove/update: ${diffDocs.toAdd.length}/${diffDocs.toDelete.length}/${diffDocs.toUpdate.length}`
'Omnisearch - Total number of files to add/update: ' + diff.toAdd.length
)
if (
diffDocs.toAdd.length +
diffDocs.toDelete.length +
diffDocs.toUpdate.length >
100
) {
new Notice(
`Omnisearch - A great number of files need to be added/updated/cleaned. This process may make cause slowdowns.`
)
}
needToUpdateCache = !!(
diffDocs.toAdd.length ||
diffDocs.toDelete.length ||
diffDocs.toUpdate.length
}
if (diff.toRemove.length) {
console.log(
'Omnisearch - Total number of files to remove: ' + diff.toRemove.length
)
// Add
await engine.addAllToMinisearch(diffDocs.toAdd)
diffDocs.toAdd.forEach(doc =>
cacheManager.updateLiveDocument(doc.path, doc)
)
// Delete
for (const [i, doc] of diffDocs.toDelete.entries()) {
await wait(0)
engine.removeFromMinisearch(doc)
cacheManager.deleteLiveDocument(doc.path)
}
// Update (delete + add)
diffDocs.toUpdate.forEach(({ oldDoc, newDoc }) => {
engine.removeFromMinisearch(oldDoc)
cacheManager.updateLiveDocument(oldDoc.path, newDoc)
})
await engine.addAllToMinisearch(diffDocs.toUpdate.map(d => d.newDoc))
}
// Load PDFs into the main search engine, and write cache
// SearchEngine.loadTmpDataIntoMain()
SearchEngine.indexingStep.set(IndexingStep.Done)
if (diff.toAdd.length >= 500) {
new Notice(
`Omnisearch - ${diff.toAdd.length} files need to be indexed. Obsidian may experience stutters and freezes during the process`,
10_000
)
}
if (!Platform.isIosApp && needToUpdateCache) {
console.log('Omnisearch - Writing cache...')
await SearchEngine.getEngine().writeToCache(allFiles)
indexingStep.set(IndexingStepType.IndexingFiles)
searchEngine.removeFromPaths(diff.toRemove.map(o => o.path))
await searchEngine.addFromPaths(diff.toAdd.map(o => o.path))
if (diff.toRemove.length || diff.toAdd.length) {
indexingStep.set(IndexingStepType.WritingCache)
await searchEngine.writeToCache()
}
console.timeEnd('Omnisearch - Indexing total time')
if (diff.toAdd.length >= 500) {
new Notice(`Omnisearch - Your files have been indexed.`)
}
indexingStep.set(IndexingStepType.Done)
}
async function cleanOldCacheFiles() {
@@ -235,7 +189,7 @@ async function cleanOldCacheFiles() {
}
}
function showWelcomeNotice(plugin: Plugin) {
function executeFirstLaunchTasks(plugin: Plugin) {
const code = '1.8.0-beta.3'
if (settings.welcomeMessage !== code) {
const welcome = new DocumentFragment()

View File

@@ -1,43 +1,7 @@
import { Notice, TAbstractFile, TFile } from 'obsidian'
import { isFileIndexable, wait } from './tools/utils'
import type { TAbstractFile } from 'obsidian'
import { removeAnchors } from './tools/notes'
import { SearchEngine } from './search/search-engine'
import { cacheManager } from './cache-manager'
import type { IndexedDocument } from './globals'
import { fileToIndexedDocument } from './file-loader'
/**
 * Indexes a file and stores the resulting document in the live cache.
 *
 * Does nothing for anything that is not an indexable `TFile`. A path that is
 * currently indexed as a non-existing note is removed first, so it can be
 * re-added as a real note. Any error raised while indexing (including the
 * "already indexed" case) is logged with `console.error`, not rethrown.
 *
 * @param file The vault entry to index.
 */
export async function addToIndexAndMemCache(
  file: TAbstractFile
): Promise<void> {
  if (!(file instanceof TFile) || !isFileIndexable(file.path)) {
    return
  }

  // A placeholder for a not-yet-existing note has to make room for the real one.
  const placeholder = cacheManager.getLiveDocument(file.path)
  if (placeholder?.doesNotExist) {
    removeFromIndex(file.path)
  }

  try {
    const alreadyIndexed = cacheManager.getLiveDocument(file.path)
    if (alreadyIndexed) {
      throw new Error(`${file.basename} is already indexed`)
    }

    // Build the document, then register it in the engine and the live cache
    const doc = await fileToIndexedDocument(file)
    SearchEngine.getEngine().addSingleToMinisearch(doc)
    await cacheManager.updateLiveDocument(doc.path, doc)
  } catch (e) {
    console.error(e)
  }
}
import { searchEngine } from './search/omnisearch'
/**
* Index a non-existing note.
@@ -48,7 +12,6 @@ export async function addToIndexAndMemCache(
export function addNonExistingToIndex(name: string, parent: string): void {
name = removeAnchors(name)
const filename = name + (name.endsWith('.md') ? '' : '.md')
if (cacheManager.getLiveDocument(filename)) return
const note: IndexedDocument = {
path: filename,
@@ -65,33 +28,7 @@ export function addNonExistingToIndex(name: string, parent: string): void {
doesNotExist: true,
parent,
}
SearchEngine.getEngine().addSingleToMinisearch(note)
cacheManager.updateLiveDocument(filename, note)
}
/**
 * Removes the document stored under `path` from the search engine and from
 * the live cache.
 *
 * Logs and returns without touching anything when `path` is not indexable
 * (`console.info`) or when no live document is stored under it (`console.warn`).
 *
 * @param path Path of the file to remove.
 */
export function removeFromIndex(path: string): void {
  if (!isFileIndexable(path)) {
    console.info(`"${path}" is not an indexable file`)
    return
  }

  const liveDocument = cacheManager.getLiveDocument(path)
  if (!liveDocument) {
    console.warn(`Omnisearch - Note not found under path ${path}`)
    return
  }

  SearchEngine.getEngine().removeFromMinisearch(liveDocument)
  cacheManager.deleteLiveDocument(path)

  // FIXME: only remove non-existing notes if they don't have another parent
  // cacheManager
  // .getNonExistingNotesFromMemCache()
  // .filter(n => n.parent === path)
  // .forEach(n => {
  // removeFromIndex(n.path)
  // })
}
// searchEngine.addDocuments([note])
}
const notesToReindex = new Set<TAbstractFile>()
@@ -105,13 +42,11 @@ export function markNoteForReindex(note: TAbstractFile): void {
}
export async function refreshIndex(): Promise<void> {
if (notesToReindex.size > 0) {
console.info(`Omnisearch - Reindexing ${notesToReindex.size} notes`)
for (const note of notesToReindex) {
removeFromIndex(note.path)
await addToIndexAndMemCache(note)
await wait(0)
}
const paths = [...notesToReindex].map(n => n.path)
if (paths.length) {
searchEngine.removeFromPaths(paths)
searchEngine.addFromPaths(paths)
notesToReindex.clear()
// console.log(`Omnisearch - Reindexed ${paths.length} file(s)`)
}
}

364
src/search/omnisearch.ts Normal file
View File

@@ -0,0 +1,364 @@
import MiniSearch, { type Options, type SearchResult } from 'minisearch'
import type {
DocumentRef,
IndexedDocument,
ResultNote,
SearchMatch,
} from '../globals'
import {
chsRegex,
chsSegmenter,
indexingStep,
IndexingStepType,
SPACE_OR_PUNCTUATION,
} from '../globals'
import { settings } from '../settings'
import {
chunkArray,
removeDiacritics,
stringsToRegex,
stripMarkdownCharacters,
} from '../tools/utils'
import { Notice, Platform } from 'obsidian'
import type { Query } from './query'
import { cacheManager } from '../cache-manager'
/**
 * Splits `text` on `SPACE_OR_PUNCTUATION`. When `chsSegmenter` is available,
 * every piece matching `chsRegex` is further cut by `chsSegmenter.cut`.
 *
 * @param text The text to tokenize.
 * @returns The list of tokens.
 */
const tokenize = (text: string): string[] => {
  const pieces = text.split(SPACE_OR_PUNCTUATION)
  if (!chsSegmenter) {
    return pieces
  }
  return pieces.flatMap(piece => {
    if (chsRegex.test(piece)) {
      return chsSegmenter.cut(piece)
    }
    return [piece]
  })
}
export class Omnisearch {
/**
 * MiniSearch options shared by every index this class creates or loads
 * (they are passed to both `new MiniSearch` and `MiniSearch.loadJS` below).
 */
public static readonly options: Options<IndexedDocument> = {
tokenize,
// Terms are lowercased; diacritics are stripped only when the setting is on
processTerm: (term: string) =>
(settings.ignoreDiacritics ? removeDiacritics(term) : term).toLowerCase(),
// Documents are identified by their path
idField: 'path',
// Fields whose text is indexed
fields: [
'basename',
'aliases',
'content',
'headings1',
'headings2',
'headings3',
],
// Stored with each result; `search` reads `result.tags` to boost tag matches
storeFields: ['tags'],
// Only the 'version_conflict' code is surfaced to the user; other log calls are ignored
logger(_level, _message, code) {
if (code === 'version_conflict') {
new Notice(
'Omnisearch - Your index cache may be incorrect or corrupted. If this message keeps appearing, go to Settings to clear the cache.',
5000
)
}
},
}
private minisearch: MiniSearch
private indexedDocuments: Map<string, number> = new Map()
private previousResults: SearchResult[] = []
constructor() {
this.minisearch = new MiniSearch(Omnisearch.options)
}
/**
 * Restores the index from the cache returned by
 * `cacheManager.getMinisearchCache()`. When no cache is returned, the current
 * index and the list of indexed documents are left untouched.
 */
async loadCache(): Promise<void> {
  const cached = await cacheManager.getMinisearchCache()
  if (!cached) {
    return
  }

  this.minisearch = MiniSearch.loadJS(cached.data, Omnisearch.options)

  // Rebuild the path -> mtime table of what the restored index contains
  const restored = new Map<string, number>()
  for (const { path, mtime } of cached.paths) {
    restored.set(path, mtime)
  }
  this.indexedDocuments = restored
}
/**
 * Compares `docs` with the documents recorded in `this.indexedDocuments`.
 *
 * A document whose `mtime` changed appears in BOTH lists: in `toAdd` with its
 * new `mtime`, and in `toRemove` with the `mtime` currently recorded.
 *
 * @param docs Path and mtime of every document that should be in the index.
 * @returns `toAdd`: entries of `docs` that are not recorded, or recorded with
 *          another mtime. `toRemove`: recorded entries that are missing from
 *          `docs`, or listed there with another mtime.
 */
getDiff(docs: DocumentRef[]): {
  toAdd: DocumentRef[]
  toRemove: DocumentRef[]
} {
  const docsMap = new Map(docs.map(d => [d.path, d.mtime]))

  const toAdd = docs.filter(
    d =>
      !this.indexedDocuments.has(d.path) ||
      this.indexedDocuments.get(d.path) !== d.mtime
  )

  const toRemove = [...this.indexedDocuments]
    .filter(
      ([path, mtime]) => !docsMap.has(path) || docsMap.get(path) !== mtime
    )
    .map(([path, mtime]) => ({ path, mtime }))

  return { toAdd, toRemove }
}
/**
 * Add notes/PDFs/images to the search index.
 *
 * Each path is resolved to a document through `cacheManager.getDocument`.
 * Documents that are already indexed are discarded first, then re-added.
 * Duplicate entries in `paths` are ignored.
 *
 * @param paths Paths of the files to (re)index.
 */
public async addFromPaths(paths: string[]): Promise<void> {
  // The same path twice would put the same id twice in one batch, which
  // MiniSearch rejects as a duplicate id.
  const uniquePaths = [...new Set(paths)]
  const documents = await Promise.all(
    uniquePaths.map(path => cacheManager.getDocument(path))
  )

  // If a document is already added, discard it
  this.removeFromPaths(
    documents.filter(d => this.indexedDocuments.has(d.path)).map(d => d.path)
  )

  // Index the documents in chunks of 500. Nothing is persisted here; writing
  // the cache is left to the caller.
  const chunkedDocs = chunkArray(documents, 500)
  for (const docs of chunkedDocs) {
    // Update the list of indexed docs
    docs.forEach(doc => this.indexedDocuments.set(doc.path, doc.mtime))

    // Discard files that may have been already added (though it shouldn't happen)
    const alreadyAdded = docs.filter(doc => this.minisearch.has(doc.path))
    this.removeFromPaths(alreadyAdded.map(o => o.path))

    // Add docs to minisearch
    await this.minisearch.addAllAsync(docs)
  }
}
/**
 * Forgets the given paths: each one is dropped from `indexedDocuments`, and
 * those that minisearch actually holds are discarded from it.
 *
 * @param paths Paths of the documents to remove.
 */
public removeFromPaths(paths: string[]): void {
  const knownToMinisearch: string[] = []
  for (const path of paths) {
    this.indexedDocuments.delete(path)
    // Only discard what minisearch really has
    if (this.minisearch.has(path)) {
      knownToMinisearch.push(path)
    }
  }
  this.minisearch.discardAll(knownToMinisearch)
}
/**
 * Searches the index for the given query,
 * and returns an array of raw results.
 *
 * Steps, in order: run the MiniSearch query, hide or downrank files ignored
 * by the user, boost results carrying a tag from the query, keep the first 50,
 * apply the exact-term and exclusion filters, then deduplicate by id.
 *
 * @param query The parsed query.
 * @param options `prefixLength`: minimum term length for prefix matching.
 * `singleFilePath`: when set, only results with that id are returned.
 * @returns The raw results; `[]` for an empty query, and the previous results
 * when MiniSearch finds nothing.
 */
public async search(
query: Query,
options: { prefixLength: number; singleFilePath?: string }
): Promise<SearchResult[]> {
// An empty query also resets the remembered results
if (query.isEmpty()) {
this.previousResults = []
return []
}
let results = this.minisearch.search(query.segmentsToStr(), {
prefix: term => term.length >= options.prefixLength,
fuzzy: 0.2,
combineWith: 'AND',
boost: {
basename: settings.weightBasename,
// Aliases use the same weight as the basename
aliases: settings.weightBasename,
headings1: settings.weightH1,
headings2: settings.weightH2,
headings3: settings.weightH3,
},
})
// No hit at all: fall back to what the previous search returned
if (!results.length) return this.previousResults
// Single-file mode returns here: none of the steps below are applied,
// and `previousResults` is not updated
if (options.singleFilePath) {
return results.filter(r => r.id === options.singleFilePath)
}
// Hide or downrank files that are in Obsidian's excluded list
// (`isUserIgnored` is checked for existence before being called)
if (settings.hideExcluded) {
// Filter the files out
results = results.filter(
result =>
!(
app.metadataCache.isUserIgnored &&
app.metadataCache.isUserIgnored(result.id)
)
)
} else {
// Just downrank them
results.forEach(result => {
if (
app.metadataCache.isUserIgnored &&
app.metadataCache.isUserIgnored(result.id)
) {
result.score /= 10
}
})
}
// Extract tags from the query
const tags = query.segments
.filter(s => s.value.startsWith('#'))
.map(s => s.value)
// Put the results with tags on top
// (the score is multiplied once per matching tag; the list is not re-sorted here)
for (const tag of tags) {
for (const result of results) {
if ((result.tags ?? []).includes(tag)) {
result.score *= 100
}
}
}
// Keep the first 50 results, so at most 50 documents are fetched below
results = results.slice(0, 50)
const documents = await Promise.all(
results.map(async result => await cacheManager.getDocument(result.id))
)
// If the search query contains quotes, filter out results that don't have the exact match
// (each exact term must be found in the lowercased path or content)
const exactTerms = query.getExactTerms()
if (exactTerms.length) {
results = results.filter(r => {
const document = documents.find(d => d.path === r.id)
const title = document?.path.toLowerCase() ?? ''
const content = stripMarkdownCharacters(
document?.content ?? ''
).toLowerCase()
return exactTerms.every(q => content.includes(q) || title.includes(q))
})
}
// If the search query contains exclude terms, filter out results that have them
// (only the content is checked, not the path)
const exclusions = query.exclusions
if (exclusions.length) {
results = results.filter(r => {
const content = stripMarkdownCharacters(
documents.find(d => d.path === r.id)?.content ?? ''
).toLowerCase()
return exclusions.every(q => !content.includes(q.value))
})
}
// FIXME:
// Dedupe results - crutch for https://github.com/scambier/obsidian-omnisearch/issues/129
// (keeps the first occurrence of each id)
results = results.filter(
(result, index, arr) => arr.findIndex(t => t.id === result.id) === index
)
// Remember these results for the "no hit" fallback above
this.previousResults = results
return results
}
/**
 * Runs a regex against a text, and returns the `{ match, offset }` pairs found.
 *
 * At most 100 regex matches are collected. If the whole query string can be
 * found "as is" in the lowercased text, that occurrence is additionally
 * inserted at the front of the returned array.
 *
 * @param text the text to scan
 * @param reg the regex to execute; it is expected to carry the `g` flag, and
 *   its `lastIndex` is mutated by this method
 * @param query the query, whose `segmentsToStr()` value is looked up verbatim
 * @returns the matches, best match first when the verbatim query was found
 */
public getMatches(text: string, reg: RegExp, query: Query): SearchMatch[] {
  // Hard cap on the number of collected regex matches
  const maxMatches = 100
  const matches: SearchMatch[] = []
  let match: RegExpExecArray | null = null
  while (matches.length < maxMatches && (match = reg.exec(text)) !== null) {
    const m = match[0]
    if (m) {
      matches.push({ match: m, offset: match.index })
    } else {
      // A zero-length match does not advance `lastIndex`: step over it
      // manually, otherwise exec() would return the same match forever
      reg.lastIndex++
    }
    // Without the `g` or `y` flag, exec() always restarts from index 0,
    // so looping would only yield the same match again and again
    if (!reg.global && !reg.sticky) break
  }

  // If the query can be found "as is" in the text, put this match first.
  // An empty query string is skipped: indexOf('') is always 0, which would
  // inject a meaningless zero-length match.
  const queryStr = query.segmentsToStr()
  if (queryStr) {
    const best = text.toLowerCase().indexOf(queryStr)
    if (best > -1) {
      matches.unshift({ offset: best, match: queryStr })
    }
  }
  return matches
}
/**
 * Searches the index, and returns an array of ResultNote objects.
 * The `singleFilePath` option, when set, is forwarded to `search()`.
 *
 * Side effect: when there is at least one result, the leading '#' of every
 * query segment is removed in place (needed for highlighting).
 *
 * @param query the parsed query
 * @param options optional `singleFilePath` to restrict the search
 * @returns one ResultNote per raw search result, in the same order
 */
public async getSuggestions(
  query: Query,
  options?: Partial<{ singleFilePath?: string }>
): Promise<ResultNote[]> {
  // Get the raw results. "Simple search" uses a minimum prefix length of 1
  // instead of 3.
  const results: SearchResult[] = await this.search(query, {
    prefixLength: settings.simpleSearch ? 1 : 3,
    singleFilePath: options?.singleFilePath,
  })

  // Extract tags from the query (must happen before the '#' are stripped)
  const tags = query.segments
    .filter(s => s.value.startsWith('#'))
    .map(s => s.value)

  const documents = await Promise.all(
    results.map(async result => await cacheManager.getDocument(result.id))
  )
  // Index the documents by path once, instead of scanning the array for
  // every result. The first document found for a path wins.
  const documentsByPath = new Map<string, (typeof documents)[number]>()
  for (const doc of documents) {
    if (doc && !documentsByPath.has(doc.path)) {
      documentsByPath.set(doc.path, doc)
    }
  }

  // Remove '#' from tags, for highlighting.
  // Done exactly once, and only when there are results: doing it once per
  // result would strip one more '#' from each segment for every result.
  if (results.length) {
    for (const segment of query.segments) {
      segment.value = segment.value.replace(/^#/, '')
    }
  }

  // Quoted expressions of the query; identical for every result
  const exactExpressions = query.segments
    .filter(s => s.exact)
    .map(s => s.value)

  // Map the raw results to get usable suggestions
  return results.map(result => {
    let note = documentsByPath.get(result.id)
    if (!note) {
      // throw new Error(`Omnisearch - Note "${result.id}" not indexed`)
      console.warn(`Omnisearch - Note "${result.id}" not in the live cache`)
      note = {
        content: '',
        basename: result.id,
        path: result.id,
      } as IndexedDocument
    }

    // Clean search matches that match quoted expressions,
    // and inject those expressions instead
    const foundWords = [
      // Matching terms from the result,
      // do not necessarily match the query
      ...Object.keys(result.match),
      // Quoted expressions
      ...exactExpressions,
      // Tags, starting with #
      ...tags,
    ].filter(w => w.length > 1)

    const matches = this.getMatches(
      note.content,
      stringsToRegex(foundWords),
      query
    )
    const resultNote: ResultNote = {
      score: result.score,
      foundWords,
      matches,
      ...note,
    }
    return resultNote
  })
}
/**
 * Hands the minisearch index and the indexed documents over to the cache
 * manager for persistence. Does nothing on the iOS app.
 */
public async writeToCache(): Promise<void> {
  if (!Platform.isIosApp) {
    await cacheManager.writeMinisearchCache(
      this.minisearch,
      this.indexedDocuments
    )
  }
}
}
// Module-level Omnisearch instance, created when this module is first loaded
export const searchEngine = new Omnisearch()

View File

@@ -1,305 +0,0 @@
import MiniSearch, { type Options, type SearchResult } from 'minisearch'
import {
chsRegex,
type IndexedDocument,
type ResultNote,
type SearchMatch,
SPACE_OR_PUNCTUATION,
IndexingStep,
} from '../globals'
import {
removeDiacritics,
stringsToRegex,
stripMarkdownCharacters,
} from '../tools/utils'
import type { Query } from './query'
import { settings } from '../settings'
import { cacheManager } from '../cache-manager'
import { writable } from 'svelte/store'
import { Notice } from 'obsidian'
// Last non-empty list of suggestions; served again when a search yields no result
let previousResults: ResultNote[] = []
/**
 * Splits a text into tokens on `SPACE_OR_PUNCTUATION`.
 * When the 'cm-chs-patch' plugin is available, every token matching
 * `chsRegex` is further cut by that plugin's `cut()` function.
 */
const tokenize = (text: string): string[] => {
  const words = text.split(SPACE_OR_PUNCTUATION)
  const segmenter = (app as any).plugins.plugins['cm-chs-patch']
  if (!segmenter) {
    return words
  }
  return words.flatMap(w => (chsRegex.test(w) ? segmenter.cut(w) : [w]))
}
/**
 * Options used to build the MiniSearch index: custom tokenizer, term
 * normalization, indexed fields and stored fields.
 */
export const minisearchOptions: Options<IndexedDocument> = {
  tokenize,
  // Terms are lowercased, after an optional removal of their diacritics
  processTerm: (term: string) => {
    const normalized = settings.ignoreDiacritics ? removeDiacritics(term) : term
    return normalized.toLowerCase()
  },
  // Documents are identified by their path
  idField: 'path',
  fields: [
    'basename',
    'aliases',
    'content',
    'headings1',
    'headings2',
    'headings3',
  ],
  storeFields: ['tags'],
  // Shows a notice to the user when invoked
  callbackWhenDesync() {
    new Notice(
      'Omnisearch - Your index cache may be incorrect or corrupted. If this message keeps appearing, go to Settings to clear the cache.'
    )
  },
}
/**
 * Singleton wrapper around a MiniSearch index.
 * Runs the queries, post-filters the raw results, and maps them to
 * ResultNote suggestions.
 */
export class SearchEngine {
  private static engine?: SearchEngine

  /** Store holding the current indexing step; starts at `LoadingCache` */
  public static indexingStep = writable(IndexingStep.LoadingCache)

  /**
   * The main singleton SearchEngine instance.
   * Should be used for all queries
   */
  public static getEngine(): SearchEngine {
    if (!this.engine) {
      this.engine = new SearchEngine()
    }
    return this.engine
  }

  /**
   * Instantiates the main instance with cache data (if it exists).
   * If reading the cache throws, the user is notified, the error is logged,
   * and the engine is returned with whatever index it already had.
   */
  public static async initFromCache(): Promise<SearchEngine> {
    try {
      const cache = await cacheManager.getMinisearchCache()
      if (cache) {
        this.getEngine().minisearch = cache
      }
    } catch (e) {
      new Notice(
        'Omnisearch - Cache missing or invalid. Some freezes may occur while Omnisearch indexes your vault.'
      )
      console.error('Omnisearch - Could not init engine from cache')
      console.error(e)
    }
    return this.getEngine()
  }

  private minisearch: MiniSearch

  private constructor() {
    this.minisearch = new MiniSearch(minisearchOptions)
  }

  /**
   * Searches the index for the given query,
   * and returns an array of raw results
   *
   * @param query the parsed query
   * @param options `prefixLength` is the minimum length a term must have to
   *   be used as a prefix
   * @returns the deduplicated raw results; empty when the query string is empty
   */
  public async search(
    query: Query,
    options: { prefixLength: number }
  ): Promise<SearchResult[]> {
    const queryStr = query.segmentsToStr()
    if (!queryStr) return []

    let results = this.minisearch.search(queryStr, {
      prefix: term => term.length >= options.prefixLength,
      fuzzy: 0.2,
      combineWith: 'AND',
      boost: {
        basename: settings.weightBasename,
        aliases: settings.weightBasename,
        headings1: settings.weightH1,
        headings2: settings.weightH2,
        headings3: settings.weightH3,
      },
    })

    // Downrank files that are in Obsidian's excluded list
    if (settings.respectExcluded) {
      results.forEach(result => {
        if (
          app.metadataCache.isUserIgnored &&
          app.metadataCache.isUserIgnored(result.id)
        ) {
          result.score /= 10 // TODO: make this value configurable or toggleable?
        }
      })
    }

    // If the search query contains quotes, filter out results that don't have the exact match
    const exactTerms = query.getExactTerms()
    if (exactTerms.length) {
      results = results.filter(r => {
        // Single cache lookup, shared by the title and the content checks
        const doc = cacheManager.getLiveDocument(r.id)
        const title = doc?.path.toLowerCase() ?? ''
        const content = stripMarkdownCharacters(
          doc?.content ?? ''
        ).toLowerCase()
        return exactTerms.every(q => content.includes(q) || title.includes(q))
      })
    }

    // If the search query contains exclude terms, filter out results that have them
    const exclusions = query.exclusions
    if (exclusions.length) {
      results = results.filter(r => {
        const content = stripMarkdownCharacters(
          cacheManager.getLiveDocument(r.id)?.content ?? ''
        ).toLowerCase()
        return exclusions.every(q => !content.includes(q.value))
      })
    }

    // FIXME:
    // Dedupe results - clutch for https://github.com/scambier/obsidian-omnisearch/issues/129
    // Only the first result of each id is kept
    const seenIds = new Set<unknown>()
    return results.filter(result => {
      if (seenIds.has(result.id)) return false
      seenIds.add(result.id)
      return true
    })
  }

  /**
   * Parses a text against a regex, and returns the { string, offset } matches.
   *
   * At most 100 regex matches are collected. If the whole query string can be
   * found "as is" in the lowercased text, that occurrence is additionally
   * inserted at the front of the returned array.
   *
   * @param text the text to scan
   * @param reg the regex to execute; it is expected to carry the `g` flag,
   *   and its `lastIndex` is mutated by this method
   * @param query the query, whose `segmentsToStr()` value is looked up verbatim
   */
  public getMatches(text: string, reg: RegExp, query: Query): SearchMatch[] {
    // Hard cap on the number of collected regex matches
    const maxMatches = 100
    const matches: SearchMatch[] = []
    let match: RegExpExecArray | null = null
    while (matches.length < maxMatches && (match = reg.exec(text)) !== null) {
      const m = match[0]
      if (m) {
        matches.push({ match: m, offset: match.index })
      } else {
        // A zero-length match does not advance `lastIndex`: step over it
        // manually, otherwise exec() would return the same match forever
        reg.lastIndex++
      }
      // Without the `g` or `y` flag, exec() always restarts from index 0,
      // so looping would only yield the same match again and again
      if (!reg.global && !reg.sticky) break
    }

    // If the query can be found "as is" in the text, put this match first.
    // An empty query string is skipped: indexOf('') is always 0, which would
    // inject a meaningless zero-length match.
    const queryStr = query.segmentsToStr()
    if (queryStr) {
      const best = text.toLowerCase().indexOf(queryStr)
      if (best > -1) {
        matches.unshift({ offset: best, match: queryStr })
      }
    }
    return matches
  }

  /**
   * Searches the index, and returns an array of ResultNote objects.
   * If we have the singleFile option set,
   * the array contains a single result from that file
   *
   * Side effects: updates the module-level `previousResults`, and, when
   * there is at least one result to map, removes in place the leading '#'
   * of every query segment (needed for highlighting).
   *
   * @param query
   * @param options
   * @returns the suggestions; the previous suggestions when the search
   *   yields nothing; an empty array when the query is empty
   */
  public async getSuggestions(
    query: Query,
    options?: Partial<{ singleFilePath: string | null }>
  ): Promise<ResultNote[]> {
    if (query.isEmpty()) {
      previousResults = []
      return []
    }

    // Get the raw results. "Simple search" uses a minimum prefix length of 1
    // instead of 3.
    let results: SearchResult[] = await this.search(query, {
      prefixLength: settings.simpleSearch ? 1 : 3,
    })
    if (!results.length) return previousResults

    // Extract tags from the query (must happen before the '#' are stripped)
    const tags = query.segments
      .filter(s => s.value.startsWith('#'))
      .map(s => s.value)

    // Either keep the 50 first results,
    // or the one corresponding to `singleFile`
    const singleFilePath = options?.singleFilePath
    if (singleFilePath) {
      const result = results.find(r => r.id === singleFilePath)
      results = result ? [result] : []
    } else {
      results = results.slice(0, 50)

      // Put the results with tags on top
      for (const tag of tags) {
        for (const result of results) {
          if ((result.tags ?? []).includes(tag)) {
            result.score *= 100
          }
        }
      }
    }

    // Remove '#' from tags, for highlighting.
    // Done exactly once, and only when there are results: doing it once per
    // result would strip one more '#' from each segment for every result.
    if (results.length) {
      for (const segment of query.segments) {
        segment.value = segment.value.replace(/^#/, '')
      }
    }

    // Quoted expressions of the query; identical for every result
    const exactExpressions = query.segments
      .filter(s => s.exact)
      .map(s => s.value)

    // Map the raw results to get usable suggestions
    const resultNotes = results.map(result => {
      let note = cacheManager.getLiveDocument(result.id)
      if (!note) {
        // throw new Error(`Omnisearch - Note "${result.id}" not indexed`)
        console.warn(`Omnisearch - Note "${result.id}" not in the live cache`)
        note = {
          content: '',
          basename: result.id,
          path: result.id,
        } as IndexedDocument
      }

      // Clean search matches that match quoted expressions,
      // and inject those expressions instead
      const foundWords = [
        // Matching terms from the result,
        // do not necessarily match the query
        ...Object.keys(result.match),
        // Quoted expressions
        ...exactExpressions,
        // Tags, starting with #
        ...tags,
      ].filter(w => w.length > 1)

      const matches = this.getMatches(
        note.content,
        stringsToRegex(foundWords),
        query
      )
      const resultNote: ResultNote = {
        score: result.score,
        foundWords,
        matches,
        ...note,
      }
      return resultNote
    })

    previousResults = resultNotes
    return resultNotes
  }

  // #region Read/write minisearch index

  /**
   * Adds all the given documents to the index, through MiniSearch's
   * `addAllAsync`.
   * @param documents the documents to index
   * @param chunkSize forwarded to `addAllAsync` as its `chunkSize` option
   */
  public async addAllToMinisearch(
    documents: IndexedDocument[],
    chunkSize = 10
  ): Promise<void> {
    await this.minisearch.addAllAsync(documents, { chunkSize })
  }

  /** Adds a single document to the index */
  public addSingleToMinisearch(document: IndexedDocument): void {
    this.minisearch.add(document)
  }

  /** Removes a single document from the index */
  public removeFromMinisearch(document: IndexedDocument): void {
    this.minisearch.remove(document)
  }

  // #endregion

  /**
   * Hands the index and the given documents over to the cache manager
   * for persistence.
   */
  public async writeToCache(documents: IndexedDocument[]): Promise<void> {
    await cacheManager.writeMinisearchCache(this.minisearch, documents)
  }
}

View File

@@ -19,7 +19,7 @@ interface WeightingSettings {
export interface OmnisearchSettings extends WeightingSettings {
/** Respect the "excluded files" Obsidian setting by downranking results ignored files */
respectExcluded: boolean
hideExcluded: boolean
/** Ignore diacritics when indexing files */
ignoreDiacritics: boolean
/** Extensions of plain text files to index, in addition to .md */
@@ -44,6 +44,7 @@ export interface OmnisearchSettings extends WeightingSettings {
welcomeMessage: string
/** If a query returns 0 result, try again with more relax conditions */
simpleSearch: boolean
hightlight: boolean
}
/**
@@ -149,7 +150,7 @@ export class SettingsTab extends PluginSettingTab {
//#endregion Indexing
// #region Behavior
//#region Behavior
new Setting(containerEl).setName('Behavior').setHeading()
@@ -157,11 +158,12 @@ export class SettingsTab extends PluginSettingTab {
new Setting(containerEl)
.setName('Respect Obsidian\'s "Excluded Files"')
.setDesc(
'Files that are in Obsidian\'s "Options > Files & Links > Excluded Files" list will be downranked in results.'
`By default, fFiles that are in Obsidian\'s "Options > Files & Links > Excluded Files" list are downranked in results.
Enable this option to completely hide them`
)
.addToggle(toggle =>
toggle.setValue(settings.respectExcluded).onChange(async v => {
settings.respectExcluded = v
toggle.setValue(settings.hideExcluded).onChange(async v => {
settings.hideExcluded = v
await saveSettings(this.plugin)
})
)
@@ -188,8 +190,7 @@ export class SettingsTab extends PluginSettingTab {
new Setting(containerEl)
.setName('Simpler search')
.setDesc(
`When enabled, Omnisearch is a bit more restrictive when using your query terms as prefixes.
May return less results, but will be quicker. You should enable this if Omnisearch makes Obsidian freeze while searching.`
`Enable this if Obsidian often freezes while making searches. This will return more strict results.`
)
.addToggle(toggle =>
toggle.setValue(settings.simpleSearch).onChange(async v => {
@@ -198,9 +199,9 @@ export class SettingsTab extends PluginSettingTab {
})
)
// #endregion Behavior
//#endregion Behavior
// #region User Interface
//#region User Interface
new Setting(containerEl).setName('User Interface').setHeading()
@@ -235,7 +236,9 @@ export class SettingsTab extends PluginSettingTab {
// Keep line returns in excerpts
new Setting(containerEl)
.setName('Render line return in excerpts')
.setDesc('Activate this option render line returns in result excerpts.')
.setDesc(
'Activate this option to render line returns in result excerpts.'
)
.addToggle(toggle =>
toggle
.setValue(settings.renderLineReturnInExcerpts)
@@ -285,9 +288,22 @@ export class SettingsTab extends PluginSettingTab {
})
)
// #endregion User Interface
// Highlight results
new Setting(containerEl)
.setName('Highlight matching words in results')
.setDesc(
'Will highlight matching results when enabled. See README for more customization options.'
)
.addToggle(toggle =>
toggle.setValue(settings.hightlight).onChange(async v => {
settings.hightlight = v
await saveSettings(this.plugin)
})
)
// #region Results Weighting
//#endregion User Interface
//#region Results Weighting
new Setting(containerEl).setName('Results weighting').setHeading()
@@ -309,29 +325,29 @@ export class SettingsTab extends PluginSettingTab {
.setName(`Headings level 3 (default: ${DEFAULT_SETTINGS.weightH3})`)
.addSlider(cb => this.weightSlider(cb, 'weightH3'))
// #endregion Results Weighting
//#endregion Results Weighting
// #region Danger Zone
//#region Danger Zone
if (!Platform.isIosApp) {
new Setting(containerEl).setName('Danger Zone').setHeading()
new Setting(containerEl).setName('Danger Zone').setHeading()
const resetCacheDesc = new DocumentFragment()
resetCacheDesc.createSpan({}, span => {
span.innerHTML = `Erase all Omnisearch cache data.
const resetCacheDesc = new DocumentFragment()
resetCacheDesc.createSpan({}, span => {
span.innerHTML = `Erase all Omnisearch cache data.
Use this if Omnisearch results are inconsistent, missing, or appear outdated.<br>
<strong style="color: var(--text-accent)">Needs a restart to fully take effect.</strong>`
})
new Setting(containerEl)
.setName('Clear cache data')
.setDesc(resetCacheDesc)
.addButton(cb => {
cb.setButtonText('Clear cache')
cb.onClick(async () => {
await database.clearCache()
new Notice('Omnisearch - Cache cleared. Please restart Obsidian.')
})
})
new Setting(containerEl)
.setName('Clear cache data')
.setDesc(resetCacheDesc)
.addButton(cb => {
cb.setButtonText('Clear cache')
cb.onClick(async () => {
await database.clearCache()
new Notice('Omnisearch - Cache cleared. Please restart Obsidian.')
})
})
}
//#endregion Danger Zone
}
@@ -347,7 +363,7 @@ export class SettingsTab extends PluginSettingTab {
}
export const DEFAULT_SETTINGS: OmnisearchSettings = {
respectExcluded: true,
hideExcluded: false,
ignoreDiacritics: true,
indexedFileTypes: [] as string[],
PDFIndexing: false,
@@ -358,6 +374,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
showExcerpt: true,
renderLineReturnInExcerpts: true,
showCreateButton: false,
hightlight: true,
showPreviousQueryResults: true,
simpleSearch: false,
@@ -373,12 +390,6 @@ export let settings = Object.assign({}, DEFAULT_SETTINGS) as OmnisearchSettings
export async function loadSettings(plugin: Plugin): Promise<void> {
settings = Object.assign({}, DEFAULT_SETTINGS, await plugin.loadData())
if (Platform.isMobileApp) {
settings.PDFIndexing = false
settings.imagesIndexing = false
}
showExcerpt.set(settings.showExcerpt)
}

View File

@@ -1,6 +1,6 @@
import type { ResultNote } from '../globals'
import { Query } from '../search/query'
import { SearchEngine } from '../search/search-engine'
import { searchEngine } from '../search/omnisearch'
type ResultNoteApi = {
score: number
@@ -35,8 +35,8 @@ function mapResults(results: ResultNote[]): ResultNoteApi[] {
async function search(q: string): Promise<ResultNoteApi[]> {
const query = new Query(q)
const raw = await SearchEngine.getEngine().getSuggestions(query)
const raw = await searchEngine.getSuggestions(query)
return mapResults(raw)
}
export default { search }
export default {search}

View File

@@ -1,12 +1,13 @@
import {
type CachedMetadata,
Notice,
Platform,
getAllTags,
Notice,
parseFrontMatterAliases,
Platform,
} from 'obsidian'
import type { SearchMatch } from '../globals'
import {
chsSegmenter,
excerptAfter,
excerptBefore,
highlightClass,
@@ -70,7 +71,10 @@ export function getAllIndices(text: string, regex: RegExp): SearchMatch[] {
*/
export function stringsToRegex(strings: string[]): RegExp {
if (!strings.length) return /^$/g
const joined = strings.map(s => '\\b' + escapeRegex(s)).join('|')
// \\b is "word boundary", and is not applied if the user uses the cm-chs-patch plugin
const joined = strings
.map(s => (chsSegmenter ? '' : '\\b') + escapeRegex(s))
.join('|')
const reg = new RegExp(`(${joined})`, 'gi')
// console.log(reg)
return reg
@@ -170,7 +174,7 @@ export async function filterAsync<T>(
callbackfn: (value: T, index: number, array: T[]) => Promise<boolean>
): Promise<T[]> {
const filterMap = await mapAsync(array, callbackfn)
return array.filter((value, index) => filterMap[index])
return array.filter((_value, index) => filterMap[index])
}
/**
@@ -207,9 +211,9 @@ export function getCtrlKeyLabel(): 'ctrl' | '⌘' {
export function isFileIndexable(path: string): boolean {
return (
(settings.PDFIndexing && path.endsWith('.pdf')) ||
isFilePlaintext(path) ||
(settings.imagesIndexing && isFileImage(path))
(!Platform.isMobileApp && settings.PDFIndexing && isFilePDF(path)) ||
(!Platform.isMobileApp && settings.imagesIndexing && isFileImage(path))
)
}
@@ -219,6 +223,10 @@ export function isFileImage(path: string): boolean {
)
}
/** Tells whether the given path ends with the `.pdf` extension (case-sensitive) */
export function isFilePDF(path: string): boolean {
  const pdfExtension = '.pdf'
  return path.endsWith(pdfExtension)
}
/**
 * Tells whether the given path ends with a dot followed by one of the
 * extensions returned by `getPlaintextExtensions()`.
 */
export function isFilePlaintext(path: string): boolean {
  for (const extension of getPlaintextExtensions()) {
    if (path.endsWith(`.${extension}`)) {
      return true
    }
  }
  return false
}
@@ -240,3 +248,15 @@ export function makeMD5(data: BinaryLike): string {
}
return createHash('md5').update(data).digest('hex')
}
/**
 * Splits an array into consecutive chunks of at most `len` items.
 * The input array is not modified; the last chunk may be shorter.
 *
 * @param arr the array to split
 * @param len the maximum number of items per chunk; must be greater than 0
 * @returns the chunks, in order; an empty array when `arr` is empty
 * @throws RangeError when `len` is not greater than 0 (0, negative or NaN),
 *   since such a value could never make progress through the array
 */
export function chunkArray<T>(arr: T[], len: number): T[][] {
  if (!(len > 0)) {
    throw new RangeError(`chunkArray: len must be greater than 0, got ${len}`)
  }
  const chunks: T[][] = []
  for (let start = 0; start < arr.length; start += len) {
    chunks.push(arr.slice(start, start + len))
  }
  return chunks
}

View File

@@ -86,7 +86,7 @@ export function parseQuery(
let val = term.slice(sepIndex + 1)
// Strip backslashes respecting escapes
val = (val + '').replace(/\\(.?)/g, function (s, n1) {
val = (val + '').replace(/\\(.?)/g, function (_s, n1) {
switch (n1) {
case '\\':
return '\\'
@@ -115,7 +115,7 @@ export function parseQuery(
}
// Strip backslashes respecting escapes
term = (term + '').replace(/\\(.?)/g, function (s, n1) {
term = (term + '').replace(/\\(.?)/g, function (_s, n1) {
switch (n1) {
case '\\':
return '\\'