Squashed commit of the following:

commit 603b9bbde4c6efc90c81032e4e765c64d3075e75
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Tue Oct 11 21:47:03 2022 +0200

    Basic PDF indexing ok

commit 200331bb5c5111493af1e1f6ef8cd4bbfbdbfd4f
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Tue Oct 11 20:56:44 2022 +0200

    Tweaks and comments

commit 434b9662d40c5fea9d8b28d43828b11916db8c94
Author: Simon Cambier <simon.cambier@ores.be>
Date:   Tue Oct 11 16:22:55 2022 +0200

    Refactoring notes & minisearch cache

commit 7253c676c8ed161782ba8e33f0c4c162880925ad
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Tue Oct 11 09:50:33 2022 +0200

    wip

commit 77736e6ef6f28ccfddb64fb768732927d43bbd77
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Mon Oct 10 20:49:02 2022 +0200

    Small rewrites & deps updates

commit 59845fdb89eb6a3ad3f3f9ad75b39e7a3e604c45
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Mon Oct 10 12:22:11 2022 +0200

    wasm + worker ok

commit 1cf3b506e56147586cd0ebcc003642c5230e04cc
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Sun Oct 2 20:04:49 2022 +0200

    no disk access, of course

commit eb3dd9dd4f616a479a53e10856f6c96c6725e911
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Sun Oct 2 19:08:48 2022 +0200

    Rollup build ok

commit 54f2b7e615456c0e1b1504691689d1ba2c72d9e8
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Sun Oct 2 16:03:31 2022 +0200

    Rollup build + wasm PoC
This commit is contained in:
Simon Cambier
2022-10-11 21:54:11 +02:00
parent cf7f6af257
commit 7ddae6dc08
28 changed files with 18437 additions and 923 deletions

5
.gitignore vendored
View File

@@ -20,3 +20,8 @@ data.json
dist
.pnpm-debug.log
coverage
# Added by cargo
/target

608
Cargo.lock generated Normal file
View File

@@ -0,0 +1,608 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "adobe-cmap-parser"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3aaf5066d68c8ec9656cfd3a96bc9de83d4883f183d6c6b8d742e36a4819dda"
dependencies = [
"pom 1.1.0",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "base-x"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270"
[[package]]
name = "bumpalo"
version = "3.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1ad822118d20d2c234f427000d5acc36eabe1e29a348c89b63dd60b13f28e5d"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "const_fn"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbdcdcb6d86f71c5e97409ad45898af11cbc995b4ee8112d59095a28d376c935"
[[package]]
name = "crc32fast"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
dependencies = [
"cfg-if",
]
[[package]]
name = "discard"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "212d0f5754cb6769937f4501cc0e67f4f4483c8d2c3e1e922ee9edbe4ab4c7c0"
[[package]]
name = "dtoa"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0"
[[package]]
name = "encoding"
version = "0.2.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec"
dependencies = [
"encoding-index-japanese",
"encoding-index-korean",
"encoding-index-simpchinese",
"encoding-index-singlebyte",
"encoding-index-tradchinese",
]
[[package]]
name = "encoding-index-japanese"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding-index-korean"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding-index-simpchinese"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding-index-singlebyte"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding-index-tradchinese"
version = "1.20141219.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18"
dependencies = [
"encoding_index_tests",
]
[[package]]
name = "encoding_index_tests"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569"
[[package]]
name = "euclid"
version = "0.20.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bb7ef65b3777a325d1eeefefab5b6d4959da54747e33bd6258e789640f307ad"
dependencies = [
"num-traits",
]
[[package]]
name = "flate2"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "itoa"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
[[package]]
name = "itoa"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754"
[[package]]
name = "js-sys"
version = "0.3.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47"
dependencies = [
"wasm-bindgen",
]
[[package]]
name = "libc"
version = "0.2.134"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "329c933548736bc49fd575ee68c89e8be4d260064184389a5b77517cddd99ffb"
[[package]]
name = "linked-hash-map"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8dd5a6d5999d9907cda8ed67bbd137d3af8085216c2ac62de5be860bd41f304a"
[[package]]
name = "log"
version = "0.4.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
dependencies = [
"cfg-if",
]
[[package]]
name = "lopdf"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b49a0272112719d0037ab63d4bb67f73ba659e1e90bc38f235f163a457ac16f3"
dependencies = [
"dtoa",
"encoding",
"flate2",
"itoa 0.4.8",
"linked-hash-map",
"log",
"lzw",
"pom 3.2.0",
"time",
]
[[package]]
name = "lzw"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d947cbb889ed21c2a84be6ffbaebf5b4e0f4340638cba0444907e38b56be084"
[[package]]
name = "miniz_oxide"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96590ba8f175222643a85693f33d26e9c8a015f599c216509b1a6894af675d34"
dependencies = [
"adler",
]
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
]
[[package]]
name = "obsidian-search"
version = "0.1.0"
dependencies = [
"js-sys",
"pdf-extract",
"wasm-bindgen",
]
[[package]]
name = "once_cell"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1"
[[package]]
name = "pdf-extract"
version = "0.6.5-alpha.0"
source = "git+https://github.com/scambier/pdf-extract#8f01969a0bb49bd71195dd4fd5c87a4a0b5f4b48"
dependencies = [
"adobe-cmap-parser",
"encoding",
"euclid",
"linked-hash-map",
"lopdf",
"postscript",
"type1-encoding-parser",
"unicode-normalization",
]
[[package]]
name = "pom"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60f6ce597ecdcc9a098e7fddacb1065093a3d66446fa16c675e7e71d1b5c28e6"
[[package]]
name = "pom"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e2192780e9f8e282049ff9bffcaa28171e1cb0844f49ed5374e518ae6024ec"
[[package]]
name = "postscript"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac1825c05c4f9e2f781202d1a02fff5e5f722bbafca542d818364e1b1ea22575"
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]]
name = "proc-macro2"
version = "1.0.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94e2ef8dbfc347b10c094890f778ee2e36ca9bb4262e86dc99cd217e35f3470b"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rustc_version"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
dependencies = [
"semver",
]
[[package]]
name = "ryu"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09"
[[package]]
name = "semver"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
dependencies = [
"semver-parser",
]
[[package]]
name = "semver-parser"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
[[package]]
name = "serde"
version = "1.0.145"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "728eb6351430bccb993660dfffc5a72f91ccc1295abaa8ce19b27ebe4f75568b"
[[package]]
name = "serde_derive"
version = "1.0.145"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81fa1584d3d1bcacd84c277a0dfe21f5b0f6accf4a23d04d4c6d61f1af522b4c"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44"
dependencies = [
"itoa 1.0.3",
"ryu",
"serde",
]
[[package]]
name = "sha1"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1da05c97445caa12d05e848c4a4fcbbea29e748ac28f7e80e9b010392063770"
dependencies = [
"sha1_smol",
]
[[package]]
name = "sha1_smol"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012"
[[package]]
name = "standback"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e113fb6f3de07a243d434a56ec6f186dfd51cb08448239fe7bcae73f87ff28ff"
dependencies = [
"version_check",
]
[[package]]
name = "stdweb"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d022496b16281348b52d0e30ae99e01a73d737b2f45d38fed4edf79f9325a1d5"
dependencies = [
"discard",
"rustc_version",
"stdweb-derive",
"stdweb-internal-macros",
"stdweb-internal-runtime",
"wasm-bindgen",
]
[[package]]
name = "stdweb-derive"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c87a60a40fccc84bef0652345bbbbbe20a605bf5d0ce81719fc476f5c03b50ef"
dependencies = [
"proc-macro2",
"quote",
"serde",
"serde_derive",
"syn",
]
[[package]]
name = "stdweb-internal-macros"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58fa5ff6ad0d98d1ffa8cb115892b6e69d67799f6763e162a1c9db421dc22e11"
dependencies = [
"base-x",
"proc-macro2",
"quote",
"serde",
"serde_derive",
"serde_json",
"sha1",
"syn",
]
[[package]]
name = "stdweb-internal-runtime"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "213701ba3370744dcd1a12960caa4843b3d68b4d1c0a5d575e0d65b2ee9d16c0"
[[package]]
name = "syn"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "time"
version = "0.2.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4752a97f8eebd6854ff91f1c1824cd6160626ac4bd44287f7f4ea2035a02a242"
dependencies = [
"const_fn",
"libc",
"standback",
"stdweb",
"time-macros",
"version_check",
"winapi",
]
[[package]]
name = "time-macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "957e9c6e26f12cb6d0dd7fc776bb67a706312e7299aed74c8dd5b17ebb27e2f1"
dependencies = [
"proc-macro-hack",
"time-macros-impl",
]
[[package]]
name = "time-macros-impl"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd3c141a1b43194f3f56a1411225df8646c55781d5f26db825b3d98507eb482f"
dependencies = [
"proc-macro-hack",
"proc-macro2",
"quote",
"standback",
"syn",
]
[[package]]
name = "tinyvec"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "type1-encoding-parser"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3d6cc09e1a99c7e01f2afe4953789311a1c50baebbdac5b477ecf78e2e92a5b"
dependencies = [
"pom 1.1.0",
]
[[package]]
name = "unicode-ident"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
[[package]]
name = "unicode-normalization"
version = "0.1.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921"
dependencies = [
"tinyvec",
]
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasm-bindgen"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

18
Cargo.toml Normal file
View File

@@ -0,0 +1,18 @@
[package]
name = "obsidian-search"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
crate-type = ["cdylib"]
[dependencies]
wasm-bindgen = "0.2"
js-sys = "0.3.49"
pdf-extract = { git = "https://github.com/scambier/pdf-extract" }
[profile.release]
lto = true
opt-level = 'z'

View File

@@ -1,71 +0,0 @@
import esbuild from 'esbuild'
import sveltePlugin from 'esbuild-svelte'
import sveltePreprocess from 'svelte-preprocess'
import { copy } from 'esbuild-plugin-copy'
import process from 'process'
import builtins from 'builtin-modules'
import path from 'path'
const banner = `/*
THIS IS A GENERATED/BUNDLED FILE BY ESBUILD
if you want to view the source, please visit the github repository of this plugin
*/
`
// Build mode: `node esbuild.config.mjs production` produces the release bundle;
// any other (or no) first CLI argument is treated as a development build.
const prod = process.argv[2] === 'production'
// Bundles ./src/main.ts into ./dist/main.js as a single CommonJS file.
esbuild
.build({
banner: {
js: banner,
},
entryPoints: ['./src/main.ts'],
bundle: true,
// Modules listed here are left as runtime imports instead of being bundled
// (the `builtins` list is spread in at the end).
external: [
'obsidian',
'electron',
'@codemirror/autocomplete',
'@codemirror/closebrackets',
'@codemirror/collab',
'@codemirror/commands',
'@codemirror/comment',
'@codemirror/fold',
'@codemirror/gutter',
'@codemirror/highlight',
'@codemirror/history',
'@codemirror/language',
'@codemirror/lint',
'@codemirror/matchbrackets',
'@codemirror/panel',
'@codemirror/rangeset',
'@codemirror/rectangular-selection',
'@codemirror/search',
'@codemirror/state',
'@codemirror/stream-parser',
'@codemirror/text',
'@codemirror/tooltip',
'@codemirror/view',
...builtins,
],
outfile: path.join('./dist', 'main.js'),
plugins: [
// Compiles .svelte components, running them through svelte-preprocess first.
sveltePlugin({
preprocess: sveltePreprocess(),
}),
// Copies the stylesheet and the plugin manifest.
// NOTE(review): `to: ['./']` is presumably resolved relative to the output
// directory - confirm against the esbuild-plugin-copy documentation.
copy({
assets: {
from: ['./assets/styles.css', './manifest.json'],
to: ['./'],
},
}),
],
format: 'cjs',
// Development builds keep watching for changes and embed inline sourcemaps;
// production builds run once, are minified and carry no sourcemap.
watch: !prod,
target: 'chrome98',
logLevel: 'info',
sourcemap: prod ? false : 'inline',
treeShaking: true,
minify: prod,
legalComments: 'none',
})
// Any build failure terminates the process with a non-zero exit code.
.catch(() => process.exit(1))

16523
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -4,8 +4,8 @@
"description": "A search engine for Obsidian",
"main": "dist/main.js",
"scripts": {
"dev": "pnpm run check && node esbuild.config.mjs",
"build": "pnpm run check && node esbuild.config.mjs production",
"dev": "wasm-pack build --target web && rollup -c -w",
"build": "wasm-pack build --target web && rollup -c",
"check": "tsc -noEmit -skipLibCheck",
"version": "node version-bump.mjs && git add manifest.json versions.json package.json",
"test": "jest"
@@ -14,31 +14,41 @@
"author": "Simon Cambier",
"license": "GPL-3",
"devDependencies": {
"@babel/preset-env": "^7.19.0",
"@babel/preset-env": "^7.19.4",
"@babel/preset-typescript": "^7.18.6",
"@rollup/plugin-commonjs": "^23.0.0",
"@rollup/plugin-node-resolve": "^13.3.0",
"@rollup/plugin-typescript": "^8.5.0",
"@testing-library/jest-dom": "^5.16.5",
"@tsconfig/svelte": "^3.0.0",
"@types/jest": "^27.5.2",
"@types/node": "^16.11.58",
"@types/lodash-es": "^4.17.6",
"@types/node": "^16.11.64",
"@types/pako": "^2.0.0",
"babel-jest": "^27.5.1",
"builtin-modules": "^3.3.0",
"esbuild": "0.13.12",
"esbuild-plugin-copy": "^1.3.0",
"esbuild-svelte": "^0.7.1",
"jest": "^27.5.1",
"obsidian": "latest",
"prettier": "^2.7.1",
"prettier-plugin-svelte": "^2.7.0",
"svelte": "^3.50.1",
"prettier-plugin-svelte": "^2.8.0",
"rollup": "^2.79.1",
"rollup-plugin-base64": "^1.0.1",
"rollup-plugin-copy": "^3.4.0",
"rollup-plugin-svelte": "^7.1.0",
"rollup-plugin-terser": "^7.0.2",
"rollup-plugin-web-worker-loader": "^1.6.1",
"svelte": "^3.51.0",
"svelte-jester": "^2.3.2",
"svelte-preprocess": "^4.10.7",
"tslib": "2.3.1",
"typescript": "^4.8.3"
"typescript": "^4.8.4"
},
"dependencies": {
"@vanakat/plugin-api": "^0.1.0",
"minisearch": "^5.0.0",
"pdfjs-dist": "^2.16.105"
"@vanakat/plugin-api": "0.1.0",
"lodash-es": "4.17.21",
"minisearch": "5.0.0",
"p-queue-compat": "1.0.187",
"pako": "^2.0.4"
},
"pnpm": {
"overrides": {

1281
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

46
rollup.config.js Normal file
View File

@@ -0,0 +1,46 @@
import { nodeResolve } from '@rollup/plugin-node-resolve'
import commonjs from '@rollup/plugin-commonjs'
import { base64 } from 'rollup-plugin-base64'
import typescript from '@rollup/plugin-typescript'
import svelte from 'rollup-plugin-svelte'
import autoPreprocess from 'svelte-preprocess'
import copy from 'rollup-plugin-copy'
import { terser } from 'rollup-plugin-terser'
import webWorkerLoader from 'rollup-plugin-web-worker-loader'
const banner = `/*
THIS IS A GENERATED/BUNDLED FILE BY ROLLUP
if you want to view the source visit the plugins github repository
*/
`
const production = !process.env.ROLLUP_WATCH
export default {
input: './src/main.ts',
output: {
file: './dist/main.js',
sourcemap: !production && 'inline',
format: 'cjs',
exports: 'default',
banner,
},
external: ['obsidian'],
plugins: [
nodeResolve({ browser: true }),
svelte({
preprocess: autoPreprocess(),
}),
typescript(),
commonjs(),
base64({ include: '**/*.wasm' }),
copy({
targets: [
{ src: 'assets/styles.css', dest: 'dist' },
{ src: 'manifest.json', dest: 'dist' },
],
}),
webWorkerLoader({ inline: true, forceInline: true, targetPlatform: "browser" }),
production && terser(),
],
}

View File

@@ -1,6 +1,6 @@
import type { ResultNote, SearchMatch } from './globals'
import { Query } from './query'
import { getSuggestions } from './search'
import * as Search from './search'
type ResultNoteApi = {
score: number
@@ -30,7 +30,7 @@ function mapResults(results: ResultNote[]): ResultNoteApi[] {
async function search(q: string): Promise<ResultNoteApi[]> {
const query = new Query(q)
const raw = await getSuggestions(query)
const raw = await Search.getSuggestions(query)
return mapResults(raw)
}

120
src/cache-manager.ts Normal file
View File

@@ -0,0 +1,120 @@
import { throttle } from 'lodash-es'
import type MiniSearch from 'minisearch'
import type { TFile } from 'obsidian'
import { deflate, inflate } from 'pako'
import {
notesCacheFilePath,
minisearchCacheFilePath,
type IndexedNote,
} from './globals'
import { settings } from './settings'
/**
 * Holds the in-memory notes cache and persists it, together with the
 * serialized MiniSearch index, to the plugin's cache files.
 *
 * Every disk operation is a no-op while `settings.persistCache` is off.
 */
class CacheManager {
  /** Indexed notes, keyed by the path given to `addNoteToCache`. */
  notesCache: Record<string, IndexedNote> = {}
  /** When true, cache files are deflated on write and inflated on read. */
  compress = true
  writeInterval = 5_000 // In milliseconds

  //#region Minisearch

  /**
   * Serializes and writes the Minisearch index on the disk.
   * Throttled to one write per `writeInterval`, on both the leading and the
   * trailing edge. The arrow wrapper keeps `this` bound to the instance even
   * if the function is detached from it.
   */
  public writeMinisearchIndex = throttle(
    (minisearch: MiniSearch) => this._writeMinisearchIndex(minisearch),
    this.writeInterval,
    {
      leading: true,
      trailing: true,
    }
  )

  private async _writeMinisearchIndex(minisearch: MiniSearch): Promise<void> {
    if (!settings.persistCache) {
      return
    }
    await this.writeCacheFile(
      minisearchCacheFilePath,
      JSON.stringify(minisearch)
    )
    console.log('Omnisearch - Minisearch index saved on disk')
  }

  /**
   * Reads the serialized Minisearch index back from the disk.
   *
   * @returns the JSON text of the index, or `null` when persistence is
   * disabled, the file does not exist, or it could not be read/decoded (in
   * which case the unreadable file is deleted).
   */
  public async readMinisearchIndex(): Promise<string | null> {
    if (!settings.persistCache) {
      return null
    }
    if (await app.vault.adapter.exists(minisearchCacheFilePath)) {
      try {
        return await this.readCacheFile(minisearchCacheFilePath)
      } catch (e) {
        console.trace(
          'Omnisearch - Could not load MiniSearch index from the file:'
        )
        console.warn(e)
        await this.discardCacheFile(minisearchCacheFilePath)
      }
    }
    return null
  }

  //#endregion Minisearch

  /**
   * Replaces `notesCache` with the content of the notes cache file, if it
   * exists. On a read or parse failure the file is deleted and `notesCache`
   * is left untouched.
   *
   * @returns always `null`.
   */
  public async loadNotesCache() {
    if (!settings.persistCache) {
      return null
    }
    if (await app.vault.adapter.exists(notesCacheFilePath)) {
      try {
        const json = await this.readCacheFile(notesCacheFilePath)
        this.notesCache = JSON.parse(json)
      } catch (e) {
        console.trace('Omnisearch - Could not load notes cache:')
        console.warn(e)
        await this.discardCacheFile(notesCacheFilePath)
      }
    }
    return null
  }

  /**
   * Writes `notesCache` on the disk. Throttled exactly like
   * `writeMinisearchIndex`.
   */
  public saveNotesCache = throttle(
    () => this._saveNotesCache(),
    this.writeInterval,
    {
      leading: true,
      trailing: true,
    }
  )

  private async _saveNotesCache() {
    if (!settings.persistCache) {
      return
    }
    await this.writeCacheFile(
      notesCacheFilePath,
      JSON.stringify(this.notesCache)
    )
    console.log('Omnisearch - Notes cache saved on disk')
  }

  /** Stores `note` under `path` and schedules a (throttled) save. */
  public addNoteToCache(path: string, note: IndexedNote) {
    this.notesCache[path] = note
    this.saveNotesCache()
  }

  /**
   * Removes the entry stored under `key` from the in-memory cache.
   * NOTE(review): unlike `addNoteToCache`, this does not schedule a save, so
   * the removal only reaches the disk with the next save - confirm that this
   * is intended.
   */
  public removeNoteFromCache(key: string): void {
    delete this.notesCache[key]
  }

  /** Returns the note stored under `key`, or `undefined` if there is none. */
  public getNoteFromCache(key: string): IndexedNote | undefined {
    return this.notesCache[key]
  }

  /** Returns every cached note whose `doesNotExist` flag is truthy. */
  public getNonExistingNotesFromCache(): IndexedNote[] {
    return Object.values(this.notesCache).filter(note => note.doesNotExist)
  }

  /**
   * Tells whether `file` must be (re)indexed: true when it is not in the
   * cache, or when its cached `mtime` differs from the file's current one.
   */
  public isCacheOutdated(file: TFile): boolean {
    const indexedNote = this.getNoteFromCache(file.path)
    return !indexedNote || indexedNote.mtime !== file.stat.mtime
  }

  /**
   * Writes `json` to `path` as bytes: deflated when `compress` is set, plain
   * UTF-8 otherwise, so the binary adapter API never receives a JS string.
   */
  private async writeCacheFile(path: string, json: string): Promise<void> {
    const bytes = this.compress ? deflate(json) : new TextEncoder().encode(json)
    // The adapter is typed for ArrayBuffer; the byte view is handed over
    // as-is, exactly as the compressed path always did.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await app.vault.adapter.writeBinary(path, bytes as any)
  }

  /**
   * Reads `path` and returns its content as text, inflating it first when
   * `compress` is set. Throws if the file cannot be read or inflated.
   */
  private async readCacheFile(path: string): Promise<string> {
    const data = await app.vault.adapter.readBinary(path)
    return new TextDecoder('utf8').decode(this.compress ? inflate(data) : data)
  }

  /**
   * Best-effort deletion of an unreadable cache file. A failure is logged
   * rather than propagated, so a broken cache never prevents start-up.
   */
  private async discardCacheFile(path: string): Promise<void> {
    try {
      await app.vault.adapter.remove(path)
    } catch (e) {
      console.warn(e)
    }
  }
}

export const cacheManager = new CacheManager()

View File

@@ -13,13 +13,13 @@
import { loopIndex } from 'src/utils'
import { onDestroy, onMount, tick } from 'svelte'
import { MarkdownView } from 'obsidian'
import { getSuggestions } from 'src/search'
import * as Search from 'src/search'
import ModalContainer from './ModalContainer.svelte'
import { OmnisearchInFileModal, OmnisearchVaultModal } from 'src/modals'
import ResultItemInFile from './ResultItemInFile.svelte'
import { Query } from 'src/query'
import { openNote } from 'src/notes'
import {saveSearchHistory} from "../search-history";
import { saveSearchHistory } from '../search-history'
export let modal: OmnisearchInFileModal
export let parent: OmnisearchVaultModal | null = null
@@ -50,7 +50,7 @@
$: (async () => {
if (searchQuery) {
query = new Query(searchQuery)
note = (await getSuggestions(query, { singleFilePath }))[0] ?? null
note = (await Search.getSuggestions(query, { singleFilePath }))[0] ?? null
lastSearch = searchQuery
}
selectedIndex = 0
@@ -143,20 +143,20 @@
</script>
<InputSearch
value={searchQuery}
on:input={e => (searchQuery = e.detail)}
value="{searchQuery}"
on:input="{e => (searchQuery = e.detail)}"
placeholder="Omnisearch - File" />
<ModalContainer>
{#if groupedOffsets.length && note}
{#each groupedOffsets as offset, i}
<ResultItemInFile
{offset}
{note}
index={i}
selected={i === selectedIndex}
on:mousemove={_e => (selectedIndex = i)}
on:click={openSelection} />
offset="{offset}"
note="{note}"
index="{i}"
selected="{i === selectedIndex}"
on:mousemove="{_e => (selectedIndex = i)}"
on:click="{openSelection}" />
{/each}
{:else}
<div style="text-align: center;">

View File

@@ -5,14 +5,14 @@
import ModalContainer from './ModalContainer.svelte'
import { eventBus, type ResultNote } from 'src/globals'
import { createNote, openNote } from 'src/notes'
import { getSuggestions } from 'src/search'
import * as Search from 'src/search'
import { getCtrlKeyLabel, getExtension, loopIndex } from 'src/utils'
import { OmnisearchInFileModal, type OmnisearchVaultModal } from 'src/modals'
import ResultItemVault from './ResultItemVault.svelte'
import { Query } from 'src/query'
import { saveSearchHistory, searchHistory } from 'src/search-history'
import { settings } from '../settings'
import { refreshIndex } from '../notes-index'
import * as NotesIndex from '../notes-index'
export let modal: OmnisearchVaultModal
let selectedIndex = 0
@@ -29,7 +29,7 @@
}
onMount(async () => {
await refreshIndex()
await NotesIndex.refreshIndex()
searchQuery = searchHistory[historySearchIndex]
eventBus.enable('vault')
eventBus.on('vault', 'enter', openNoteAndCloseModal)
@@ -63,7 +63,7 @@
async function updateResults() {
query = new Query(searchQuery)
resultNotes = (await getSuggestions(query)).sort(
resultNotes = (await Search.getSuggestions(query)).sort(
(a, b) => b.score - a.score
)
selectedIndex = 0

View File

@@ -1,8 +1,8 @@
<script lang="ts">
import { getNoteFromCache } from 'src/notes'
import { cacheManager } from 'src/cache-manager'
import { settings, showExcerpt } from 'src/settings'
import type { ResultNote } from '../globals'
import { getMatches } from '../search'
import * as Search from '../search'
import { highlighter, makeExcerpt, stringsToRegex } from '../utils'
import ResultItemContainer from './ResultItemContainer.svelte'
@@ -10,13 +10,18 @@
export let note: ResultNote
$: reg = stringsToRegex(note.foundWords)
$: matches = getMatches(note.content, reg)
$: matches = Search.getMatches(note.content, reg)
$: cleanedContent = makeExcerpt(note.content, note.matches[0]?.offset ?? -1)
$: glyph = getNoteFromCache(note.path)?.doesNotExist
$: glyph = cacheManager.getNoteFromCache(note.path)?.doesNotExist
$: title = settings.showShortName ? note.basename : note.path
</script>
<ResultItemContainer id={note.path} {selected} on:mousemove on:click {glyph}>
<ResultItemContainer
id="{note.path}"
selected="{selected}"
on:mousemove
on:click
glyph="{glyph}">
<div>
<span class="omnisearch-result__title">
{@html title.replace(reg, highlighter)}

View File

@@ -12,8 +12,9 @@ export const highlightClass = 'suggestion-highlight omnisearch-highlight'
export const eventBus = new EventBus()
export const searchIndexFilePath = `${app.vault.configDir}/plugins/omnisearch/searchIndex.json`
export const notesCacheFilePath = `${app.vault.configDir}/plugins/omnisearch/notesCache.json`
export const minisearchCacheFilePath = `${app.vault.configDir}/plugins/omnisearch/searchIndex.data`
export const notesCacheFilePath = `${app.vault.configDir}/plugins/omnisearch/notesCache.data`
export const pdfCacheFilePath = `${app.vault.configDir}/plugins/omnisearch/pdfCache.data`
export const historyFilePath = `${app.vault.configDir}/plugins/omnisearch/historyCache.json`
export const EventNames = {

57
src/lib.rs Normal file
View File

@@ -0,0 +1,57 @@
use js_sys::Uint8Array;
use pdf_extract::extract_text_from_mem;
use wasm_bindgen::prelude::*;
mod obsidian;
/// Extracts the text of a PDF passed from JavaScript as a `Uint8Array`.
///
/// When extraction fails, the error's message is returned in place of the
/// document text; nothing is thrown on the JS side.
#[wasm_bindgen]
pub fn extract_pdf_text(arr: Uint8Array) -> String {
    // FIXME: return a Result<> here, to throw in JS in case of an error
    // Copy the JS-owned bytes into a Rust-owned buffer before parsing.
    let bytes = arr.to_vec();
    extract_text_from_mem(&bytes).unwrap_or_else(|err| err.to_string())
}
// #[wasm_bindgen]
// pub struct ExampleCommand {
// id: JsString,
// name: JsString,
// }
// #[wasm_bindgen]
// impl ExampleCommand {
// #[wasm_bindgen(getter)]
// pub fn id(&self) -> JsString {
// self.id.clone()
// }
// #[wasm_bindgen(setter)]
// pub fn set_id(&mut self, id: &str) {
// self.id = JsString::from(id)
// }
// #[wasm_bindgen(getter)]
// pub fn name(&self) -> JsString {
// self.name.clone()
// }
// #[wasm_bindgen(setter)]
// pub fn set_name(&mut self, name: &str) {
// self.name = JsString::from(name)
// }
// pub fn callback(&self) {
// obsidian::Notice::new("hello from rust");
// }
// }
// #[wasm_bindgen]
// pub fn onload(plugin: &obsidian::Plugin) {
// let cmd = ExampleCommand {
// id: JsString::from("example"),
// name: JsString::from("Example"),
// };
// plugin.addCommand(JsValue::from(cmd))
// }

View File

@@ -1,5 +1,5 @@
import { Plugin, TFile } from 'obsidian'
import { initGlobalSearchIndex } from './search'
import * as Search from './search'
import { OmnisearchInFileModal, OmnisearchVaultModal } from './modals'
import { loadSettings, settings, SettingsTab, showExcerpt } from './settings'
import { eventBus, EventNames } from './globals'
@@ -7,7 +7,9 @@ import { registerAPI } from '@vanakat/plugin-api'
import api from './api'
import { loadSearchHistory } from './search-history'
import { isFilePlaintext, showWelcomeNotice } from './utils'
import { addNoteToReindex, addToIndex, removeFromIndex } from './notes-index'
import * as NotesIndex from './notes-index'
import { cacheManager } from './cache-manager'
import { pdfManager } from './pdf-manager'
function _registerAPI(plugin: OmnisearchPlugin): void {
registerAPI('omnisearch', api, plugin as any)
@@ -19,10 +21,12 @@ function _registerAPI(plugin: OmnisearchPlugin): void {
export default class OmnisearchPlugin extends Plugin {
async onload(): Promise<void> {
// additional files to index by Omnisearch
await loadSettings(this)
await loadSearchHistory()
await cacheManager.loadNotesCache()
await pdfManager.loadPDFCache()
_registerAPI(this)
if (settings.ribbonIcon) {
@@ -57,35 +61,38 @@ export default class OmnisearchPlugin extends Plugin {
// Listeners to keep the search index up-to-date
this.registerEvent(
this.app.vault.on('create', file => {
addToIndex(file)
NotesIndex.addToIndexAndCache(file)
})
)
this.registerEvent(
this.app.vault.on('delete', file => {
removeFromIndex(file.path)
NotesIndex.removeFromIndex(file.path)
})
)
this.registerEvent(
this.app.vault.on('modify', async file => {
addNoteToReindex(file)
NotesIndex.addNoteToReindex(file)
})
)
this.registerEvent(
this.app.vault.on('rename', async (file, oldPath) => {
if (file instanceof TFile && isFilePlaintext(file.path)) {
removeFromIndex(oldPath)
await addToIndex(file)
NotesIndex.removeFromIndex(oldPath)
await NotesIndex.addToIndexAndCache(file)
}
})
)
await initGlobalSearchIndex()
await Search.initGlobalSearchIndex()
})
// showWelcomeNotice(this)
}
onunload(): void {}
onunload(): void {
console.log('Omnisearch - Interrupting PDF indexing')
NotesIndex.pdfQueue.pause()
}
addRibbonButton(): void {
this.addRibbonIcon('search', 'Omnisearch', _evt => {

View File

@@ -1,6 +1,5 @@
import { Notice, TAbstractFile, TFile } from 'obsidian'
import {
canIndexPDFs,
extractHeadingsFromCache,
getAliasesFromMetadata,
getTagsFromMetadata,
@@ -9,36 +8,33 @@ import {
removeDiacritics,
wait,
} from './utils'
import {
addNoteToCache,
getNonExistingNotes,
getNonExistingNotesFromCache,
getNoteFromCache,
removeAnchors,
removeNoteFromCache,
saveNotesCacheToFile,
} from './notes'
import {getPdfText} from './pdf-parser'
import { getNonExistingNotes, removeAnchors } from './notes'
import * as PDF from './pdf-manager'
import type { IndexedNote } from './globals'
import {searchIndexFilePath} from './globals'
import { settings } from './settings'
import {minisearchInstance} from './search'
import * as Search from './search'
import PQueue from 'p-queue-compat'
import { cacheManager } from './cache-manager'
let isIndexChanged: boolean
export const pdfQueue = new PQueue({
concurrency: settings.backgroundProcesses,
})
/**
* Adds a file to the index
* @param file
* @returns
*/
export async function addToIndex(file: TAbstractFile): Promise<void> {
export async function addToIndexAndCache(file: TAbstractFile): Promise<void> {
if (!(file instanceof TFile) || !isFileIndexable(file.path)) {
return
}
// Check if the file was already indexed as non-existent,
// and if so, remove it from the index (before adding it again)
if (getNoteFromCache(file.path)?.doesNotExist) {
if (cacheManager.getNoteFromCache(file.path)?.doesNotExist) {
removeFromIndex(file.path)
}
@@ -50,18 +46,20 @@ export async function addToIndex(file: TAbstractFile): Promise<void> {
const metadata = app.metadataCache.getFileCache(file)
if (metadata) {
const nonExisting = getNonExistingNotes(file, metadata)
for (const name of nonExisting.filter(o => !getNoteFromCache(o))) {
for (const name of nonExisting.filter(
o => !cacheManager.getNoteFromCache(o)
)) {
addNonExistingToIndex(name, file.path)
}
}
if (getNoteFromCache(file.path)) {
if (cacheManager.getNoteFromCache(file.path)) {
throw new Error(`${file.basename} is already indexed`)
}
let content
if (file.path.endsWith('.pdf')) {
content = removeDiacritics(await getPdfText(file as TFile))
content = removeDiacritics(await PDF.pdfManager.getPdfText(file as TFile))
} else {
// Fetch content from the cache to index it as-is
content = removeDiacritics(await app.vault.cachedRead(file))
@@ -87,9 +85,9 @@ export async function addToIndex(file: TAbstractFile): Promise<void> {
: '',
}
minisearchInstance.add(note)
Search.minisearchInstance.add(note)
isIndexChanged = true
addNoteToCache(note.path, note)
cacheManager.addNoteToCache(note.path, note)
} catch (e) {
console.trace('Error while indexing ' + file.basename)
console.error(e)
@@ -105,7 +103,7 @@ export async function addToIndex(file: TAbstractFile): Promise<void> {
export function addNonExistingToIndex(name: string, parent: string): void {
name = removeAnchors(name)
const filename = name + (name.endsWith('.md') ? '' : '.md')
if (getNoteFromCache(filename)) return
if (cacheManager.getNoteFromCache(filename)) return
const note = {
path: filename,
@@ -121,9 +119,9 @@ export function addNonExistingToIndex(name: string, parent: string): void {
doesNotExist: true,
parent,
} as IndexedNote
minisearchInstance.add(note)
Search.minisearchInstance.add(note)
isIndexChanged = true
addNoteToCache(filename, note)
cacheManager.addNoteToCache(filename, note)
}
/**
@@ -135,18 +133,19 @@ export function removeFromIndex(path: string): void {
console.info(`"${path}" is not an indexable file`)
return
}
const note = getNoteFromCache(path)
const note = cacheManager.getNoteFromCache(path)
if (note) {
minisearchInstance.remove(note)
Search.minisearchInstance.remove(note)
isIndexChanged = true
removeNoteFromCache(path)
getNonExistingNotesFromCache()
cacheManager.removeNoteFromCache(path)
cacheManager
.getNonExistingNotesFromCache()
.filter(n => n.parent === path)
.forEach(n => {
removeFromIndex(n.path)
})
} else {
console.warn(`not not found under path ${path}`)
console.warn(`Omnisearch - Note not found under path ${path}`)
}
}
@@ -157,54 +156,40 @@ export function addNoteToReindex(note: TAbstractFile): void {
}
export async function refreshIndex(): Promise<void> {
if (settings.showIndexingNotices && notesToReindex.size > 0) {
if (notesToReindex.size > 0) {
if (settings.showIndexingNotices) {
new Notice(`Omnisearch - Reindexing ${notesToReindex.size} notes`, 2000)
}
for (const note of notesToReindex) {
removeFromIndex(note.path)
await addToIndex(note)
await addToIndexAndCache(note)
await wait(0)
}
notesToReindex.clear()
await saveIndexToFile()
}
export async function saveIndexToFile(): Promise<void> {
if (settings.storeIndexInFile && minisearchInstance && isIndexChanged) {
const json = JSON.stringify(minisearchInstance)
await app.vault.adapter.write(searchIndexFilePath, json)
console.log('Omnisearch - Index saved on disk')
await saveNotesCacheToFile()
isIndexChanged = false
await cacheManager.writeMinisearchIndex(Search.minisearchInstance)
}
}
export async function indexPDFs() {
if (canIndexPDFs()) {
const start = new Date().getTime()
if (settings.PDFIndexing) {
const files = app.vault.getFiles().filter(f => f.path.endsWith('.pdf'))
if (files.length > 50) {
new Notice(`⚠️ Omnisearch is indexing ${files.length} PDFs. You can experience slowdowns while this work is in progress.`)
}
const promises: Promise<void>[] = []
console.time('PDF Indexing')
console.log(`Omnisearch - Indexing ${files.length} PDFs`)
for (const file of files) {
if (getNoteFromCache(file.path)) {
if (cacheManager.getNoteFromCache(file.path)) {
removeFromIndex(file.path)
}
promises.push(addToIndex(file))
pdfQueue.add(async () => {
await addToIndexAndCache(file)
await cacheManager.writeMinisearchIndex(Search.minisearchInstance)
})
}
await Promise.all(promises)
// Notice & log
const message = `Omnisearch - Indexed ${files.length} PDFs in ${
new Date().getTime() - start
}ms`
await pdfQueue.onEmpty()
console.timeEnd('PDF Indexing')
if (settings.showIndexingNotices) {
new Notice(message)
}
console.log(message)
new Notice(`Omnisearch - Indexed ${files.length} PDFs`)
}
}
}

View File

@@ -1,55 +1,6 @@
import { type CachedMetadata, MarkdownView, TFile } from 'obsidian'
import {
type IndexedNote,
notesCacheFilePath,
type ResultNote,
} from './globals'
import { stringsToRegex } from './utils'
import { settings } from './settings'
/**
* This is an in-memory cache of the notes, with all their computed fields
* used by the search engine.
* This cache allows us to quickly de-index notes when they are deleted or updated.
*/
export let notesCache: Record<string, IndexedNote> = {}
export function resetNotesCache(): void {
notesCache = {}
}
export async function loadNotesCache(): Promise<void> {
if (
settings.storeIndexInFile &&
(await app.vault.adapter.exists(notesCacheFilePath))
) {
try {
const json = await app.vault.adapter.read(notesCacheFilePath)
notesCache = JSON.parse(json)
console.log('Omnisearch - Notes cache loaded from the file')
} catch (e) {
console.trace('Omnisearch - Could not load Notes cache from the file')
console.error(e)
}
}
notesCache ||= {}
}
export function getNoteFromCache(key: string): IndexedNote | undefined {
return notesCache[key]
}
export function getNonExistingNotesFromCache(): IndexedNote[] {
return Object.values(notesCache).filter(note => note.doesNotExist)
}
export function addNoteToCache(filename: string, note: IndexedNote): void {
notesCache[filename] = note
}
export function removeNoteFromCache(key: string): void {
delete notesCache[key]
}
import type { ResultNote } from './globals'
export async function openNote(
item: ResultNote,
@@ -145,14 +96,3 @@ export function getNonExistingNotes(
export function removeAnchors(name: string): string {
return name.split(/[\^#]+/)[0]
}
export async function saveNotesCacheToFile(): Promise<void> {
const json = JSON.stringify(notesCache)
await app.vault.adapter.write(notesCacheFilePath, json)
console.log('Omnisearch - Notes cache saved to the file')
}
export function isCacheOutdated(file: TFile): boolean {
const indexedNote = getNoteFromCache(file.path)
return !indexedNote || indexedNote.mtime !== file.stat.mtime
}

14
src/obsidian.rs Normal file
View File

@@ -0,0 +1,14 @@
use wasm_bindgen::prelude::*;
// Imports from the JavaScript "obsidian" module, bound through wasm-bindgen.
#[wasm_bindgen(module = "obsidian")]
extern "C" {
// Handle to the JS `Plugin` type.
pub type Plugin;
// Binds `addCommand` as a method called on a `Plugin`; the command is passed as an arbitrary JS value.
#[wasm_bindgen(structural, method)]
pub fn addCommand(this: &Plugin, command: JsValue);
// Handle to the JS `Notice` type.
pub type Notice;
// Binds the JS constructor of `Notice`, taking the message to display.
#[wasm_bindgen(constructor)]
pub fn new(message: &str) -> Notice;
}

53
src/pdf-manager.ts Normal file
View File

@@ -0,0 +1,53 @@
import type { TFile } from 'obsidian'
import PQueue from 'p-queue-compat'
import PDFWorker from 'web-worker:./pdf-worker.ts'
import { pdfCacheFilePath } from './globals'
import { deflate, inflate } from 'pako'
import { md5 } from './utils'
/**
 * Extracts the text of PDF files in a web worker and caches the results,
 * keyed by the MD5 hash of each file's bytes. The cache is persisted as a
 * deflated JSON file at `pdfCacheFilePath`.
 */
class PDFManager {
  /** Extracted text, keyed by the MD5 hash of the PDF's binary content. */
  private cache: Map<string, { content: string }> = new Map()
  /** Runs cache updates one at a time, so writes to the cache file never overlap. */
  private serializeQueue = new PQueue({ concurrency: 1 })

  /**
   * Loads the cache from `pdfCacheFilePath` if that file exists.
   * If the file cannot be read or decoded, the error is logged and the
   * cache is reset to an empty map.
   */
  public async loadPDFCache(): Promise<void> {
    if (!(await app.vault.adapter.exists(pdfCacheFilePath))) {
      return
    }
    try {
      const data = await app.vault.adapter.readBinary(pdfCacheFilePath)
      const json = new TextDecoder('utf8').decode(inflate(data))
      this.cache = new Map(JSON.parse(json))
    } catch (e) {
      console.error(e)
      this.cache = new Map()
    }
  }

  /**
   * Returns the text of a PDF file, from the cache when the file's content
   * hash is already known, otherwise by extracting it in a dedicated worker.
   *
   * @param file - the PDF file to read
   * @returns the extracted text
   * @throws if the file cannot be read, or if the worker reports an error
   */
  public async getPdfText(file: TFile): Promise<string> {
    const data = new Uint8Array(await app.vault.readBinary(file))
    const hash = md5(data)
    const cached = this.cache.get(hash)
    if (cached !== undefined) {
      return cached.content
    }

    const worker = new PDFWorker({ name: 'PDF Text Extractor' })
    let text: string
    try {
      text = await new Promise<string>((resolve, reject) => {
        // Attach the handlers before posting, so no event can be missed
        worker.onmessage = (evt: MessageEvent<{ text: string }>) => {
          resolve(evt.data.text)
        }
        // Without this, a failing worker would leave the promise pending forever
        worker.onerror = (evt: ErrorEvent) => {
          reject(
            new Error(
              `Omnisearch - PDF worker failed for "${file.path}": ${evt.message}`
            )
          )
        }
        // @ts-ignore
        worker.postMessage({ data })
      })
    } finally {
      // One worker is spawned per extraction; always release it
      worker.terminate()
    }

    // Persist in the background: the caller does not wait for the disk write
    void this.updatePDFCache(hash, text).catch(e => console.error(e))
    return text
  }

  /**
   * Stores the extracted text in the cache and rewrites the cache file.
   * The queued task awaits the write, so the concurrency-1 queue really
   * serializes the writes.
   *
   * @param hash - MD5 hash of the PDF's binary content
   * @param content - text extracted from the PDF
   */
  private async updatePDFCache(hash: string, content: string): Promise<void> {
    await this.serializeQueue.add(async () => {
      this.cache.set(hash, { content })
      const data = deflate(JSON.stringify(Array.from(this.cache), null, 1))
      await app.vault.adapter.writeBinary(pdfCacheFilePath, data as any)
    })
  }
}
export const pdfManager = new PDFManager()

View File

@@ -1,17 +0,0 @@
import type { TFile } from 'obsidian'
import PDFJs from 'pdfjs-dist'
import pdfjsWorker from 'pdfjs-dist/build/pdf.worker.entry'
PDFJs.GlobalWorkerOptions.workerSrc = pdfjsWorker
// https://stackoverflow.com/a/59929946
export async function getPdfText(file: TFile): Promise<string> {
const data = await app.vault.readBinary(file)
const doc = await PDFJs.getDocument(data).promise
const pageTexts = Array.from({ length: doc.numPages }, async (v, i) => {
const page = await doc.getPage(i + 1)
const content = await page.getTextContent()
return (content.items as any[]).map(token => token.str).join('')
})
return (await Promise.all(pageTexts)).join('')
}

16
src/pdf-worker.ts Normal file
View File

@@ -0,0 +1,16 @@
import rustPlugin from '../pkg/obsidian_search_bg.wasm'
import * as plugin from '../pkg/obsidian_search'
const decodedPlugin = decodeBase64(rustPlugin as any)
// Handles one extraction request: initialises the wasm module from its
// decoded bytes, extracts the text of the PDF received in `evt.data.data`,
// and posts it back as `{ text }`.
onmessage = async evt => {
  const wasmBytes = Uint8Array.from(decodedPlugin, char => char.charCodeAt(0))
  await plugin.default(Promise.resolve(wasmBytes))
  const pdfBytes = evt.data.data as Uint8Array
  const text = plugin.extract_pdf_text(pdfBytes)
  self.postMessage({ text })
}
/**
 * Decodes a base64-encoded string with `atob`.
 * @param data - base64 text
 * @returns the decoded string, one character per decoded byte
 */
function decodeBase64(data: string) {
  // Alternative kept for reference: Buffer.from(data, 'base64').toString()
  const decoded = atob(data)
  return decoded
}

View File

@@ -4,12 +4,12 @@ import {
chsRegex,
type IndexedNote,
type ResultNote,
searchIndexFilePath,
minisearchCacheFilePath,
type SearchMatch,
SPACE_OR_PUNCTUATION,
} from './globals'
import {
canIndexPDFs,
isFileIndexable,
isFilePlaintext,
removeDiacritics,
stringsToRegex,
@@ -18,13 +18,15 @@ import {
} from './utils'
import type { Query } from './query'
import { settings } from './settings'
import {
getNoteFromCache,
isCacheOutdated,
loadNotesCache,
resetNotesCache,
} from './notes'
import {addToIndex, indexPDFs, removeFromIndex, saveIndexToFile} from './notes-index'
// import {
// getNoteFromCache,
// isCacheOutdated,
// loadNotesCache,
// resetNotesCache,
// } from './notes'
import * as NotesIndex from './notes-index'
import PQueue from 'p-queue-compat'
import { cacheManager } from './cache-manager'
export let minisearchInstance: MiniSearch<IndexedNote>
@@ -60,15 +62,18 @@ export async function initGlobalSearchIndex(): Promise<void> {
storeFields: ['tags'],
}
if (
settings.storeIndexInFile &&
(await app.vault.adapter.exists(searchIndexFilePath))
) {
// Default instance
minisearchInstance = new MiniSearch(options)
// Load Minisearch cache, if it exists
if (await app.vault.adapter.exists(minisearchCacheFilePath)) {
try {
const json = await app.vault.adapter.read(searchIndexFilePath)
const json = await cacheManager.readMinisearchIndex()
if (json) {
// If we have cache data, reload it
minisearchInstance = MiniSearch.loadJSON(json, options)
}
console.log('Omnisearch - MiniSearch index loaded from the file')
await loadNotesCache()
} catch (e) {
console.trace(
'Omnisearch - Could not load MiniSearch index from the file'
@@ -77,10 +82,9 @@ export async function initGlobalSearchIndex(): Promise<void> {
}
}
if (!minisearchInstance) {
minisearchInstance = new MiniSearch(options)
resetNotesCache()
}
// if (!minisearchInstance) {
// resetNotesCache()
// }
// Index files that are already present
const start = new Date().getTime()
@@ -89,32 +93,28 @@ export async function initGlobalSearchIndex(): Promise<void> {
let files
let notesSuffix
if (settings.storeIndexInFile) {
files = allFiles.filter(file => isCacheOutdated(file))
if (settings.persistCache) {
files = allFiles.filter(file => cacheManager.isCacheOutdated(file))
notesSuffix = 'modified notes'
} else {
files = allFiles
notesSuffix = 'notes'
}
console.log(`Omnisearch - indexing ${files.length} ${notesSuffix}`)
if (files.length > 0) {
console.log(`Omnisearch - Indexing ${files.length} ${notesSuffix}`)
}
// This is basically the same behavior as MiniSearch's `addAllAsync()`.
// We index markdown and plaintext files by batches of 10
let promises: Promise<void>[] = []
for (let i = 0; i < files.length; ++i) {
const file = files[i]
if (getNoteFromCache(file.path)) {
removeFromIndex(file.path)
// Read and index all the files into the search engine
const queue = new PQueue({ concurrency: 10 })
for (const file of files) {
if (cacheManager.getNoteFromCache(file.path)) {
NotesIndex.removeFromIndex(file.path)
}
promises.push(addToIndex(file))
if (i % 10 === 0) {
await wait(1)
await Promise.all(promises)
promises = []
queue.add(() => NotesIndex.addToIndexAndCache(file))
}
}
await Promise.all(promises)
await queue.onEmpty()
if (files.length > 0) {
const message = `Omnisearch - Indexed ${files.length} ${notesSuffix} in ${
@@ -127,10 +127,10 @@ export async function initGlobalSearchIndex(): Promise<void> {
new Notice(message)
}
await saveIndexToFile()
await cacheManager.writeMinisearchIndex(minisearchInstance)
// PDFs are indexed later, since they're heavier
await indexPDFs()
await NotesIndex.indexPDFs()
}
}
@@ -172,9 +172,10 @@ async function search(query: Query): Promise<SearchResult[]> {
const exactTerms = query.getExactTerms()
if (exactTerms.length) {
results = results.filter(r => {
const title = getNoteFromCache(r.id)?.path.toLowerCase() ?? ''
const title =
cacheManager.getNoteFromCache(r.id)?.path.toLowerCase() ?? ''
const content = stripMarkdownCharacters(
getNoteFromCache(r.id)?.content ?? ''
cacheManager.getNoteFromCache(r.id)?.content ?? ''
).toLowerCase()
return exactTerms.every(q => content.includes(q) || title.includes(q))
})
@@ -185,7 +186,7 @@ async function search(query: Query): Promise<SearchResult[]> {
if (exclusions.length) {
results = results.filter(r => {
const content = stripMarkdownCharacters(
getNoteFromCache(r.id)?.content ?? ''
cacheManager.getNoteFromCache(r.id)?.content ?? ''
).toLowerCase()
return exclusions.every(q => !content.includes(q.value))
})
@@ -253,7 +254,7 @@ export async function getSuggestions(
// Map the raw results to get usable suggestions
return results.map(result => {
const note = getNoteFromCache(result.id)
const note = cacheManager.getNoteFromCache(result.id)
if (!note) {
throw new Error(`Note "${result.id}" not indexed`)
}

View File

@@ -1,6 +1,6 @@
import { Plugin, PluginSettingTab, Setting, SliderComponent } from 'obsidian'
import { writable } from 'svelte/store'
import { notesCacheFilePath, searchIndexFilePath } from './globals'
import { notesCacheFilePath, minisearchCacheFilePath } from './globals'
import type OmnisearchPlugin from './main'
interface WeightingSettings {
@@ -11,20 +11,33 @@ interface WeightingSettings {
}
export interface OmnisearchSettings extends WeightingSettings {
/** Respect the "excluded files" Obsidian setting by downranking results ignored files */
respectExcluded: boolean
/** Ignore diacritics when indexing files */
ignoreDiacritics: boolean
/** Extensions of plain text files to index, in addition to .md */
indexedFileTypes: string[]
indexPDFs: boolean
storeIndexInFile: boolean
/** Enable PDF indexing */
PDFIndexing: boolean
/** Max number of spawned processes for background tasks, such as extracting text from PDFs */
backgroundProcesses: number
/** Write cache files on disk (unrelated to PDFs) */
persistCache: boolean
/** Display Omnisearch popup notices over Obsidian */
showIndexingNotices: boolean
/** Activate the small 🔍 button on Obsidian's ribbon */
ribbonIcon: boolean
/** Display short filenames in search results, instead of the full path */
showShortName: boolean
/** Display the small contextual excerpt in search results */
showExcerpt: boolean
/** Enable a "create note" button in the Vault Search modal */
showCreateButton: boolean
/** Vim mode shortcuts */
CtrlJK: boolean
/** Vim mode shortcuts */
CtrlNP: boolean
/** Key for the welcome message when Obsidian is updated. A message is only shown once. */
welcomeMessage: string
}
@@ -74,7 +87,7 @@ export class SettingsTab extends PluginSettingTab {
const diacriticsDesc = new DocumentFragment()
diacriticsDesc.createSpan({}, span => {
span.innerHTML = `Normalize diacritics in search terms. Words like "brûlée" or "žluťoučký" will be indexed as "brulee" and "zlutoucky".<br/>
<strong>Needs a restart to fully take effect.</strong>`
<strong style="color: var(--text-accent)">Needs a restart to fully take effect.</strong>`
})
new Setting(containerEl)
.setName('Ignore diacritics')
@@ -91,7 +104,7 @@ export class SettingsTab extends PluginSettingTab {
indexedFileTypesDesc.createSpan({}, span => {
span.innerHTML = `In addition to standard <code>md</code> files, Omnisearch can also index other plain text files.<br/>
Add extensions separated by a space. Example: <code>txt org</code>.<br />
<strong>Needs a restart to fully take effect.</strong>`
<strong style="color: var(--text-accent)">Needs a restart to fully take effect.</strong>`
})
new Setting(containerEl)
.setName('Additional files to index')
@@ -106,50 +119,68 @@ export class SettingsTab extends PluginSettingTab {
})
})
// Index PDFs
const indexPDFsDesc = new DocumentFragment()
indexPDFsDesc.createSpan({}, span => {
span.innerHTML = `Omnisearch will index your PDFs, and return them in search results.
This feature is currently a work-in-progress, please report slowdowns or issues that you might experience.<br>
PDFs being quite slow to index, <strong style="color: var(--text-accent)">it is strongly recommended to also enable "Store index in file"</strong>.<br>
<strong>Needs a restart to fully take effect.</strong>`
})
new Setting(containerEl)
.setName('BETA - Index PDFs')
.setDesc(indexPDFsDesc)
.addToggle(toggle =>
toggle.setValue(settings.indexPDFs).onChange(async v => {
settings.indexPDFs = v
await saveSettings(this.plugin)
})
)
// // Background processes
// new Setting(containerEl)
// .setName(
// `Background processes (default: ${DEFAULT_SETTINGS.backgroundProcesses})`
// )
// .setDesc('The maximum number of processes for background work, like PDF indexing. This value should not be higher than your number of CPU cores.')
// .addSlider(cb => {
// cb.setLimits(1, 16, 1)
// .setValue(settings.backgroundProcesses)
// .setDynamicTooltip()
// .onChange(v => {
// settings.backgroundProcesses = v
// saveSettings(this.plugin)
// })
// })
// Store index
const serializedIndexDesc = new DocumentFragment()
serializedIndexDesc.createSpan({}, span => {
span.innerHTML = `The search index is stored on disk, instead of being rebuilt at every startup.
This results in faster loading times for bigger vaults and mobile devices.<br />
<em>⚠️ Note: the index can become corrupted - if you notice any issue, disable and re-enable this option to clear the cache.</em><br/>
<em>⚠️ Cache files in <code>.obsidian/plugins/omnisearch/</code> must not be synchronized.</em><br/>
<strong>Needs a restart to fully take effect.</strong>
span.innerHTML = `This will speedup startup times after the initial indexing. Do not activate it unless indexing is too slow on your device:
<ul>
<li>PDF indexing is not affected by this setting</li>
<li>⚠️ The index can become corrupted - if you notice any issue, disable and re-enable this option to clear the cache.</li>
<li>⚠️ Cache files in <code>.obsidian/plugins/omnisearch/*.data</code> must not be synchronized between your devices.</li>
</ul>
<strong style="color: var(--text-accent)">Needs a restart to fully take effect.</strong>
`
})
new Setting(containerEl)
.setName('Store index in file')
.setName('Persist cache on disk')
.setDesc(serializedIndexDesc)
.addToggle(toggle =>
toggle.setValue(settings.storeIndexInFile).onChange(async v => {
toggle.setValue(settings.persistCache).onChange(async v => {
try {
await app.vault.adapter.remove(notesCacheFilePath)
} catch (e) {
console.warn(e)
}
try {
await app.vault.adapter.remove(searchIndexFilePath)
await app.vault.adapter.remove(minisearchCacheFilePath)
} catch (e) {
console.warn(e)
}
settings.storeIndexInFile = v
settings.persistCache = v
await saveSettings(this.plugin)
})
)
// PDF Indexing
const indexPDFsDesc = new DocumentFragment()
indexPDFsDesc.createSpan({}, span => {
span.innerHTML = `Omnisearch will include PDFs in search results.
This feature is currently a work-in-progress, please report slowdowns or issues that you might experience.<br>
Each PDF can take a few seconds to be indexed, so it may not appear immediately in search results.<br>
<strong style="color: var(--text-accent)">Needs a restart to fully take effect.</strong>`
})
new Setting(containerEl)
.setName('BETA - PDF Indexing')
.setDesc(indexPDFsDesc)
.addToggle(toggle =>
toggle.setValue(settings.PDFIndexing).onChange(async v => {
settings.PDFIndexing = v
await saveSettings(this.plugin)
})
)
@@ -285,9 +316,9 @@ export class SettingsTab extends PluginSettingTab {
weightSlider(cb: SliderComponent, key: keyof WeightingSettings): void {
cb.setLimits(1, 3, 0.1)
cb.setValue(settings[key])
cb.setDynamicTooltip()
cb.onChange(v => {
.setValue(settings[key])
.setDynamicTooltip()
.onChange(v => {
settings[key] = v
saveSettings(this.plugin)
})
@@ -298,7 +329,8 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
respectExcluded: true,
ignoreDiacritics: true,
indexedFileTypes: [] as string[],
indexPDFs: false,
PDFIndexing: false,
backgroundProcesses: Math.max(1, Math.floor(require('os').cpus().length / 2)),
showIndexingNotices: false,
showShortName: false,
@@ -314,7 +346,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
CtrlJK: false,
CtrlNP: false,
storeIndexInFile: false,
persistCache: false,
welcomeMessage: '',
} as const

4
src/typings/workers.d.ts vendored Normal file
View File

@@ -0,0 +1,4 @@
// Ambient declaration for every import whose specifier starts with "web-worker:".
// The default export is typed as a constructor that takes an options value
// and produces a Worker.
declare module "web-worker:*" {
const WorkerFactory: new (options: any) => Worker;
export default WorkerFactory;
}

View File

@@ -10,6 +10,7 @@ import {
regexYaml,
} from './globals'
import { settings } from './settings'
import { createHash, type BinaryLike } from 'crypto'
export function highlighter(str: string): string {
return `<span class="${highlightClass}">${str}</span>`
@@ -172,12 +173,10 @@ export function getCtrlKeyLabel(): 'ctrl' | '⌘' {
return Platform.isMacOS ? '⌘' : 'ctrl'
}
export function canIndexPDFs(): boolean {
return settings.indexPDFs
}
export function isFileIndexable(path: string): boolean {
return (canIndexPDFs() && path.endsWith('.pdf')) || isFilePlaintext(path)
return (
(settings.PDFIndexing && path.endsWith('.pdf')) || isFilePlaintext(path)
)
}
export function isFilePlaintext(path: string): boolean {
@@ -194,6 +193,7 @@ export function getExtension(path: string): string {
}
export function showWelcomeNotice(plugin: Plugin) {
return
const code = '1.6.0'
if (settings.welcomeMessage !== code) {
const welcome = new DocumentFragment()
@@ -208,3 +208,7 @@ New beta feature: PDF search 🔎📄
plugin.saveData(settings)
}
/**
 * Computes the MD5 digest of the given data.
 * @param data - content to hash
 * @returns the digest as a hexadecimal string
 */
export function md5(data: BinaryLike): string {
  const hasher = createHash('md5')
  hasher.update(data)
  return hasher.digest('hex')
}