Merge branch 'develop' into feature/ocr

This commit is contained in:
Simon Cambier
2022-11-02 14:43:13 +01:00
4 changed files with 66 additions and 18 deletions

View File

@@ -14,17 +14,20 @@ Under the hood, it uses the excellent [MiniSearch](https://github.com/lucaong/mi
## Features
- Find your notes faster than ever
- Workflow similar to the "Quick Switcher" core plugin
- Automatic document scoring using the [BM25 algorithm](https://github.com/lucaong/minisearch/issues/129#issuecomment-1046257399)
- The relevance of a document against a query depends on the number of times the query terms appear in the document, its filename, and its headings
- Can search other plaintext files and PDFs (configurable in settings)
- Workflow similar to "Quick Switcher" plugins
- Can search other plaintext files and PDFs
- Opt-in in settings
- PDF indexing is disabled on iOS
- Keyboard first: you never have to use your mouse
- Resistance to typos
- Switch between Vault and In-file search to quickly skim multiple results in a single note
- Supports `"expressions in quotes"` and `-exclusions`
- Directly insert a `[[link]]` from the search results
- Respects Obsidian's "Excluded Files" list - results are downranked, not hidden
- Optional support for Vim navigation keys (ctrl + j, k, n, p)
- Supports Vim navigation keys (ctrl + j, k, n, p)
**Note:** support of Chinese, Japanese, Korean, etc. depends on [this additional plugin](https://github.com/aidenlx/cm-chs-patch). Please read its documentation for more information.
@@ -121,17 +124,15 @@ See [styles.css](./assets/styles.css) for more information.
**Omnisearch makes Obsidian sluggish at startup.**
- You may have _big_ documents. Huge notes (like novels) can freeze the interface for a short time when being indexed. Enabling the setting "_Persist cache on disk_" may help you in this case.
- You may have _big_ documents. Huge notes (like novels) can freeze the interface for a short time when being indexed. While Omnisearch uses a cache between sessions, it's still rebuilt at startup to keep it up-to-date.
**I have thousands of notes, and at startup I have to wait a few seconds before making a query, or else Omnisearch does not return all the expected results.**
**I have thousands of notes, and at startup I have to wait a few seconds before Omnisearch gives me the context of a result.**
- Enabling the setting "_Persist cache on disk_" may help you in this case.
- Omnisearch refreshes its index at startup. During this time, you can still find notes, but Omnisearch is not able to show you the excerpts.
**Omnisearch gives inconsistent/invalid results, or there are errors in the developer console.**
- Go to the Omnisearch settings.
- If applicable, disable and re-enable "*Persist cache on disk*".
- Restart Obsidian to clear the cache and force a reindex.
- Restart Obsidian to force a reindex of Omnisearch.
**A query should return a result that does not appear.**

View File

@@ -1,6 +1,35 @@
import Dexie from 'dexie'
class OmnisearchCache extends Dexie {
export class OmnisearchCache extends Dexie {
// Schema version handed to Dexie's `version()` when the stores are declared.
// `clearOldDatabases()` compares stored database versions against this value times 10.
public static readonly dbVersion = 6
// Common name prefix used to recognize Omnisearch's IndexedDB databases
public static readonly dbPrefix = 'omnisearch/cache/'
// Full database name: the prefix followed by `app.appId`
public static readonly dbName = OmnisearchCache.dbPrefix + app.appId
// Shared instance, created on the first call to `getInstance()`
private static instance: OmnisearchCache
/**
 * Deletes every Omnisearch IndexedDB database whose stored version differs
 * from the current one.
 *
 * Candidates are the databases whose name starts with
 * `OmnisearchCache.dbPrefix`. The returned promise settles only once every
 * deletion request has reported success, failure, or being blocked by
 * another open connection. A failed or blocked deletion is logged and never
 * rejects the promise, so a stale database cannot abort the caller.
 *
 * @returns a promise that resolves when all deletion requests have settled
 */
public static async clearOldDatabases(): Promise<void> {
  // Dexie stores its version multiplied by 10
  // https://github.com/dexie/Dexie.js/issues/59
  const storedVersion = OmnisearchCache.dbVersion * 10
  const toDelete = (await indexedDB.databases()).filter(
    db =>
      db.name?.startsWith(OmnisearchCache.dbPrefix) &&
      db.version !== storedVersion
  )
  if (!toDelete.length) {
    return
  }

  console.log('Omnisearch - Those IndexedDb databases will be deleted:')
  const deletions: Promise<void>[] = []
  for (const db of toDelete) {
    const name = db.name
    if (!name) {
      continue
    }
    console.log(name + ' ' + db.version)
    deletions.push(
      new Promise<void>(resolve => {
        // The executor runs synchronously, so requests are issued in list order
        const request = indexedDB.deleteDatabase(name)
        request.onsuccess = () => resolve()
        request.onerror = () => {
          console.error('Omnisearch - Could not delete database ' + name)
          console.error(request.error)
          resolve()
        }
        // A blocked request stays pending until the other connections close;
        // resolve now rather than stalling the caller for an unbounded time.
        request.onblocked = () => {
          console.warn('Omnisearch - Deletion of database ' + name + ' is blocked')
          resolve()
        }
      })
    )
  }
  await Promise.all(deletions)
}
//#region Table declarations
pdf!: Dexie.Table<
{ path: string; hash: string; size: number; text: string },
string
@@ -8,9 +37,19 @@ class OmnisearchCache extends Dexie {
searchHistory!: Dexie.Table<{ id?: number; query: string }, number>
minisearch!: Dexie.Table<{ date: string; data: string }, string>
constructor() {
super('omnisearch/cache/' + app.appId)
this.version(5).stores({
//#endregion Table declarations
/**
 * Returns the shared `OmnisearchCache`, creating it on the first call.
 */
public static getInstance() {
  const existing = OmnisearchCache.instance
  if (existing) {
    return existing
  }
  // First access: build the instance and remember it for later calls
  const created = new OmnisearchCache()
  OmnisearchCache.instance = created
  return created
}
private constructor() {
super(OmnisearchCache.dbName)
// Database structure
this.version(OmnisearchCache.dbVersion).stores({
pdf: 'path, hash, size',
searchHistory: '++id',
minisearch: 'date',
@@ -18,4 +57,4 @@ class OmnisearchCache extends Dexie {
}
}
export const database = new OmnisearchCache()
export const database = OmnisearchCache.getInstance()

View File

@@ -11,12 +11,14 @@ import api from './tools/api'
import { isFilePlaintext, wait } from './tools/utils'
import * as NotesIndex from './notes-index'
import * as FileLoader from './file-loader'
import { OmnisearchCache } from './database'
export default class OmnisearchPlugin extends Plugin {
private ribbonButton?: HTMLElement
async onload(): Promise<void> {
await cleanOldCacheFiles()
await OmnisearchCache.clearOldDatabases()
await loadSettings(this)
// Initialize minisearch
@@ -105,6 +107,14 @@ export default class OmnisearchPlugin extends Plugin {
* Read the files and feed them to Minisearch
*/
async function populateIndex(): Promise<void> {
// We use a tmp minisearch instance to leave the main instance mostly untouched.
// Otherwise, we'd have to clear the main instance, and (asynchronously) load the notes.
// That would cause a "downtime" in Omnisearch while the index is being gradually rebuilt.
//
// With the tmp method, we still have access to the cache data while all the
// fresh indexing is done in the background.
// Once all notes are loaded in tmp, we (synchronously) export tmp and import it into main.
// That can cause a small freeze, but no downtime.
const tmpEngine = SearchEngine.getTmpEngine()
// Load plain text files
@@ -123,7 +133,7 @@ async function populateIndex(): Promise<void> {
console.time('Omnisearch - Timing')
const pdfs = await FileLoader.getPDFFiles()
// Index them
await tmpEngine.addAllToMinisearch(pdfs)
await SearchEngine.getEngine().addAllToMinisearch(pdfs)
console.log(`Omnisearch - Indexed ${pdfs.length} PDFs`)
console.timeEnd('Omnisearch - Timing')
}
@@ -141,7 +151,7 @@ async function populateIndex(): Promise<void> {
// Load PDFs into the main search engine, and write cache
SearchEngine.loadTmpDataIntoMain()
SearchEngine.isIndexing.set(false)
await tmpEngine.writeToCache()
await SearchEngine.getEngine().writeToCache()
// Clear memory
SearchEngine.clearTmp()

View File

@@ -317,8 +317,6 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
weightH2: 1.3,
weightH3: 1.1,
// persistCache: false,
welcomeMessage: '',
} as const