Merge branch 'master' into feature/ocr
# Conflicts: # src/database.ts # src/file-loader.ts # src/main.ts # src/pdf/pdf-manager.ts
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"id": "omnisearch",
|
||||
"name": "Omnisearch",
|
||||
"version": "1.7.10",
|
||||
"version": "1.8.0-beta.1",
|
||||
"minAppVersion": "1.0.0",
|
||||
"description": "A search engine that just works",
|
||||
"author": "Simon Cambier",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "scambier.obsidian-search",
|
||||
"version": "1.7.10",
|
||||
"version": "1.8.0-beta.1",
|
||||
"description": "A search engine for Obsidian",
|
||||
"main": "dist/main.js",
|
||||
"scripts": {
|
||||
@@ -44,7 +44,7 @@
|
||||
"@vanakat/plugin-api": "0.1.0",
|
||||
"dexie": "^3.2.2",
|
||||
"lodash-es": "4.17.21",
|
||||
"minisearch": "5.0.0",
|
||||
"minisearch": "github:scambier/minisearch#callback_desync",
|
||||
"p-limit": "^4.0.0",
|
||||
"pure-md5": "^0.1.14"
|
||||
},
|
||||
|
||||
14
pnpm-lock.yaml
generated
14
pnpm-lock.yaml
generated
@@ -21,7 +21,7 @@ specifiers:
|
||||
esbuild-svelte: ^0.7.1
|
||||
jest: ^27.5.1
|
||||
lodash-es: 4.17.21
|
||||
minisearch: 5.0.0
|
||||
minisearch: github:scambier/minisearch#callback_desync
|
||||
obsidian: latest
|
||||
obsidian-text-extract: link:C:/Dev/Obsidian/obsidian-text-extract/dist
|
||||
p-limit: ^4.0.0
|
||||
@@ -40,7 +40,7 @@ dependencies:
|
||||
'@vanakat/plugin-api': 0.1.0
|
||||
dexie: 3.2.2
|
||||
lodash-es: 4.17.21
|
||||
minisearch: 5.0.0
|
||||
minisearch: github.com/scambier/minisearch/adf11cab46d851220a41c9ad95ed986b630f0f3c
|
||||
p-limit: 4.0.0
|
||||
pure-md5: 0.1.14
|
||||
|
||||
@@ -4090,10 +4090,6 @@ packages:
|
||||
resolution: {integrity: sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==}
|
||||
dev: true
|
||||
|
||||
/minisearch/5.0.0:
|
||||
resolution: {integrity: sha512-VEwBhl8aFtc2UG2XmP7a4XaZxVfNhe7GvB2W/ZRGbLL3P3LbBhkoOezBWsMqG8Mr5VonqXAMRWth79XXKja1bQ==}
|
||||
dev: false
|
||||
|
||||
/mkdirp/0.5.6:
|
||||
resolution: {integrity: sha512-FP+p8RB8OWpF3YZBCrP5gtADmtXApB5AMLn+vdyA+PyxCjrCs00mjyUozssO33cwDeT3wNGdLxJ5M//YqtHAJw==}
|
||||
hasBin: true
|
||||
@@ -5132,3 +5128,9 @@ packages:
|
||||
resolution: {integrity: sha512-9bnSc/HEW2uRy67wc+T8UwauLuPJVn28jb+GtJY16iiKWyvmYJRXVT4UamsAEGQfPohgr2q4Tq0sQbQlxTfi1g==}
|
||||
engines: {node: '>=12.20'}
|
||||
dev: false
|
||||
|
||||
github.com/scambier/minisearch/adf11cab46d851220a41c9ad95ed986b630f0f3c:
|
||||
resolution: {tarball: https://codeload.github.com/scambier/minisearch/tar.gz/adf11cab46d851220a41c9ad95ed986b630f0f3c}
|
||||
name: minisearch
|
||||
version: 5.0.0
|
||||
dev: false
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
import type { TFile } from 'obsidian'
|
||||
import { Notice, type TFile } from 'obsidian'
|
||||
import type { IndexedDocument } from './globals'
|
||||
import { database } from './database'
|
||||
import MiniSearch from 'minisearch'
|
||||
import { minisearchOptions } from './search/search-engine'
|
||||
import { makeMD5, wait } from './tools/utils'
|
||||
import { settings } from './settings'
|
||||
|
||||
class CacheManager {
|
||||
private documentsCache: Map<string, IndexedDocument> = new Map()
|
||||
private liveDocuments: Map<string, IndexedDocument> = new Map()
|
||||
/**
|
||||
* Show an empty input field next time the user opens Omnisearch modal
|
||||
*/
|
||||
@@ -35,48 +37,147 @@ class CacheManager {
|
||||
return data
|
||||
}
|
||||
|
||||
public async updateDocument(path: string, note: IndexedDocument) {
|
||||
this.documentsCache.set(path, note)
|
||||
/**
|
||||
* Important: keep this method async for the day it _really_ becomes async.
|
||||
* This will avoid a refactor.
|
||||
* @param path
|
||||
* @param note
|
||||
*/
|
||||
public async updateLiveDocument(
|
||||
path: string,
|
||||
note: IndexedDocument
|
||||
): Promise<void> {
|
||||
this.liveDocuments.set(path, note)
|
||||
}
|
||||
|
||||
public deleteDocument(key: string): void {
|
||||
this.documentsCache.delete(key)
|
||||
public deleteLiveDocument(key: string): void {
|
||||
this.liveDocuments.delete(key)
|
||||
}
|
||||
|
||||
public getDocument(key: string): IndexedDocument | undefined {
|
||||
return this.documentsCache.get(key)
|
||||
}
|
||||
|
||||
public getNonExistingNotesFromMemCache(): IndexedDocument[] {
|
||||
return Object.values(this.documentsCache).filter(note => note.doesNotExist)
|
||||
public getLiveDocument(key: string): IndexedDocument | undefined {
|
||||
return this.liveDocuments.get(key)
|
||||
}
|
||||
|
||||
public isDocumentOutdated(file: TFile): boolean {
|
||||
const indexedNote = this.getDocument(file.path)
|
||||
const indexedNote = this.getLiveDocument(file.path)
|
||||
return !indexedNote || indexedNote.mtime !== file.stat.mtime
|
||||
}
|
||||
|
||||
//#region Minisearch
|
||||
|
||||
/**
 * Builds a checksum for a list of documents.
 * The documents are sorted by `path`, serialized with JSON.stringify,
 * and the resulting string is hashed with makeMD5.
 * The given array is not modified: the sort is done on a shallow copy.
 * @param documents
 */
public getDocumentsChecksum(documents: IndexedDocument[]): string {
  // Array.prototype.sort() works in place; copy first so the caller's
  // array keeps its original order.
  const sortedByPath = [...documents].sort((a, b) => {
    if (a.path < b.path) return -1
    if (a.path > b.path) return 1
    return 0
  })
  return makeMD5(JSON.stringify(sortedByPath))
}
|
||||
|
||||
public async getMinisearchCache(): Promise<MiniSearch | null> {
|
||||
const cache = (await database.minisearch.toArray())[0]
|
||||
if (!cache) {
|
||||
// Retrieve documents and make their checksum
|
||||
const cachedDocs = await database.documents.toArray()
|
||||
const checksum = this.getDocumentsChecksum(cachedDocs.map(d => d.document))
|
||||
|
||||
// Add those documents in the live cache
|
||||
cachedDocs.forEach(doc =>
|
||||
cacheManager.updateLiveDocument(doc.path, doc.document)
|
||||
)
|
||||
|
||||
// Retrieve the search cache, and verify the checksum
|
||||
const cachedIndex = (await database.minisearch.toArray())[0]
|
||||
if (cachedIndex?.checksum !== checksum) {
|
||||
console.warn("Omnisearch - Cache - Checksums don't match, clearing cache")
|
||||
// Invalid (or null) cache, clear everything
|
||||
await database.minisearch.clear()
|
||||
await database.documents.clear()
|
||||
return null
|
||||
}
|
||||
|
||||
try {
|
||||
return MiniSearch.loadJSON(cache.data, minisearchOptions)
|
||||
return MiniSearch.loadJS(cachedIndex.data, minisearchOptions)
|
||||
} catch (e) {
|
||||
if (settings.showIndexingNotices) {
|
||||
new Notice(
|
||||
'Omnisearch - Cache missing or invalid. Some freezes may occur while Omnisearch indexes your vault.'
|
||||
)
|
||||
}
|
||||
console.error('Omnisearch - Error while loading Minisearch cache')
|
||||
console.error(e)
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
public async writeMinisearchCache(minisearch: MiniSearch): Promise<void> {
|
||||
/**
 * Compares the given documents with the ones stored in `database.documents`
 * and lists the differences:
 * - `toAdd`: given documents whose path is not in the database
 * - `toDelete`: stored documents whose path is not among the given documents
 * - `toUpdate`: stored documents whose path is given with a different `mtime`,
 *   paired with their new version
 * @param documents
 */
public async getDiffDocuments(documents: IndexedDocument[]): Promise<{
  toDelete: IndexedDocument[]
  toAdd: IndexedDocument[]
  toUpdate: { old: IndexedDocument; new: IndexedDocument }[]
}> {
  const cachedDocs = await database.documents.toArray()

  // Index both sides by path once, instead of scanning an array per lookup
  const cachedPaths = new Set(cachedDocs.map(c => c.path))
  const documentsByPath = new Map<string, IndexedDocument>()
  for (const doc of documents) {
    // Keep the first document found for a path, like Array.find() would
    if (!documentsByPath.has(doc.path)) {
      documentsByPath.set(doc.path, doc)
    }
  }

  const toAdd = documents.filter(d => !cachedPaths.has(d.path))

  const toDelete = cachedDocs
    .filter(c => !documentsByPath.has(c.path))
    .map(c => c.document)

  const toUpdate: { old: IndexedDocument; new: IndexedDocument }[] = []
  for (const cached of cachedDocs) {
    const fresh = documentsByPath.get(cached.path)
    // Same path, but the modification time changed since it was stored
    if (fresh && fresh.mtime !== cached.mtime) {
      toUpdate.push({ old: cached.document, new: fresh })
    }
  }

  return {
    toDelete,
    toAdd,
    toUpdate,
  }
}
|
||||
|
||||
public async writeMinisearchCache(
|
||||
minisearch: MiniSearch,
|
||||
documents: IndexedDocument[]
|
||||
): Promise<void> {
|
||||
const { toDelete, toAdd, toUpdate } = await this.getDiffDocuments(documents)
|
||||
|
||||
// Delete
|
||||
// console.log(`Omnisearch - Cache - Will delete ${toDelete.length} documents`)
|
||||
await database.documents.bulkDelete(toDelete.map(o => o.path))
|
||||
|
||||
// Add
|
||||
// console.log(`Omnisearch - Cache - Will add ${toAdd.length} documents`)
|
||||
await database.documents.bulkAdd(
|
||||
toAdd.map(o => ({ document: o, mtime: o.mtime, path: o.path }))
|
||||
)
|
||||
|
||||
// Update
|
||||
// console.log(`Omnisearch - Cache - Will update ${toUpdate.length} documents`)
|
||||
await database.documents.bulkPut(
|
||||
toUpdate.map(o => ({
|
||||
document: o.new,
|
||||
mtime: o.new.mtime,
|
||||
path: o.new.path,
|
||||
}))
|
||||
)
|
||||
|
||||
await database.minisearch.clear()
|
||||
await database.minisearch.add({
|
||||
date: new Date().toISOString(),
|
||||
data: JSON.stringify(minisearch.toJSON()),
|
||||
checksum: this.getDocumentsChecksum(documents),
|
||||
data: minisearch.toJSON(),
|
||||
})
|
||||
console.log('Omnisearch - Search cache written')
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
}
|
||||
$: reg = stringsToRegex(note.foundWords)
|
||||
$: cleanedContent = makeExcerpt(note.content, note.matches[0]?.offset ?? -1)
|
||||
$: glyph = cacheManager.getDocument(note.path)?.doesNotExist
|
||||
$: glyph = cacheManager.getLiveDocument(note.path)?.doesNotExist
|
||||
$: title = settings.showShortName ? note.basename : note.path
|
||||
</script>
|
||||
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
import Dexie from 'dexie'
|
||||
import type { AsPlainObject } from 'minisearch'
|
||||
import type { IndexedDocument } from './globals'
|
||||
|
||||
export class OmnisearchCache extends Dexie {
|
||||
public static readonly dbVersion = 6
|
||||
public static readonly dbVersion = 7
|
||||
public static readonly dbPrefix = 'omnisearch/cache/'
|
||||
public static readonly dbName = OmnisearchCache.dbPrefix + app.appId
|
||||
|
||||
@@ -30,12 +32,16 @@ export class OmnisearchCache extends Dexie {
|
||||
|
||||
//#region Table declarations
|
||||
|
||||
pdf!: Dexie.Table<
|
||||
{ path: string; hash: string; size: number; text: string },
|
||||
pdf!: Dexie.Table<{ path: string; hash: string; text: string }, string>
|
||||
documents!: Dexie.Table<
|
||||
{ path: string; mtime: number; document: IndexedDocument },
|
||||
string
|
||||
>
|
||||
searchHistory!: Dexie.Table<{ id?: number; query: string }, number>
|
||||
minisearch!: Dexie.Table<{ date: string; data: string }, string>
|
||||
minisearch!: Dexie.Table<
|
||||
{ date: string; checksum: string; data: AsPlainObject },
|
||||
string
|
||||
>
|
||||
|
||||
//#endregion Table declarations
|
||||
|
||||
@@ -52,9 +58,15 @@ export class OmnisearchCache extends Dexie {
|
||||
this.version(OmnisearchCache.dbVersion).stores({
|
||||
pdf: 'path, hash, size',
|
||||
searchHistory: '++id',
|
||||
documents: 'path',
|
||||
minisearch: 'date',
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * Empties the `minisearch` table, then the `documents` table.
 */
public async clearCache() {
  const tablesToClear = [this.minisearch, this.documents]
  for (const table of tablesToClear) {
    await table.clear()
  }
}
|
||||
}
|
||||
|
||||
export const database = OmnisearchCache.getInstance()
|
||||
|
||||
@@ -3,7 +3,6 @@ import {
|
||||
extractHeadingsFromCache,
|
||||
getAliasesFromMetadata,
|
||||
getTagsFromMetadata,
|
||||
isFileImage,
|
||||
isFilePlaintext,
|
||||
removeDiacritics,
|
||||
} from './tools/utils'
|
||||
@@ -11,6 +10,7 @@ import * as NotesIndex from './notes-index'
|
||||
import type { TFile } from 'obsidian'
|
||||
import type { IndexedDocument } from './globals'
|
||||
import { getNonExistingNotes } from './tools/notes'
|
||||
import { database } from './database'
|
||||
import { getImageText, getPdfText } from 'obsidian-text-extract'
|
||||
|
||||
/**
|
||||
@@ -22,7 +22,7 @@ export async function getPlainTextFiles(): Promise<IndexedDocument[]> {
|
||||
for (const file of allFiles) {
|
||||
const doc = await fileToIndexedDocument(file)
|
||||
data.push(doc)
|
||||
await cacheManager.updateDocument(file.path, doc)
|
||||
await cacheManager.updateLiveDocument(file.path, doc)
|
||||
}
|
||||
return data
|
||||
}
|
||||
@@ -32,44 +32,19 @@ export async function getPlainTextFiles(): Promise<IndexedDocument[]> {
|
||||
* If a PDF isn't cached, it will be read from the disk and added to the IndexedDB
|
||||
*/
|
||||
export async function getPDFFiles(): Promise<IndexedDocument[]> {
|
||||
const allFiles = app.vault.getFiles().filter(f => f.path.endsWith('.pdf'))
|
||||
const data: IndexedDocument[] = []
|
||||
const fromDisk = app.vault.getFiles().filter(f => f.path.endsWith('.pdf'))
|
||||
const fromDb = await database.pdf.toArray()
|
||||
|
||||
const data: IndexedDocument[] = []
|
||||
const input = []
|
||||
for (const file of allFiles) {
|
||||
for (const file of fromDisk) {
|
||||
input.push(
|
||||
NotesIndex.processQueue(async () => {
|
||||
const doc = await fileToIndexedDocument(file)
|
||||
await cacheManager.updateDocument(file.path, doc)
|
||||
data.push(doc)
|
||||
})
|
||||
const doc = await fileToIndexedDocument(
|
||||
file,
|
||||
fromDb.find(o => o.path === file.path)?.text
|
||||
)
|
||||
}
|
||||
await Promise.all(input)
|
||||
return data
|
||||
}
|
||||
|
||||
/**
|
||||
* Return all Image files as IndexedDocuments.
|
||||
* If an image isn't cached, it will be read from the disk and added to the IndexedDB
|
||||
*/
|
||||
export async function getImageFiles(): Promise<IndexedDocument[]> {
|
||||
const allFiles = app.vault
|
||||
.getFiles()
|
||||
.filter(
|
||||
f =>
|
||||
f.path.endsWith('.png') ||
|
||||
f.path.endsWith('.jpg') ||
|
||||
f.path.endsWith('.jpeg')
|
||||
)
|
||||
const data: IndexedDocument[] = []
|
||||
|
||||
const input = []
|
||||
for (const file of allFiles) {
|
||||
input.push(
|
||||
NotesIndex.processQueue(async () => {
|
||||
const doc = await fileToIndexedDocument(file)
|
||||
await cacheManager.updateDocument(file.path, doc)
|
||||
await cacheManager.updateLiveDocument(file.path, doc)
|
||||
data.push(doc)
|
||||
})
|
||||
)
|
||||
@@ -82,41 +57,45 @@ export async function getImageFiles(): Promise<IndexedDocument[]> {
|
||||
* Convert a file into an IndexedDocument.
|
||||
* Will use the cache if possible.
|
||||
* @param file
|
||||
* @param content If we give a text content, will skip the fetching part
|
||||
*/
|
||||
export async function fileToIndexedDocument(
|
||||
file: TFile
|
||||
file: TFile,
|
||||
content?: string
|
||||
): Promise<IndexedDocument> {
|
||||
let content: string
|
||||
if (!content) {
|
||||
if (isFilePlaintext(file.path)) {
|
||||
content = removeDiacritics(await app.vault.cachedRead(file))
|
||||
content = await app.vault.cachedRead(file)
|
||||
} else if (file.path.endsWith('.pdf')) {
|
||||
content = removeDiacritics(await getPdfText(file))
|
||||
} else if (isFileImage(file.path)) {
|
||||
content = removeDiacritics(await getImageText(file))
|
||||
content = await getPdfText(file)
|
||||
} else {
|
||||
throw new Error('Invalid file: ' + file.path)
|
||||
}
|
||||
}
|
||||
|
||||
content = removeDiacritics(content)
|
||||
const metadata = app.metadataCache.getFileCache(file)
|
||||
|
||||
// EXCALIDRAW
|
||||
// Remove the json code
|
||||
if (metadata?.frontmatter?.['excalidraw-plugin']) {
|
||||
const comments = metadata.sections?.filter(s => s.type === 'comment') ?? []
|
||||
for (const { start, end } of comments.map(c => c.position)) {
|
||||
content =
|
||||
content.substring(0, start.offset - 1) + content.substring(end.offset)
|
||||
}
|
||||
}
|
||||
|
||||
// Look for links that lead to non-existing files,
|
||||
// and add them to the index.
|
||||
if (metadata) {
|
||||
const nonExisting = getNonExistingNotes(file, metadata)
|
||||
for (const name of nonExisting.filter(o => !cacheManager.getDocument(o))) {
|
||||
for (const name of nonExisting.filter(
|
||||
o => !cacheManager.getLiveDocument(o)
|
||||
)) {
|
||||
NotesIndex.addNonExistingToIndex(name, file.path)
|
||||
}
|
||||
|
||||
// EXCALIDRAW
|
||||
// Remove the json code
|
||||
if (metadata.frontmatter?.['excalidraw-plugin']) {
|
||||
const comments =
|
||||
metadata.sections?.filter(s => s.type === 'comment') ?? []
|
||||
for (const { start, end } of comments.map(c => c.position)) {
|
||||
content =
|
||||
content.substring(0, start.offset - 1) + content.substring(end.offset)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
|
||||
92
src/main.ts
92
src/main.ts
@@ -1,4 +1,4 @@
|
||||
import { Notice, Plugin, TFile } from 'obsidian'
|
||||
import { Notice, Platform, Plugin, TFile } from 'obsidian'
|
||||
import { SearchEngine } from './search/search-engine'
|
||||
import {
|
||||
OmnisearchInFileModal,
|
||||
@@ -12,6 +12,7 @@ import { isFilePlaintext, wait } from './tools/utils'
|
||||
import * as NotesIndex from './notes-index'
|
||||
import * as FileLoader from './file-loader'
|
||||
import { OmnisearchCache } from './database'
|
||||
import { cacheManager } from './cache-manager'
|
||||
|
||||
export default class OmnisearchPlugin extends Plugin {
|
||||
private ribbonButton?: HTMLElement
|
||||
@@ -21,9 +22,6 @@ export default class OmnisearchPlugin extends Plugin {
|
||||
await OmnisearchCache.clearOldDatabases()
|
||||
await loadSettings(this)
|
||||
|
||||
// Initialize minisearch
|
||||
await SearchEngine.initFromCache()
|
||||
|
||||
_registerAPI(this)
|
||||
|
||||
if (settings.ribbonIcon) {
|
||||
@@ -107,54 +105,68 @@ export default class OmnisearchPlugin extends Plugin {
|
||||
* Read the files and feed them to Minisearch
|
||||
*/
|
||||
async function populateIndex(): Promise<void> {
|
||||
// We use a tmp minisearch instance to leave the main instance mostly untouched.
|
||||
// Otherwise, we'd have to clear the main instance, and (asynchronously) load the notes.
|
||||
// That would cause a "downtime" in Omnisearch while the index is being gradually rebuilt.
|
||||
//
|
||||
// With the tmp method, we still have access to the cache data while all the
|
||||
// fresh indexing is done in the background.
|
||||
// Once all notes are loaded in tmp, we (synchronously) export tmp and import it into main.
|
||||
// That can cause a small freeze, but no downtime.
|
||||
const tmpEngine = SearchEngine.getTmpEngine()
|
||||
console.time('Omnisearch - Indexing duration')
|
||||
|
||||
// Load plain text files
|
||||
console.time('Omnisearch - Timing')
|
||||
const files = await FileLoader.getPlainTextFiles()
|
||||
// Index them
|
||||
await tmpEngine.addAllToMinisearch(files)
|
||||
console.log(`Omnisearch - Indexed ${files.length} notes`)
|
||||
console.timeEnd('Omnisearch - Timing')
|
||||
// Initialize minisearch
|
||||
let engine = SearchEngine.getEngine()
|
||||
|
||||
// Load normal notes into the main search engine
|
||||
SearchEngine.loadTmpDataIntoMain()
|
||||
// No cache for iOS
|
||||
if (!Platform.isIosApp) {
|
||||
engine = await SearchEngine.initFromCache()
|
||||
}
|
||||
|
||||
// Load plaintext files
|
||||
const plainTextFiles = await FileLoader.getPlainTextFiles()
|
||||
let allFiles = [...plainTextFiles]
|
||||
// iOS: since there's no cache, directly index the documents
|
||||
if (Platform.isIosApp) {
|
||||
await wait(1000)
|
||||
await engine.addAllToMinisearch(plainTextFiles)
|
||||
}
|
||||
|
||||
// Load PDFs
|
||||
if (settings.PDFIndexing) {
|
||||
console.time('Omnisearch - Timing')
|
||||
const pdfs = await FileLoader.getPDFFiles()
|
||||
// Index them
|
||||
await SearchEngine.getEngine().addAllToMinisearch(pdfs)
|
||||
console.log(`Omnisearch - Indexed ${pdfs.length} PDFs`)
|
||||
console.timeEnd('Omnisearch - Timing')
|
||||
// iOS: since there's no cache, just index the documents
|
||||
if (Platform.isIosApp) {
|
||||
await wait(1000)
|
||||
await engine.addAllToMinisearch(pdfs)
|
||||
}
|
||||
// Add PDFs to the files list
|
||||
allFiles = [...allFiles, ...pdfs]
|
||||
}
|
||||
|
||||
// Load Images
|
||||
// if (settings.PDFIndexing) {
|
||||
console.time('Omnisearch - Timing')
|
||||
const images = await FileLoader.getImageFiles()
|
||||
// Index them
|
||||
await tmpEngine.addAllToMinisearch(images)
|
||||
console.log(`Omnisearch - Indexed ${images.length} Images`)
|
||||
console.timeEnd('Omnisearch - Timing')
|
||||
// }
|
||||
// Other platforms: make a diff of what's to add/update/delete
if (!Platform.isIosApp) {
  // Check which documents need to be removed/added/updated
  const diffDocs = await cacheManager.getDiffDocuments(allFiles)

  // Add
  await engine.addAllToMinisearch(diffDocs.toAdd)
  // updateLiveDocument() is async: wait for it instead of leaving promises floating
  await Promise.all(
    diffDocs.toAdd.map(doc => cacheManager.updateLiveDocument(doc.path, doc))
  )

  // Delete
  for (const doc of diffDocs.toDelete) {
    engine.removeFromMinisearch(doc)
  }
  for (const doc of diffDocs.toDelete) {
    cacheManager.deleteLiveDocument(doc.path)
  }

  // Update (delete + add)
  for (const { old: stale, new: fresh } of diffDocs.toUpdate) {
    // The index still holds the old version, so that's the one to remove...
    engine.removeFromMinisearch(stale)
    // ...but the live cache must end up with the new version, not the old one
    await cacheManager.updateLiveDocument(fresh.path, fresh)
  }
  await engine.addAllToMinisearch(diffDocs.toUpdate.map(d => d.new))
}
|
||||
// Load PDFs into the main search engine, and write cache
|
||||
SearchEngine.loadTmpDataIntoMain()
|
||||
// SearchEngine.loadTmpDataIntoMain()
|
||||
SearchEngine.isIndexing.set(false)
|
||||
await SearchEngine.getEngine().writeToCache()
|
||||
if (!Platform.isIosApp) {
|
||||
await SearchEngine.getEngine().writeToCache(allFiles)
|
||||
}
|
||||
|
||||
// Clear memory
|
||||
SearchEngine.clearTmp()
|
||||
console.timeEnd('Omnisearch - Indexing duration')
|
||||
}
|
||||
|
||||
async function cleanOldCacheFiles() {
|
||||
|
||||
@@ -27,19 +27,19 @@ export async function addToIndexAndMemCache(
|
||||
|
||||
// Check if the file was already indexed as non-existent.
|
||||
// If so, remove it from the index, and add it again as a real note.
|
||||
if (cacheManager.getDocument(file.path)?.doesNotExist) {
|
||||
if (cacheManager.getLiveDocument(file.path)?.doesNotExist) {
|
||||
removeFromIndex(file.path)
|
||||
}
|
||||
|
||||
try {
|
||||
if (cacheManager.getDocument(file.path)) {
|
||||
if (cacheManager.getLiveDocument(file.path)) {
|
||||
throw new Error(`${file.basename} is already indexed`)
|
||||
}
|
||||
|
||||
// Make the document and index it
|
||||
const note = await fileToIndexedDocument(file)
|
||||
SearchEngine.getEngine().addSingleToMinisearch(note)
|
||||
await cacheManager.updateDocument(note.path, note)
|
||||
await cacheManager.updateLiveDocument(note.path, note)
|
||||
} catch (e) {
|
||||
// console.trace('Error while indexing ' + file.basename)
|
||||
console.error(e)
|
||||
@@ -55,7 +55,7 @@ export async function addToIndexAndMemCache(
|
||||
export function addNonExistingToIndex(name: string, parent: string): void {
|
||||
name = removeAnchors(name)
|
||||
const filename = name + (name.endsWith('.md') ? '' : '.md')
|
||||
if (cacheManager.getDocument(filename)) return
|
||||
if (cacheManager.getLiveDocument(filename)) return
|
||||
|
||||
const note: IndexedDocument = {
|
||||
path: filename,
|
||||
@@ -73,7 +73,7 @@ export function addNonExistingToIndex(name: string, parent: string): void {
|
||||
parent,
|
||||
}
|
||||
SearchEngine.getEngine().addSingleToMinisearch(note)
|
||||
cacheManager.updateDocument(filename, note)
|
||||
cacheManager.updateLiveDocument(filename, note)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -84,10 +84,10 @@ export function removeFromIndex(path: string): void {
|
||||
console.info(`"${path}" is not an indexable file`)
|
||||
return
|
||||
}
|
||||
const note = cacheManager.getDocument(path)
|
||||
const note = cacheManager.getLiveDocument(path)
|
||||
if (note) {
|
||||
SearchEngine.getEngine().removeFromMinisearch(note)
|
||||
cacheManager.deleteDocument(path)
|
||||
cacheManager.deleteLiveDocument(path)
|
||||
|
||||
// FIXME: only remove non-existing notes if they don't have another parent
|
||||
// cacheManager
|
||||
|
||||
@@ -1,8 +1,4 @@
|
||||
import MiniSearch, {
|
||||
type AsPlainObject,
|
||||
type Options,
|
||||
type SearchResult,
|
||||
} from 'minisearch'
|
||||
import MiniSearch, { type Options, type SearchResult } from 'minisearch'
|
||||
import {
|
||||
chsRegex,
|
||||
type IndexedDocument,
|
||||
@@ -19,6 +15,7 @@ import type { Query } from './query'
|
||||
import { settings } from '../settings'
|
||||
import { cacheManager } from '../cache-manager'
|
||||
import { writable } from 'svelte/store'
|
||||
import { Notice } from 'obsidian'
|
||||
|
||||
const tokenize = (text: string): string[] => {
|
||||
const tokens = text.split(SPACE_OR_PUNCTUATION)
|
||||
@@ -45,11 +42,15 @@ export const minisearchOptions: Options<IndexedDocument> = {
|
||||
'headings3',
|
||||
],
|
||||
storeFields: ['tags'],
|
||||
callbackWhenDesync() {
|
||||
new Notice(
|
||||
'Omnisearch - Your index cache may be incorrect or corrupted. If this message keeps appearing, go to Settings to clear the cache.'
|
||||
)
|
||||
},
|
||||
}
|
||||
|
||||
export class SearchEngine {
|
||||
private static engine?: SearchEngine
|
||||
private static tmpEngine?: SearchEngine
|
||||
public static isIndexing = writable(true)
|
||||
|
||||
/**
|
||||
@@ -63,41 +64,23 @@ export class SearchEngine {
|
||||
return this.engine
|
||||
}
|
||||
|
||||
/**
|
||||
* The secondary instance. This one is indexed in the background,
|
||||
* while the main instance is quickly filled with cache data
|
||||
*/
|
||||
public static getTmpEngine(): SearchEngine {
|
||||
if (!this.tmpEngine) {
|
||||
this.tmpEngine = new SearchEngine()
|
||||
}
|
||||
return this.tmpEngine
|
||||
}
|
||||
|
||||
/**
|
||||
* Instantiates the main instance with cache data (if it exists)
|
||||
*/
|
||||
public static async initFromCache(): Promise<void> {
|
||||
public static async initFromCache(): Promise<SearchEngine> {
|
||||
try {
|
||||
const cache = await cacheManager.getMinisearchCache()
|
||||
if (cache) {
|
||||
this.getEngine().minisearch = cache
|
||||
}
|
||||
} catch (e) {
|
||||
new Notice(
|
||||
'Omnisearch - Cache missing or invalid. Some freezes may occur while Omnisearch indexes your vault.'
|
||||
)
|
||||
console.error('Omnisearch - Could not init engine from cache')
|
||||
console.error(e)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the freshest indexed data into the main instance.
|
||||
*/
|
||||
public static loadTmpDataIntoMain(): void {
|
||||
const tmpData = this.getTmpEngine().minisearch.toJSON()
|
||||
this.getEngine().minisearch = MiniSearch.loadJS(tmpData, minisearchOptions)
|
||||
}
|
||||
|
||||
public static clearTmp(): void {
|
||||
this.getTmpEngine().minisearch = new MiniSearch(minisearchOptions)
|
||||
return this.getEngine()
|
||||
}
|
||||
|
||||
private minisearch: MiniSearch
|
||||
@@ -147,9 +130,10 @@ export class SearchEngine {
|
||||
const exactTerms = query.getExactTerms()
|
||||
if (exactTerms.length) {
|
||||
results = results.filter(r => {
|
||||
const title = cacheManager.getDocument(r.id)?.path.toLowerCase() ?? ''
|
||||
const title =
|
||||
cacheManager.getLiveDocument(r.id)?.path.toLowerCase() ?? ''
|
||||
const content = stripMarkdownCharacters(
|
||||
cacheManager.getDocument(r.id)?.content ?? ''
|
||||
cacheManager.getLiveDocument(r.id)?.content ?? ''
|
||||
).toLowerCase()
|
||||
return exactTerms.every(q => content.includes(q) || title.includes(q))
|
||||
})
|
||||
@@ -160,7 +144,7 @@ export class SearchEngine {
|
||||
if (exclusions.length) {
|
||||
results = results.filter(r => {
|
||||
const content = stripMarkdownCharacters(
|
||||
cacheManager.getDocument(r.id)?.content ?? ''
|
||||
cacheManager.getLiveDocument(r.id)?.content ?? ''
|
||||
).toLowerCase()
|
||||
return exclusions.every(q => !content.includes(q.value))
|
||||
})
|
||||
@@ -240,9 +224,10 @@ export class SearchEngine {
|
||||
|
||||
// Map the raw results to get usable suggestions
|
||||
return results.map(result => {
|
||||
let note = cacheManager.getDocument(result.id)
|
||||
let note = cacheManager.getLiveDocument(result.id)
|
||||
if (!note) {
|
||||
// throw new Error(`Omnisearch - Note "${result.id}" not indexed`)
|
||||
console.warn(`Omnisearch - Note "${result.id}" not in the live cache`)
|
||||
note = {
|
||||
content: '',
|
||||
basename: result.id,
|
||||
@@ -286,8 +271,11 @@ export class SearchEngine {
|
||||
|
||||
// #region Read/write minisearch index
|
||||
|
||||
public async addAllToMinisearch(documents: IndexedDocument[]): Promise<void> {
|
||||
await this.minisearch.addAllAsync(documents)
|
||||
public async addAllToMinisearch(
|
||||
documents: IndexedDocument[],
|
||||
chunkSize = 10
|
||||
): Promise<void> {
|
||||
await this.minisearch.addAllAsync(documents, { chunkSize })
|
||||
}
|
||||
|
||||
public addSingleToMinisearch(document: IndexedDocument): void {
|
||||
@@ -300,7 +288,7 @@ export class SearchEngine {
|
||||
|
||||
// #endregion
|
||||
|
||||
public async writeToCache(): Promise<void> {
|
||||
await cacheManager.writeMinisearchCache(this.minisearch)
|
||||
public async writeToCache(documents: IndexedDocument[]): Promise<void> {
|
||||
await cacheManager.writeMinisearchCache(this.minisearch, documents)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import {
|
||||
Notice,
|
||||
Platform,
|
||||
Plugin,
|
||||
PluginSettingTab,
|
||||
@@ -6,6 +7,7 @@ import {
|
||||
SliderComponent,
|
||||
} from 'obsidian'
|
||||
import { writable } from 'svelte/store'
|
||||
import { database } from './database'
|
||||
import type OmnisearchPlugin from './main'
|
||||
|
||||
interface WeightingSettings {
|
||||
@@ -143,8 +145,7 @@ export class SettingsTab extends PluginSettingTab {
|
||||
})
|
||||
)
|
||||
|
||||
// PDF Indexing - disabled on iOS
|
||||
if (!Platform.isIosApp) {
|
||||
// PDF Indexing
|
||||
const indexPDFsDesc = new DocumentFragment()
|
||||
indexPDFsDesc.createSpan({}, span => {
|
||||
span.innerHTML = `Omnisearch will include PDFs in search results.
|
||||
@@ -165,7 +166,7 @@ export class SettingsTab extends PluginSettingTab {
|
||||
await saveSettings(this.plugin)
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
// #endregion Behavior
|
||||
|
||||
// #region User Interface
|
||||
@@ -276,6 +277,29 @@ export class SettingsTab extends PluginSettingTab {
|
||||
.addSlider(cb => this.weightSlider(cb, 'weightH3'))
|
||||
|
||||
// #endregion Results Weighting
|
||||
|
||||
// #region Danger Zone
|
||||
|
||||
new Setting(containerEl).setName('Danger Zone').setHeading()
|
||||
|
||||
const resetCacheDesc = new DocumentFragment()
|
||||
resetCacheDesc.createSpan({}, span => {
|
||||
span.innerHTML = `Erase all Omnisearch cache data.
|
||||
Use this if Omnisearch results are inconsistent, missing, or appear outdated.<br>
|
||||
<strong style="color: var(--text-accent)">Needs a restart to fully take effect.</strong>`
|
||||
})
|
||||
new Setting(containerEl)
|
||||
.setName('Clear cache data')
|
||||
.setDesc(resetCacheDesc)
|
||||
.addButton(cb => {
|
||||
cb.setButtonText('Clear cache')
|
||||
cb.onClick(async () => {
|
||||
await database.clearCache()
|
||||
new Notice('Omnisearch - Cache cleared. Please restart Obsidian.')
|
||||
})
|
||||
})
|
||||
|
||||
//#endregion Danger Zone
|
||||
}
|
||||
|
||||
weightSlider(cb: SliderComponent, key: keyof WeightingSettings): void {
|
||||
@@ -325,11 +349,6 @@ export let settings = Object.assign({}, DEFAULT_SETTINGS) as OmnisearchSettings
|
||||
export async function loadSettings(plugin: Plugin): Promise<void> {
|
||||
settings = Object.assign({}, DEFAULT_SETTINGS, await plugin.loadData())
|
||||
|
||||
// Make sure that PDF indexing is disabled on iOS
|
||||
if (Platform.isIosApp) {
|
||||
settings.PDFIndexing = false
|
||||
}
|
||||
|
||||
showExcerpt.set(settings.showExcerpt)
|
||||
}
|
||||
|
||||
|
||||
@@ -68,5 +68,6 @@
|
||||
"1.7.7": "1.0.0",
|
||||
"1.7.8": "1.0.0",
|
||||
"1.7.9": "1.0.0",
|
||||
"1.7.10": "1.0.0"
|
||||
"1.7.10": "1.0.0",
|
||||
"1.8.0-beta.1": "1.0.0"
|
||||
}
|
||||
Reference in New Issue
Block a user