diff --git a/rollup.config.js b/rollup.config.js index 8c69a5d..e21b160 100644 --- a/rollup.config.js +++ b/rollup.config.js @@ -14,7 +14,7 @@ if you want to view the source visit the plugins github repository */ ` -const production = false//!process.env.ROLLUP_WATCH +const production = !process.env.ROLLUP_WATCH export default { input: './src/main.ts', diff --git a/src/__tests__/event-bus-tests.ts b/src/__tests__/event-bus-tests.ts index 3da3d6f..f59a14b 100644 --- a/src/__tests__/event-bus-tests.ts +++ b/src/__tests__/event-bus-tests.ts @@ -1,4 +1,4 @@ -import { EventBus } from '../event-bus' +import { EventBus } from '../tools/event-bus' describe('EventBus', () => { it('should refuse the registering of invalid ctx/event names', () => { diff --git a/src/__tests__/query-tests.ts b/src/__tests__/query-tests.ts index 49c98bd..36bdc6a 100644 --- a/src/__tests__/query-tests.ts +++ b/src/__tests__/query-tests.ts @@ -1,4 +1,4 @@ -import { Query } from '../query' +import { Query } from '../search/query' describe('The Query class', () => { const stringQuery = diff --git a/src/__tests__/utils-tests.ts b/src/__tests__/utils-tests.ts index 986b458..043466d 100644 --- a/src/__tests__/utils-tests.ts +++ b/src/__tests__/utils-tests.ts @@ -1,5 +1,5 @@ import type { CachedMetadata } from 'obsidian' -import { getAliasesFromMetadata } from '../utils' +import { getAliasesFromMetadata } from '../tools/utils' describe('Utils', () => { describe('getAliasesFromMetadata', () => { diff --git a/src/cache-manager.ts b/src/cache-manager.ts index 54d4740..96a629e 100644 --- a/src/cache-manager.ts +++ b/src/cache-manager.ts @@ -1,120 +1,50 @@ -import { throttle } from 'lodash-es' -import type MiniSearch from 'minisearch' import type { TFile } from 'obsidian' -import { deflate, inflate } from 'pako' -import { - notesCacheFilePath, - minisearchCacheFilePath, - type IndexedDocument, -} from './globals' -import { settings } from './settings' +import type { IndexedDocument } from './globals' class CacheManager { - notesCache: Record = {} - compress = true - writeInterval = 5_000 // In milliseconds + private documentsCache: Map = new Map() + private writeInterval = 10_000 // In milliseconds - //#region Minisearch - - /** - * Serializes and writes the Minisearch index on the disk - */ - public writeMinisearchIndex = throttle( - this._writeMinisearchIndex, - this.writeInterval, - { - leading: true, - trailing: true, - } - ) - private async _writeMinisearchIndex(minisearch: MiniSearch): Promise { - if (!settings.persistCache) { - return - } - const json = JSON.stringify(minisearch) - const data = this.compress ? deflate(json) : json - await app.vault.adapter.writeBinary(minisearchCacheFilePath, data as any) - console.log('Omnisearch - Minisearch index saved on disk') + public async updateDocument(path: string, note: IndexedDocument) { + this.documentsCache.set(path, note) } - public async readMinisearchIndex(): Promise { - if (!settings.persistCache) { - return null - } - if (await app.vault.adapter.exists(minisearchCacheFilePath)) { - try { - const data = await app.vault.adapter.readBinary(minisearchCacheFilePath) - return ( - this.compress ? new TextDecoder('utf8').decode(inflate(data)) : data - ) as any - } catch (e) { - console.trace( - 'Omnisearch - Could not load MiniSearch index from the file:' - ) - console.warn(e) - app.vault.adapter.remove(minisearchCacheFilePath) - } - } - return null + public deleteDocument(key: string): void { + this.documentsCache.delete(key) } - //#endregion Minisearch - - public async loadNotesCache() { - if (!settings.persistCache) { - return null - } - if (await app.vault.adapter.exists(notesCacheFilePath)) { - try { - const data = await app.vault.adapter.readBinary(notesCacheFilePath) - const json = ( - this.compress ? new TextDecoder('utf8').decode(inflate(data)) : data - ) as any - this.notesCache = JSON.parse(json) - } catch (e) { - console.trace('Omnisearch - Could not load notes cache:') - console.warn(e) - app.vault.adapter.remove(notesCacheFilePath) - } - } - return null - } - - public saveNotesCache = throttle(this._saveNotesCache, this.writeInterval, { - leading: true, - trailing: true, - }) - private async _saveNotesCache() { - if (!settings.persistCache) { - return - } - const json = JSON.stringify(this.notesCache) - const data = this.compress ? deflate(json) : json - await app.vault.adapter.writeBinary(notesCacheFilePath, data as any) - console.log('Omnisearch - Notes cache saved on disk') - } - - public addNoteToMemCache(path: string, note: IndexedDocument) { - this.notesCache[path] = note - this.saveNotesCache() - } - - public removeNoteFromMemCache(key: string): void { - delete this.notesCache[key] - } - - public getNoteFromMemCache(key: string): IndexedDocument | undefined { - return this.notesCache[key] + public getDocument(key: string): IndexedDocument | undefined { + return this.documentsCache.get(key) } public getNonExistingNotesFromMemCache(): IndexedDocument[] { - return Object.values(this.notesCache).filter(note => note.doesNotExist) + return Object.values(this.documentsCache).filter(note => note.doesNotExist) } - public isNoteInMemCacheOutdated(file: TFile): boolean { - const indexedNote = this.getNoteFromMemCache(file.path) + public isDocumentOutdated(file: TFile): boolean { + const indexedNote = this.getDocument(file.path) return !indexedNote || indexedNote.mtime !== file.stat.mtime } + + // private async _writeMinisearchIndex(minisearch: MiniSearch): Promise { + // if (!settings.persistCache) { + // return + // } + // const json = JSON.stringify(minisearch) + // const data = deflate(json) + // await app.vault.adapter.writeBinary(minisearchCacheFilePath, data as any) + // console.log('Omnisearch - Minisearch index saved on disk') + // } + // + // private async _saveNotesCache() { + // if (!settings.persistCache) { + // return + // } + // const json = JSON.stringify(Array.from(this.documentsCache.entries())) + // const data = deflate(json) + // await app.vault.adapter.writeBinary(notesCacheFilePath, data as any) + // console.log('Omnisearch - Notes cache saved on disk') + // } } export const cacheManager = new CacheManager() diff --git a/src/components/ModalInFile.svelte b/src/components/ModalInFile.svelte index 3012a59..93d3d59 100644 --- a/src/components/ModalInFile.svelte +++ b/src/components/ModalInFile.svelte @@ -10,16 +10,16 @@ type ResultNote, type SearchMatch, } from 'src/globals' - import { loopIndex } from 'src/utils' + import { loopIndex } from 'src/tools/utils' import { onDestroy, onMount, tick } from 'svelte' import { MarkdownView } from 'obsidian' - import * as Search from 'src/search' + import * as Search from 'src/search/search' import ModalContainer from './ModalContainer.svelte' - import { OmnisearchInFileModal, OmnisearchVaultModal } from 'src/modals' + import { OmnisearchInFileModal, OmnisearchVaultModal } from 'src/components/modals' import ResultItemInFile from './ResultItemInFile.svelte' - import { Query } from 'src/query' - import { openNote } from 'src/notes' - import { saveSearchHistory } from '../search-history' + import { Query } from 'src/search/query' + import { openNote } from 'src/tools/notes' + import { saveSearchHistory } from '../search/search-history' export let modal: OmnisearchInFileModal export let parent: OmnisearchVaultModal | null = null diff --git a/src/components/ModalVault.svelte b/src/components/ModalVault.svelte index 353a827..792f0ce 100644 --- a/src/components/ModalVault.svelte +++ b/src/components/ModalVault.svelte @@ -4,13 +4,13 @@ import InputSearch from './InputSearch.svelte' import ModalContainer from './ModalContainer.svelte' import { eventBus, type ResultNote } from 'src/globals' - import { createNote, openNote } from 'src/notes' - import * as Search from 'src/search' - import { getCtrlKeyLabel, getExtension, loopIndex } from 'src/utils' - import { OmnisearchInFileModal, type OmnisearchVaultModal } from 'src/modals' + import { createNote, openNote } from 'src/tools/notes' + import * as Search from 'src/search/search' + import { getCtrlKeyLabel, getExtension, loopIndex } from 'src/tools/utils' + import { OmnisearchInFileModal, type OmnisearchVaultModal } from 'src/components/modals' import ResultItemVault from './ResultItemVault.svelte' - import { Query } from 'src/query' - import { saveSearchHistory, searchHistory } from 'src/search-history' + import { Query } from 'src/search/query' + import { saveSearchHistory, searchHistory } from 'src/search/search-history' import { settings } from '../settings' import * as NotesIndex from '../notes-index' diff --git a/src/components/ResultItemInFile.svelte b/src/components/ResultItemInFile.svelte index b4221e5..3f35c51 100644 --- a/src/components/ResultItemInFile.svelte +++ b/src/components/ResultItemInFile.svelte @@ -1,6 +1,6 @@ diff --git a/src/modals.ts b/src/components/modals.ts similarity index 94% rename from src/modals.ts rename to src/components/modals.ts index b74a709..508ccd3 100644 --- a/src/modals.ts +++ b/src/components/modals.ts @@ -1,8 +1,8 @@ import { App, Modal, TFile } from 'obsidian' -import ModalVault from './components/ModalVault.svelte' -import ModalInFile from './components/ModalInFile.svelte' -import {eventBus, EventNames, isInputComposition} from './globals' -import { settings } from './settings' +import ModalVault from './ModalVault.svelte' +import ModalInFile from './ModalInFile.svelte' +import {eventBus, EventNames, isInputComposition} from '../globals' +import { settings } from '../settings' abstract class OmnisearchModal extends Modal { protected constructor(app: App) { diff --git a/src/database.ts b/src/database.ts index 97cd728..5565aac 100644 --- a/src/database.ts +++ b/src/database.ts @@ -1,15 +1,23 @@ import Dexie from 'dexie' +import type { IndexedDocument } from './globals' class OmnisearchCache extends Dexie { pdf!: Dexie.Table< { path: string; hash: string; size: number; text: string }, string > + documents!: Dexie.Table< + { document: IndexedDocument; path: string; mtime: number }, + string + > + minisearch!: Dexie.Table constructor() { super(app.appId + '_omnisearch') - this.version(1).stores({ + this.version(2).stores({ pdf: 'path, hash, size, text', + documents: 'path, mtime, document', + minisearch: 'data', }) } } diff --git a/src/file-loader.ts b/src/file-loader.ts new file mode 100644 index 0000000..667f997 --- /dev/null +++ b/src/file-loader.ts @@ -0,0 +1,92 @@ +import { cacheManager } from './cache-manager' +import { + extractHeadingsFromCache, + getAliasesFromMetadata, + getTagsFromMetadata, + isFilePlaintext, + removeDiacritics, +} from './tools/utils' +import * as NotesIndex from './notes-index' +import type { TFile } from 'obsidian' +import type { IndexedDocument } from './globals' +import { pdfManager } from './pdf/pdf-manager' +import { getNonExistingNotes } from './tools/notes' + +/** + * Return all plaintext files as IndexedDocuments + */ +export async function getPlainTextFiles(): Promise { + const allFiles = app.vault.getFiles().filter(f => isFilePlaintext(f.path)) + const data: IndexedDocument[] = [] + for (const file of allFiles) { + const doc = await fileToIndexedDocument(file) + data.push(doc) + await cacheManager.updateDocument(file.path, doc) + } + return data +} + +/** + * Return all PDF files as IndexedDocuments. + * If a PDF isn't cached, it will be read from the disk and added to the IndexedDB + */ +export async function getPDFFiles(): Promise { + const allFiles = app.vault.getFiles().filter(f => f.path.endsWith('.pdf')) + const data: IndexedDocument[] = [] + + const input = [] + for (const file of allFiles) { + input.push( + NotesIndex.processQueue(async () => { + const doc = await fileToIndexedDocument(file) + cacheManager.updateDocument(file.path, doc) + data.push(doc) + }) + ) + } + await Promise.all(input) + return data +} + +/** + * Convert a file into an IndexedDocument. + * Will use the cache if possible. + * @param file + */ +export async function fileToIndexedDocument( + file: TFile +): Promise { + let content: string + if (isFilePlaintext(file.path)) { + content = removeDiacritics(await app.vault.cachedRead(file)) + } else if (file.path.endsWith('.pdf')) { + content = removeDiacritics(await pdfManager.getPdfText(file)) + } else { + throw new Error('Invalid file: ' + file.path) + } + + content = removeDiacritics(content) + const metadata = app.metadataCache.getFileCache(file) + + // Look for links that lead to non-existing files, + // and add them to the index. + if (metadata) { + const nonExisting = getNonExistingNotes(file, metadata) + for (const name of nonExisting.filter(o => !cacheManager.getDocument(o))) { + NotesIndex.addNonExistingToIndex(name, file.path) + } + } + + return { + basename: removeDiacritics(file.basename), + content, + path: file.path, + mtime: file.stat.mtime, + + tags: getTagsFromMetadata(metadata), + aliases: getAliasesFromMetadata(metadata).join(''), + headings1: metadata ? extractHeadingsFromCache(metadata, 1).join(' ') : '', + headings2: metadata ? extractHeadingsFromCache(metadata, 2).join(' ') : '', + headings3: metadata ? extractHeadingsFromCache(metadata, 3).join(' ') : '', + } +} diff --git a/src/globals.ts b/src/globals.ts index 6f46587..48e01b3 100644 --- a/src/globals.ts +++ b/src/globals.ts @@ -1,6 +1,4 @@ -import pLimit from 'p-limit' -import { EventBus } from './event-bus' -import { settings } from './settings' +import { EventBus } from './tools/event-bus' export const regexLineSplit = /\r?\n|\r|((\.|\?|!)( |\r?\n|\r))/g export const regexYaml = /^---\s*\n(.*?)\n?^---\s?/ms @@ -14,8 +12,6 @@ export const highlightClass = 'suggestion-highlight omnisearch-highlight' export const eventBus = new EventBus() -export const minisearchCacheFilePath = `${app.vault.configDir}/plugins/omnisearch/searchIndex.data` -export const notesCacheFilePath = `${app.vault.configDir}/plugins/omnisearch/notesCache.data` export const historyFilePath = `${app.vault.configDir}/plugins/omnisearch/historyCache.json` export const EventNames = { diff --git a/src/main.ts b/src/main.ts index e2db52c..2108b2c 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,29 +1,26 @@ import { Notice, Plugin, TFile } from 'obsidian' -import * as Search from './search' -import { OmnisearchInFileModal, OmnisearchVaultModal } from './modals' +import * as Search from './search/search' +import { + OmnisearchInFileModal, + OmnisearchVaultModal, +} from './components/modals' import { loadSettings, settings, SettingsTab, showExcerpt } from './settings' import { eventBus, EventNames } from './globals' import { registerAPI } from '@vanakat/plugin-api' -import api from './api' -import { loadSearchHistory } from './search-history' -import { isFilePlaintext } from './utils' +import api from './tools/api' +import { loadSearchHistory } from './search/search-history' +import { isFilePlaintext } from './tools/utils' import * as NotesIndex from './notes-index' -import { cacheManager } from './cache-manager' - -function _registerAPI(plugin: OmnisearchPlugin): void { - registerAPI('omnisearch', api, plugin as any) - ;(app as any).plugins.plugins.omnisearch.api = api - plugin.register(() => { - delete (app as any).plugins.plugins.omnisearch.api - }) -} +import * as FileLoader from './file-loader' export default class OmnisearchPlugin extends Plugin { async onload(): Promise { await cleanOldCacheFiles() await loadSettings(this) await loadSearchHistory() - await cacheManager.loadNotesCache() + + // Initialize minisearch + await Search.initSearchEngine() _registerAPI(this) @@ -69,7 +66,7 @@ export default class OmnisearchPlugin extends Plugin { ) this.registerEvent( this.app.vault.on('modify', async file => { - NotesIndex.addNoteToReindex(file) + NotesIndex.markNoteForReindex(file) }) ) this.registerEvent( @@ -81,7 +78,7 @@ export default class OmnisearchPlugin extends Plugin { }) ) - await Search.initGlobalSearchIndex() + await populateIndex() }) // showWelcomeNotice(this) @@ -99,11 +96,36 @@ export default class OmnisearchPlugin extends Plugin { } } +/** + * Read the files and feed them to Minisearch + */ +async function populateIndex(): Promise { + // Load plain text files + console.time('Omnisearch - Timing') + const files = await FileLoader.getPlainTextFiles() + // Index them + await Search.addAllToMinisearch(files) + console.log(`Omnisearch - Indexed ${files.length} notes`) + console.timeEnd('Omnisearch - Timing') + + // Load PDFs + if (settings.PDFIndexing) { + console.time('Omnisearch - Timing') + const pdfs = await FileLoader.getPDFFiles() + // Index them + await Search.addAllToMinisearch(pdfs) + console.log(`Omnisearch - Indexed ${pdfs.length} PDFs`) + console.timeEnd('Omnisearch - Timing') + } +} + async function cleanOldCacheFiles() { const toDelete = [ `${app.vault.configDir}/plugins/omnisearch/searchIndex.json`, `${app.vault.configDir}/plugins/omnisearch/notesCache.json`, - `${app.vault.configDir}/plugins/omnisearch/pdfCache.data` + `${app.vault.configDir}/plugins/omnisearch/notesCache.data`, + `${app.vault.configDir}/plugins/omnisearch/searchIndex.data`, + `${app.vault.configDir}/plugins/omnisearch/pdfCache.data`, ] for (const item of toDelete) { if (await app.vault.adapter.exists(item)) { @@ -130,3 +152,11 @@ New beta feature: PDF search 🔎📄 plugin.saveData(settings) } + +function _registerAPI(plugin: OmnisearchPlugin): void { + registerAPI('omnisearch', api, plugin as any) + ;(app as any).plugins.plugins.omnisearch.api = api + plugin.register(() => { + delete (app as any).plugins.plugins.omnisearch.api + }) +} diff --git a/src/notes-index.ts b/src/notes-index.ts index 48aada4..31ba5c1 100644 --- a/src/notes-index.ts +++ b/src/notes-index.ts @@ -1,20 +1,12 @@ import { Notice, TAbstractFile, TFile } from 'obsidian' -import { - extractHeadingsFromCache, - getAliasesFromMetadata, - getTagsFromMetadata, - isFileIndexable, - removeDiacritics, - wait, -} from './utils' -import { getNonExistingNotes, removeAnchors } from './notes' -import { pdfManager } from './pdf-manager' +import { isFileIndexable, wait } from './tools/utils' +import { removeAnchors } from './tools/notes' import { settings } from './settings' -import * as Search from './search' -// import PQueue from 'p-queue-compat' +import * as Search from './search/search' import { cacheManager } from './cache-manager' import pLimit from 'p-limit' import type { IndexedDocument } from './globals' +import { fileToIndexedDocument } from './file-loader' /** * Use this processing queue to handle all heavy work @@ -33,59 +25,21 @@ export async function addToIndexAndMemCache( return } - // Check if the file was already indexed as non-existent, - // and if so, remove it from the index (before adding it again) - if (cacheManager.getNoteFromMemCache(file.path)?.doesNotExist) { + // Check if the file was already indexed as non-existent. + // If so, remove it from the index, and add it again as a real note. + if (cacheManager.getDocument(file.path)?.doesNotExist) { removeFromIndex(file.path) } try { - // Look for links that lead to non-existing files, - // and index them as well - const metadata = app.metadataCache.getFileCache(file) - if (metadata) { - const nonExisting = getNonExistingNotes(file, metadata) - for (const name of nonExisting.filter( - o => !cacheManager.getNoteFromMemCache(o) - )) { - addNonExistingToIndex(name, file.path) - } - } - - if (cacheManager.getNoteFromMemCache(file.path)) { + if (cacheManager.getDocument(file.path)) { throw new Error(`${file.basename} is already indexed`) } - let content - if (file.path.endsWith('.pdf')) { - content = removeDiacritics(await pdfManager.getPdfText(file as TFile)) - } else { - // Fetch content from the cache to index it as-is - content = removeDiacritics(await app.vault.cachedRead(file)) - } - // Make the document and index it - const note: IndexedDocument = { - basename: removeDiacritics(file.basename), - content, - path: file.path, - mtime: file.stat.mtime, - - tags: getTagsFromMetadata(metadata), - aliases: getAliasesFromMetadata(metadata).join(''), - headings1: metadata - ? extractHeadingsFromCache(metadata, 1).join(' ') - : '', - headings2: metadata - ? extractHeadingsFromCache(metadata, 2).join(' ') - : '', - headings3: metadata - ? extractHeadingsFromCache(metadata, 3).join(' ') - : '', - } - - Search.minisearchInstance.add(note) - cacheManager.addNoteToMemCache(note.path, note) + const note = await fileToIndexedDocument(file) + Search.addSingleToMinisearch(note) + await cacheManager.updateDocument(note.path, note) } catch (e) { // console.trace('Error while indexing ' + file.basename) console.error(e) @@ -101,7 +55,7 @@ export async function addToIndexAndMemCache( export function addNonExistingToIndex(name: string, parent: string): void { name = removeAnchors(name) const filename = name + (name.endsWith('.md') ? '' : '.md') - if (cacheManager.getNoteFromMemCache(filename)) return + if (cacheManager.getDocument(filename)) return const note: IndexedDocument = { path: filename, @@ -118,29 +72,30 @@ export function addNonExistingToIndex(name: string, parent: string): void { doesNotExist: true, parent, } - Search.minisearchInstance.add(note) - cacheManager.addNoteToMemCache(filename, note) + Search.addSingleToMinisearch(note) + cacheManager.updateDocument(filename, note) } /** - * Removes a file from the index, by its path - * @param path + * Removes a file from the index, by its path. */ export function removeFromIndex(path: string): void { if (!isFileIndexable(path)) { console.info(`"${path}" is not an indexable file`) return } - const note = cacheManager.getNoteFromMemCache(path) + const note = cacheManager.getDocument(path) if (note) { - Search.minisearchInstance.remove(note) - cacheManager.removeNoteFromMemCache(path) - cacheManager - .getNonExistingNotesFromMemCache() - .filter(n => n.parent === path) - .forEach(n => { - removeFromIndex(n.path) - }) + Search.removeFromMinisearch(note) + cacheManager.deleteDocument(path) + + // FIXME: only remove non-existing notes if they don't have another parent + // cacheManager + // .getNonExistingNotesFromMemCache() + // .filter(n => n.parent === path) + // .forEach(n => { + // removeFromIndex(n.path) + // }) } else { console.warn(`Omnisearch - Note not found under path ${path}`) } @@ -148,7 +103,11 @@ export function removeFromIndex(path: string): void { const notesToReindex = new Set() -export function addNoteToReindex(note: TAbstractFile): void { +/** + * Updated notes are not reindexed immediately for performance reasons. + * They're added to a list, and reindex is done the next time we open Omnisearch. + */ +export function markNoteForReindex(note: TAbstractFile): void { notesToReindex.add(note) } @@ -163,35 +122,5 @@ export async function refreshIndex(): Promise { await wait(0) } notesToReindex.clear() - await cacheManager.writeMinisearchIndex(Search.minisearchInstance) - } -} - -export async function indexPDFs() { - if (settings.PDFIndexing) { - const files = app.vault.getFiles().filter(f => f.path.endsWith('.pdf')) - console.time('PDF Indexing') - console.log(`Omnisearch - Indexing ${files.length} PDFs`) - const input = [] - for (const file of files) { - if (cacheManager.getNoteFromMemCache(file.path)) { - removeFromIndex(file.path) - } - input.push( - processQueue(async () => { - await addToIndexAndMemCache(file) - await cacheManager.writeMinisearchIndex(Search.minisearchInstance) - }) - ) - } - await Promise.all(input) - // await pdfQueue.onEmpty() - console.timeEnd('PDF Indexing') - - if (settings.showIndexingNotices) { - new Notice(`Omnisearch - Indexed ${files.length} PDFs`) - } - - await pdfManager.cleanCache() } } diff --git a/src/pdf-manager.ts b/src/pdf/pdf-manager.ts similarity index 97% rename from src/pdf-manager.ts rename to src/pdf/pdf-manager.ts index 0fa1348..aebd726 100644 --- a/src/pdf-manager.ts +++ b/src/pdf/pdf-manager.ts @@ -1,7 +1,7 @@ import type { TFile } from 'obsidian' import WebWorker from 'web-worker:./pdf-worker.ts' -import { makeMD5 } from './utils' -import { database } from './database' +import { makeMD5 } from '../tools/utils' +import { database } from '../database' const workerTimeout = 120_000 diff --git a/src/pdf-worker.ts b/src/pdf/pdf-worker.ts similarity index 84% rename from src/pdf-worker.ts rename to src/pdf/pdf-worker.ts index 6be1289..828d521 100644 --- a/src/pdf-worker.ts +++ b/src/pdf/pdf-worker.ts @@ -1,5 +1,5 @@ -import rustPlugin from '../pkg/obsidian_search_bg.wasm' -import * as plugin from '../pkg/obsidian_search' +import rustPlugin from '../../pkg/obsidian_search_bg.wasm' +import * as plugin from '../../pkg' const decodedPlugin = decodeBase64(rustPlugin as any) diff --git a/src/query.ts b/src/search/query.ts similarity index 87% rename from src/query.ts rename to src/search/query.ts index 1bb8d2a..389436d 100644 --- a/src/query.ts +++ b/src/search/query.ts @@ -1,6 +1,6 @@ -import { settings } from './settings' -import { removeDiacritics, stripSurroundingQuotes } from './utils' -import { parseQuery } from './vendor/parse-query' +import { settings } from '../settings' +import { removeDiacritics, stripSurroundingQuotes } from '../tools/utils' +import { parseQuery } from '../vendor/parse-query' type QueryToken = { /** diff --git a/src/search-history.ts b/src/search/search-history.ts similarity index 93% rename from src/search-history.ts rename to src/search/search-history.ts index 4a0b5e5..6c3b2d1 100644 --- a/src/search-history.ts +++ b/src/search/search-history.ts @@ -1,4 +1,4 @@ -import { historyFilePath } from './globals' +import { historyFilePath } from '../globals' export let searchHistory: string[] = [] diff --git a/src/search.ts b/src/search/search.ts similarity index 63% rename from src/search.ts rename to src/search/search.ts index 0782d2f..b399011 100644 --- a/src/search.ts +++ b/src/search/search.ts @@ -1,25 +1,25 @@ -import { Notice } from 'obsidian' -import MiniSearch, { type Options, type SearchResult } from 'minisearch' +import MiniSearch, { + type AsPlainObject, + type Options, + type SearchResult, +} from 'minisearch' import { chsRegex, type IndexedDocument, type ResultNote, - minisearchCacheFilePath, type SearchMatch, SPACE_OR_PUNCTUATION, -} from './globals' +} from '../globals' import { - isFilePlaintext, removeDiacritics, stringsToRegex, stripMarkdownCharacters, -} from './utils' +} from '../tools/utils' import type { Query } from './query' -import { settings } from './settings' -import * as NotesIndex from './notes-index' -import { cacheManager } from './cache-manager' +import { settings } from '../settings' +import { cacheManager } from '../cache-manager' -export let minisearchInstance: MiniSearch +let minisearchInstance: MiniSearch const tokenize = (text: string): string[] => { const tokens = text.split(SPACE_OR_PUNCTUATION) @@ -32,98 +32,38 @@ const tokenize = (text: string): string[] => { } else return tokens } +const minisearchOptions: Options = { + tokenize, + processTerm: (term: string) => + (settings.ignoreDiacritics ? removeDiacritics(term) : term).toLowerCase(), + idField: 'path', + fields: [ + 'basename', + 'aliases', + 'content', + 'headings1', + 'headings2', + 'headings3', + ], + storeFields: ['tags'], +} + /** * Initializes the MiniSearch instance, * and adds all the notes to the index */ -export async function initGlobalSearchIndex(): Promise { - const options: Options = { - tokenize, - processTerm: (term: string) => - (settings.ignoreDiacritics ? removeDiacritics(term) : term).toLowerCase(), - idField: 'path', - fields: [ - 'basename', - 'aliases', - 'content', - 'headings1', - 'headings2', - 'headings3', - ], - storeFields: ['tags'], - } - +export async function initSearchEngine(): Promise { // Default instance - minisearchInstance = new MiniSearch(options) + minisearchInstance = new MiniSearch(minisearchOptions) +} - // Load Minisearch cache, if it exists - if (await app.vault.adapter.exists(minisearchCacheFilePath)) { - try { - const json = await cacheManager.readMinisearchIndex() - if (json) { - // If we have cache data, reload it - minisearchInstance = MiniSearch.loadJSON(json, options) - } - console.log('Omnisearch - MiniSearch index loaded from the file') - } catch (e) { - console.trace( - 'Omnisearch - Could not load MiniSearch index from the file' - ) - console.error(e) - } - } - - // if (!minisearchInstance) { - // resetNotesCache() - // } - - // Index files that are already present - const start = new Date().getTime() - - const allFiles = app.vault.getFiles().filter(f => isFilePlaintext(f.path)) - - let files - let notesSuffix - if (settings.persistCache) { - files = allFiles.filter(file => cacheManager.isNoteInMemCacheOutdated(file)) - notesSuffix = 'modified notes' - } else { - files = allFiles - notesSuffix = 'notes' - } - - if (files.length > 0) { - console.log(`Omnisearch - Indexing ${files.length} ${notesSuffix}`) - } - - // Read and index all the files into the search engine - const input = [] - for (const file of files) { - if (cacheManager.getNoteFromMemCache(file.path)) { - NotesIndex.removeFromIndex(file.path) - } - input.push( - NotesIndex.processQueue(() => NotesIndex.addToIndexAndMemCache(file)) - ) - } - - await Promise.all(input) - - if (files.length > 0) { - const message = `Omnisearch - Indexed ${files.length} ${notesSuffix} in ${ - new Date().getTime() - start - }ms` - - console.log(message) - - if (settings.showIndexingNotices) { - new Notice(message) - } - - await cacheManager.writeMinisearchIndex(minisearchInstance) - - // PDFs are indexed later, since they're heavier - await NotesIndex.indexPDFs() +export async function initSearchEngineFromData(json: string): Promise { + try { + minisearchInstance = MiniSearch.loadJSON(json, minisearchOptions) + console.log('Omnisearch - MiniSearch index loaded from the file') + } catch (e) { + console.error('Omnisearch - Could not load MiniSearch index from json') + console.error(e) } } @@ -165,10 +105,9 @@ async function search(query: Query): Promise { const exactTerms = query.getExactTerms() if (exactTerms.length) { results = results.filter(r => { - const title = - cacheManager.getNoteFromMemCache(r.id)?.path.toLowerCase() ?? '' + const title = cacheManager.getDocument(r.id)?.path.toLowerCase() ?? '' const content = stripMarkdownCharacters( - cacheManager.getNoteFromMemCache(r.id)?.content ?? '' + cacheManager.getDocument(r.id)?.content ?? '' ).toLowerCase() return exactTerms.every(q => content.includes(q) || title.includes(q)) }) @@ -179,7 +118,7 @@ async function search(query: Query): Promise { if (exclusions.length) { results = results.filter(r => { const content = stripMarkdownCharacters( - cacheManager.getNoteFromMemCache(r.id)?.content ?? '' + cacheManager.getDocument(r.id)?.content ?? '' ).toLowerCase() return exclusions.every(q => !content.includes(q.value)) }) @@ -247,7 +186,7 @@ export async function getSuggestions( // Map the raw results to get usable suggestions return results.map(result => { - const note = cacheManager.getNoteFromMemCache(result.id) + const note = cacheManager.getDocument(result.id) if (!note) { throw new Error(`Note "${result.id}" not indexed`) } @@ -286,3 +225,25 @@ export async function getSuggestions( return resultNote }) } + +// #region Read/write minisearch index + +export function getMinisearchIndexJSON(): AsPlainObject { + return minisearchInstance.toJSON() +} + +export async function addAllToMinisearch( + documents: IndexedDocument[] +): Promise { + await minisearchInstance.addAllAsync(documents) +} + +export function addSingleToMinisearch(document: IndexedDocument): void { + minisearchInstance.add(document) +} + +export function removeFromMinisearch(document: IndexedDocument): void { + minisearchInstance.remove(document) +} + +// #endregion \ No newline at end of file diff --git a/src/settings.ts b/src/settings.ts index 93e8d1b..62389f6 100644 --- a/src/settings.ts +++ b/src/settings.ts @@ -6,7 +6,6 @@ import { SliderComponent, } from 'obsidian' import { writable } from 'svelte/store' -import { notesCacheFilePath, minisearchCacheFilePath } from './globals' import type OmnisearchPlugin from './main' interface WeightingSettings { @@ -28,7 +27,7 @@ export interface OmnisearchSettings extends WeightingSettings { /** Max number of spawned processes for background tasks, such as extracting text from PDFs */ backgroundProcesses: number /** Write cache files on disk (unrelated to PDFs) */ - persistCache: boolean + // persistCache: boolean /** Display Omnisearch popup notices over Obsidian */ showIndexingNotices: boolean /** Activate the small 🔍 button on Obsidian's ribbon */ @@ -141,37 +140,27 @@ export class SettingsTab extends PluginSettingTab { // }) // }) - // Store index - const serializedIndexDesc = new DocumentFragment() - serializedIndexDesc.createSpan({}, span => { - span.innerHTML = `This will speedup startup times after the initial indexing. Do not activate it unless indexing is too slow on your device: -
    -
  • PDF indexing is not affected by this setting
  • -
  • ⚠️ The index can become corrupted - if you notice any issue, disable and re-enable this option to clear the cache.
  • -
  • ⚠️ Cache files in .obsidian/plugins/omnisearch/*.data must not be synchronized between your devices.
  • -
- Needs a restart to fully take effect. - ` - }) - new Setting(containerEl) - .setName('Persist cache on disk') - .setDesc(serializedIndexDesc) - .addToggle(toggle => - toggle.setValue(settings.persistCache).onChange(async v => { - try { - await app.vault.adapter.remove(notesCacheFilePath) - } catch (e) { - console.warn(e) - } - try { - await app.vault.adapter.remove(minisearchCacheFilePath) - } catch (e) { - console.warn(e) - } - settings.persistCache = v - await saveSettings(this.plugin) - }) - ) + // // Store index + // const serializedIndexDesc = new DocumentFragment() + // serializedIndexDesc.createSpan({}, span => { + // span.innerHTML = `This will speedup startup times after the initial indexing. Do not activate it unless indexing is too slow on your device: + //
    + //
  • PDF indexing is not affected by this setting
  • + //
  • ⚠️ The index can become corrupted - if you notice any issue, disable and re-enable this option to clear the cache.
  • + //
  • ⚠️ Cache files in .obsidian/plugins/omnisearch/*.data must not be synchronized between your devices.
  • + //
+ // Needs a restart to fully take effect. + // ` + // }) + // new Setting(containerEl) + // .setName('Persist cache on disk') + // .setDesc(serializedIndexDesc) + // .addToggle(toggle => + // toggle.setValue(settings.persistCache).onChange(async v => { + // settings.persistCache = v + // await saveSettings(this.plugin) + // }) + // ) // PDF Indexing const indexPDFsDesc = new DocumentFragment() @@ -363,7 +352,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = { CtrlJK: false, CtrlNP: false, - persistCache: false, + // persistCache: false, welcomeMessage: '', } as const diff --git a/src/api.ts b/src/tools/api.ts similarity index 83% rename from src/api.ts rename to src/tools/api.ts index 663af82..2e5523d 100644 --- a/src/api.ts +++ b/src/tools/api.ts @@ -1,6 +1,6 @@ -import type { ResultNote, SearchMatch } from './globals' -import { Query } from './query' -import * as Search from './search' +import type { ResultNote, SearchMatch } from '../globals' +import { Query } from '../search/query' +import * as Search from '../search/search' type ResultNoteApi = { score: number diff --git a/src/event-bus.ts b/src/tools/event-bus.ts similarity index 100% rename from src/event-bus.ts rename to src/tools/event-bus.ts diff --git a/src/notes.ts b/src/tools/notes.ts similarity index 98% rename from src/notes.ts rename to src/tools/notes.ts index 97323ce..3c02ed4 100644 --- a/src/notes.ts +++ b/src/tools/notes.ts @@ -1,6 +1,6 @@ import { type CachedMetadata, MarkdownView, TFile } from 'obsidian' import { stringsToRegex } from './utils' -import type { ResultNote } from './globals' +import type { ResultNote } from '../globals' export async function openNote( item: ResultNote, diff --git a/src/utils.ts b/src/tools/utils.ts similarity index 95% rename from src/utils.ts rename to src/tools/utils.ts index 6ae1658..f051a33 100644 --- a/src/utils.ts +++ b/src/tools/utils.ts @@ -1,5 +1,5 @@ -import { type CachedMetadata, Notice, Platform, Plugin } from 'obsidian' -import type { SearchMatch } from './globals' +import { type CachedMetadata, Platform } from 'obsidian' +import type { SearchMatch } from '../globals' import { excerptAfter, excerptBefore, @@ -8,9 +8,9 @@ import { regexLineSplit, regexStripQuotes, regexYaml, -} from './globals' -import { settings } from './settings' -import { createHash, type BinaryLike } from 'crypto' +} from '../globals' +import { settings } from '../settings' +import { type BinaryLike, createHash } from 'crypto' import { md5 } from 'pure-md5' export function highlighter(str: string): string {