Squashed commit of the following:

commit ac82511ddd17d5472ae3cfea9bbad9754f5a4d62
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Sat Oct 22 08:23:42 2022 +0200

    Screw that cache, seriously.

commit 8ba40d1be73daaaffea09e07bc56c339266db9b6
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Fri Oct 21 22:36:48 2022 +0200

    Stuff

commit 27b8fd7dc809be9714a109d3a458eb1276a47e2e
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Fri Oct 21 22:22:20 2022 +0200

    Moved files

commit fb1349c914907e586e103ca54fb04b9ddd45ef5d
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Thu Oct 20 22:25:29 2022 +0200

    Removed duplicate code

commit e7371138e60cbe4155cfd4fb44e3ee1d2e3ee088
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Thu Oct 20 21:50:09 2022 +0200

    Moved a bunch of files

commit 2ee1b2a0e799d4b41ab3a444d8cc44dfff5b5623
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Thu Oct 20 21:32:21 2022 +0200

    Removed useless code

commit 76c530dfb9adbad1bbe9079de2330fe43a044249
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Thu Oct 20 20:44:11 2022 +0200

    Split file reading and indexing
This commit is contained in:
Simon Cambier
2022-10-22 08:25:34 +02:00
parent 1376cea282
commit c2ecdd79ad
25 changed files with 338 additions and 403 deletions

View File

@@ -14,7 +14,7 @@ if you want to view the source visit the plugins github repository
*/ */
` `
const production = false//!process.env.ROLLUP_WATCH const production = !process.env.ROLLUP_WATCH
export default { export default {
input: './src/main.ts', input: './src/main.ts',

View File

@@ -1,4 +1,4 @@
import { EventBus } from '../event-bus' import { EventBus } from '../tools/event-bus'
describe('EventBus', () => { describe('EventBus', () => {
it('should refuse the registering of invalid ctx/event names', () => { it('should refuse the registering of invalid ctx/event names', () => {

View File

@@ -1,4 +1,4 @@
import { Query } from '../query' import { Query } from '../search/query'
describe('The Query class', () => { describe('The Query class', () => {
const stringQuery = const stringQuery =

View File

@@ -1,5 +1,5 @@
import type { CachedMetadata } from 'obsidian' import type { CachedMetadata } from 'obsidian'
import { getAliasesFromMetadata } from '../utils' import { getAliasesFromMetadata } from '../tools/utils'
describe('Utils', () => { describe('Utils', () => {
describe('getAliasesFromMetadata', () => { describe('getAliasesFromMetadata', () => {

View File

@@ -1,120 +1,50 @@
import { throttle } from 'lodash-es'
import type MiniSearch from 'minisearch'
import type { TFile } from 'obsidian' import type { TFile } from 'obsidian'
import { deflate, inflate } from 'pako' import type { IndexedDocument } from './globals'
import {
notesCacheFilePath,
minisearchCacheFilePath,
type IndexedDocument,
} from './globals'
import { settings } from './settings'
class CacheManager { class CacheManager {
notesCache: Record<string, IndexedDocument> = {} private documentsCache: Map<string, IndexedDocument> = new Map()
compress = true private writeInterval = 10_000 // In milliseconds
writeInterval = 5_000 // In milliseconds
//#region Minisearch public async updateDocument(path: string, note: IndexedDocument) {
this.documentsCache.set(path, note)
/**
* Serializes and writes the Minisearch index on the disk
*/
public writeMinisearchIndex = throttle(
this._writeMinisearchIndex,
this.writeInterval,
{
leading: true,
trailing: true,
}
)
private async _writeMinisearchIndex(minisearch: MiniSearch): Promise<void> {
if (!settings.persistCache) {
return
}
const json = JSON.stringify(minisearch)
const data = this.compress ? deflate(json) : json
await app.vault.adapter.writeBinary(minisearchCacheFilePath, data as any)
console.log('Omnisearch - Minisearch index saved on disk')
} }
public async readMinisearchIndex(): Promise<string | null> { public deleteDocument(key: string): void {
if (!settings.persistCache) { this.documentsCache.delete(key)
return null
}
if (await app.vault.adapter.exists(minisearchCacheFilePath)) {
try {
const data = await app.vault.adapter.readBinary(minisearchCacheFilePath)
return (
this.compress ? new TextDecoder('utf8').decode(inflate(data)) : data
) as any
} catch (e) {
console.trace(
'Omnisearch - Could not load MiniSearch index from the file:'
)
console.warn(e)
app.vault.adapter.remove(minisearchCacheFilePath)
}
}
return null
} }
//#endregion Minisearch public getDocument(key: string): IndexedDocument | undefined {
return this.documentsCache.get(key)
public async loadNotesCache() {
if (!settings.persistCache) {
return null
}
if (await app.vault.adapter.exists(notesCacheFilePath)) {
try {
const data = await app.vault.adapter.readBinary(notesCacheFilePath)
const json = (
this.compress ? new TextDecoder('utf8').decode(inflate(data)) : data
) as any
this.notesCache = JSON.parse(json)
} catch (e) {
console.trace('Omnisearch - Could not load notes cache:')
console.warn(e)
app.vault.adapter.remove(notesCacheFilePath)
}
}
return null
}
public saveNotesCache = throttle(this._saveNotesCache, this.writeInterval, {
leading: true,
trailing: true,
})
private async _saveNotesCache() {
if (!settings.persistCache) {
return
}
const json = JSON.stringify(this.notesCache)
const data = this.compress ? deflate(json) : json
await app.vault.adapter.writeBinary(notesCacheFilePath, data as any)
console.log('Omnisearch - Notes cache saved on disk')
}
public addNoteToMemCache(path: string, note: IndexedDocument) {
this.notesCache[path] = note
this.saveNotesCache()
}
public removeNoteFromMemCache(key: string): void {
delete this.notesCache[key]
}
public getNoteFromMemCache(key: string): IndexedDocument | undefined {
return this.notesCache[key]
} }
public getNonExistingNotesFromMemCache(): IndexedDocument[] { public getNonExistingNotesFromMemCache(): IndexedDocument[] {
return Object.values(this.notesCache).filter(note => note.doesNotExist) return Object.values(this.documentsCache).filter(note => note.doesNotExist)
} }
public isNoteInMemCacheOutdated(file: TFile): boolean { public isDocumentOutdated(file: TFile): boolean {
const indexedNote = this.getNoteFromMemCache(file.path) const indexedNote = this.getDocument(file.path)
return !indexedNote || indexedNote.mtime !== file.stat.mtime return !indexedNote || indexedNote.mtime !== file.stat.mtime
} }
// private async _writeMinisearchIndex(minisearch: MiniSearch): Promise<void> {
// if (!settings.persistCache) {
// return
// }
// const json = JSON.stringify(minisearch)
// const data = deflate(json)
// await app.vault.adapter.writeBinary(minisearchCacheFilePath, data as any)
// console.log('Omnisearch - Minisearch index saved on disk')
// }
//
// private async _saveNotesCache() {
// if (!settings.persistCache) {
// return
// }
// const json = JSON.stringify(Array.from(this.documentsCache.entries()))
// const data = deflate(json)
// await app.vault.adapter.writeBinary(notesCacheFilePath, data as any)
// console.log('Omnisearch - Notes cache saved on disk')
// }
} }
export const cacheManager = new CacheManager() export const cacheManager = new CacheManager()

View File

@@ -10,16 +10,16 @@
type ResultNote, type ResultNote,
type SearchMatch, type SearchMatch,
} from 'src/globals' } from 'src/globals'
import { loopIndex } from 'src/utils' import { loopIndex } from 'src/tools/utils'
import { onDestroy, onMount, tick } from 'svelte' import { onDestroy, onMount, tick } from 'svelte'
import { MarkdownView } from 'obsidian' import { MarkdownView } from 'obsidian'
import * as Search from 'src/search' import * as Search from 'src/search/search'
import ModalContainer from './ModalContainer.svelte' import ModalContainer from './ModalContainer.svelte'
import { OmnisearchInFileModal, OmnisearchVaultModal } from 'src/modals' import { OmnisearchInFileModal, OmnisearchVaultModal } from 'src/components/modals'
import ResultItemInFile from './ResultItemInFile.svelte' import ResultItemInFile from './ResultItemInFile.svelte'
import { Query } from 'src/query' import { Query } from 'src/search/query'
import { openNote } from 'src/notes' import { openNote } from 'src/tools/notes'
import { saveSearchHistory } from '../search-history' import { saveSearchHistory } from '../search/search-history'
export let modal: OmnisearchInFileModal export let modal: OmnisearchInFileModal
export let parent: OmnisearchVaultModal | null = null export let parent: OmnisearchVaultModal | null = null

View File

@@ -4,13 +4,13 @@
import InputSearch from './InputSearch.svelte' import InputSearch from './InputSearch.svelte'
import ModalContainer from './ModalContainer.svelte' import ModalContainer from './ModalContainer.svelte'
import { eventBus, type ResultNote } from 'src/globals' import { eventBus, type ResultNote } from 'src/globals'
import { createNote, openNote } from 'src/notes' import { createNote, openNote } from 'src/tools/notes'
import * as Search from 'src/search' import * as Search from 'src/search/search'
import { getCtrlKeyLabel, getExtension, loopIndex } from 'src/utils' import { getCtrlKeyLabel, getExtension, loopIndex } from 'src/tools/utils'
import { OmnisearchInFileModal, type OmnisearchVaultModal } from 'src/modals' import { OmnisearchInFileModal, type OmnisearchVaultModal } from 'src/components/modals'
import ResultItemVault from './ResultItemVault.svelte' import ResultItemVault from './ResultItemVault.svelte'
import { Query } from 'src/query' import { Query } from 'src/search/query'
import { saveSearchHistory, searchHistory } from 'src/search-history' import { saveSearchHistory, searchHistory } from 'src/search/search-history'
import { settings } from '../settings' import { settings } from '../settings'
import * as NotesIndex from '../notes-index' import * as NotesIndex from '../notes-index'

View File

@@ -1,6 +1,6 @@
<script lang="ts"> <script lang="ts">
import type { ResultNote } from '../globals' import type { ResultNote } from '../globals'
import { highlighter, makeExcerpt, stringsToRegex } from '../utils' import { highlighter, makeExcerpt, stringsToRegex } from '../tools/utils'
import ResultItemContainer from './ResultItemContainer.svelte' import ResultItemContainer from './ResultItemContainer.svelte'
export let offset: number export let offset: number

View File

@@ -2,7 +2,7 @@
import { cacheManager } from 'src/cache-manager' import { cacheManager } from 'src/cache-manager'
import { settings, showExcerpt } from 'src/settings' import { settings, showExcerpt } from 'src/settings'
import type { ResultNote } from '../globals' import type { ResultNote } from '../globals'
import { highlighter, makeExcerpt, stringsToRegex } from '../utils' import { highlighter, makeExcerpt, stringsToRegex } from '../tools/utils'
import ResultItemContainer from './ResultItemContainer.svelte' import ResultItemContainer from './ResultItemContainer.svelte'
export let selected = false export let selected = false
@@ -10,7 +10,7 @@
$: reg = stringsToRegex(note.foundWords) $: reg = stringsToRegex(note.foundWords)
$: cleanedContent = makeExcerpt(note.content, note.matches[0]?.offset ?? -1) $: cleanedContent = makeExcerpt(note.content, note.matches[0]?.offset ?? -1)
$: glyph = cacheManager.getNoteFromMemCache(note.path)?.doesNotExist $: glyph = cacheManager.getDocument(note.path)?.doesNotExist
$: title = settings.showShortName ? note.basename : note.path $: title = settings.showShortName ? note.basename : note.path
</script> </script>

View File

@@ -1,8 +1,8 @@
import { App, Modal, TFile } from 'obsidian' import { App, Modal, TFile } from 'obsidian'
import ModalVault from './components/ModalVault.svelte' import ModalVault from './ModalVault.svelte'
import ModalInFile from './components/ModalInFile.svelte' import ModalInFile from './ModalInFile.svelte'
import {eventBus, EventNames, isInputComposition} from './globals' import {eventBus, EventNames, isInputComposition} from '../globals'
import { settings } from './settings' import { settings } from '../settings'
abstract class OmnisearchModal extends Modal { abstract class OmnisearchModal extends Modal {
protected constructor(app: App) { protected constructor(app: App) {

View File

@@ -1,15 +1,23 @@
import Dexie from 'dexie' import Dexie from 'dexie'
import type { IndexedDocument } from './globals'
class OmnisearchCache extends Dexie { class OmnisearchCache extends Dexie {
pdf!: Dexie.Table< pdf!: Dexie.Table<
{ path: string; hash: string; size: number; text: string }, { path: string; hash: string; size: number; text: string },
string string
> >
documents!: Dexie.Table<
{ document: IndexedDocument; path: string; mtime: number },
string
>
minisearch!: Dexie.Table<string>
constructor() { constructor() {
super(app.appId + '_omnisearch') super(app.appId + '_omnisearch')
this.version(1).stores({ this.version(2).stores({
pdf: 'path, hash, size, text', pdf: 'path, hash, size, text',
documents: 'path, mtime, document',
minisearch: 'data',
}) })
} }
} }

92
src/file-loader.ts Normal file
View File

@@ -0,0 +1,92 @@
import { cacheManager } from './cache-manager'
import {
extractHeadingsFromCache,
getAliasesFromMetadata,
getTagsFromMetadata,
isFilePlaintext,
removeDiacritics,
} from './tools/utils'
import * as NotesIndex from './notes-index'
import type { TFile } from 'obsidian'
import type { IndexedDocument } from './globals'
import { pdfManager } from './pdf/pdf-manager'
import { getNonExistingNotes } from './tools/notes'
/**
 * Builds an IndexedDocument for every plaintext file returned by the vault.
 *
 * Files are converted one at a time; each resulting document is appended to
 * the returned list and then stored in the cache manager under the file path.
 *
 * @returns the documents, in the order the files were processed
 */
export async function getPlainTextFiles(): Promise<IndexedDocument[]> {
  const documents: IndexedDocument[] = []
  const plaintextFiles = app.vault
    .getFiles()
    .filter(candidate => isFilePlaintext(candidate.path))

  for (const plaintextFile of plaintextFiles) {
    const indexed = await fileToIndexedDocument(plaintextFile)
    documents.push(indexed)
    await cacheManager.updateDocument(plaintextFile.path, indexed)
  }

  return documents
}
/**
 * Return all PDF files as IndexedDocuments.
 * If a PDF isn't cached, it will be read from the disk and added to the IndexedDB
 * (NOTE(review): that caching happens inside pdfManager, not in this function — confirm).
 *
 * Each conversion is scheduled through `NotesIndex.processQueue`, and the
 * function resolves once every scheduled task has settled successfully.
 *
 * @returns the documents, in the order their conversion completed
 */
export async function getPDFFiles(): Promise<IndexedDocument[]> {
  const pdfFiles = app.vault.getFiles().filter(f => f.path.endsWith('.pdf'))
  const data: IndexedDocument[] = []

  const tasks = pdfFiles.map(file =>
    NotesIndex.processQueue(async () => {
      const doc = await fileToIndexedDocument(file)
      data.push(doc)
      // Await the cache update (as getPlainTextFiles does) so that a failure
      // surfaces through Promise.all instead of as an unhandled rejection,
      // and so the cache is up to date when this function resolves.
      await cacheManager.updateDocument(file.path, doc)
    })
  )

  await Promise.all(tasks)
  return data
}
/**
 * Convert a file into an IndexedDocument.
 *
 * Plaintext files are read with `app.vault.cachedRead`, PDFs through
 * `pdfManager.getPdfText`. As a side effect, links found in the file's
 * metadata that lead to non-existing notes are added to the index, unless
 * a document is already cached under that name.
 *
 * @param file the vault file to convert
 * @returns the document built from the file's content and metadata
 * @throws Error if the file is neither plaintext nor a PDF
 */
export async function fileToIndexedDocument(
  file: TFile
): Promise<IndexedDocument> {
  let rawContent: string
  if (isFilePlaintext(file.path)) {
    rawContent = await app.vault.cachedRead(file)
  } else if (file.path.endsWith('.pdf')) {
    rawContent = await pdfManager.getPdfText(file)
  } else {
    throw new Error('Invalid file: ' + file.path)
  }

  // Diacritics are stripped exactly once here; previously the content went
  // through removeDiacritics twice (once per branch, then once more).
  const content = removeDiacritics(rawContent)

  const metadata = app.metadataCache.getFileCache(file)

  // Look for links that lead to non-existing files,
  // and add them to the index.
  if (metadata) {
    const nonExisting = getNonExistingNotes(file, metadata)
    for (const name of nonExisting.filter(o => !cacheManager.getDocument(o))) {
      NotesIndex.addNonExistingToIndex(name, file.path)
    }
  }

  return {
    basename: removeDiacritics(file.basename),
    content,
    path: file.path,
    mtime: file.stat.mtime,

    tags: getTagsFromMetadata(metadata),
    // NOTE(review): aliases are joined without a separator, unlike the
    // headings below which use a space — confirm this is intended.
    aliases: getAliasesFromMetadata(metadata).join(''),
    headings1: metadata ? extractHeadingsFromCache(metadata, 1).join(' ') : '',
    headings2: metadata ? extractHeadingsFromCache(metadata, 2).join(' ') : '',
    headings3: metadata ? extractHeadingsFromCache(metadata, 3).join(' ') : '',
  }
}

View File

@@ -1,6 +1,4 @@
import pLimit from 'p-limit' import { EventBus } from './tools/event-bus'
import { EventBus } from './event-bus'
import { settings } from './settings'
export const regexLineSplit = /\r?\n|\r|((\.|\?|!)( |\r?\n|\r))/g export const regexLineSplit = /\r?\n|\r|((\.|\?|!)( |\r?\n|\r))/g
export const regexYaml = /^---\s*\n(.*?)\n?^---\s?/ms export const regexYaml = /^---\s*\n(.*?)\n?^---\s?/ms
@@ -14,8 +12,6 @@ export const highlightClass = 'suggestion-highlight omnisearch-highlight'
export const eventBus = new EventBus() export const eventBus = new EventBus()
export const minisearchCacheFilePath = `${app.vault.configDir}/plugins/omnisearch/searchIndex.data`
export const notesCacheFilePath = `${app.vault.configDir}/plugins/omnisearch/notesCache.data`
export const historyFilePath = `${app.vault.configDir}/plugins/omnisearch/historyCache.json` export const historyFilePath = `${app.vault.configDir}/plugins/omnisearch/historyCache.json`
export const EventNames = { export const EventNames = {

View File

@@ -1,29 +1,26 @@
import { Notice, Plugin, TFile } from 'obsidian' import { Notice, Plugin, TFile } from 'obsidian'
import * as Search from './search' import * as Search from './search/search'
import { OmnisearchInFileModal, OmnisearchVaultModal } from './modals' import {
OmnisearchInFileModal,
OmnisearchVaultModal,
} from './components/modals'
import { loadSettings, settings, SettingsTab, showExcerpt } from './settings' import { loadSettings, settings, SettingsTab, showExcerpt } from './settings'
import { eventBus, EventNames } from './globals' import { eventBus, EventNames } from './globals'
import { registerAPI } from '@vanakat/plugin-api' import { registerAPI } from '@vanakat/plugin-api'
import api from './api' import api from './tools/api'
import { loadSearchHistory } from './search-history' import { loadSearchHistory } from './search/search-history'
import { isFilePlaintext } from './utils' import { isFilePlaintext } from './tools/utils'
import * as NotesIndex from './notes-index' import * as NotesIndex from './notes-index'
import { cacheManager } from './cache-manager' import * as FileLoader from './file-loader'
function _registerAPI(plugin: OmnisearchPlugin): void {
registerAPI('omnisearch', api, plugin as any)
;(app as any).plugins.plugins.omnisearch.api = api
plugin.register(() => {
delete (app as any).plugins.plugins.omnisearch.api
})
}
export default class OmnisearchPlugin extends Plugin { export default class OmnisearchPlugin extends Plugin {
async onload(): Promise<void> { async onload(): Promise<void> {
await cleanOldCacheFiles() await cleanOldCacheFiles()
await loadSettings(this) await loadSettings(this)
await loadSearchHistory() await loadSearchHistory()
await cacheManager.loadNotesCache()
// Initialize minisearch
await Search.initSearchEngine()
_registerAPI(this) _registerAPI(this)
@@ -69,7 +66,7 @@ export default class OmnisearchPlugin extends Plugin {
) )
this.registerEvent( this.registerEvent(
this.app.vault.on('modify', async file => { this.app.vault.on('modify', async file => {
NotesIndex.addNoteToReindex(file) NotesIndex.markNoteForReindex(file)
}) })
) )
this.registerEvent( this.registerEvent(
@@ -81,7 +78,7 @@ export default class OmnisearchPlugin extends Plugin {
}) })
) )
await Search.initGlobalSearchIndex() await populateIndex()
}) })
// showWelcomeNotice(this) // showWelcomeNotice(this)
@@ -99,11 +96,36 @@ export default class OmnisearchPlugin extends Plugin {
} }
} }
/**
 * Loads the vault's files and hands them to the search module for indexing.
 *
 * Plaintext files are always loaded and indexed first. PDFs are loaded and
 * indexed afterwards, and only when `settings.PDFIndexing` is enabled.
 * Each phase is timed and reported on the console.
 */
async function populateIndex(): Promise<void> {
  const timerLabel = 'Omnisearch - Timing'

  // Phase 1: plain text files
  console.time(timerLabel)
  const notes = await FileLoader.getPlainTextFiles()
  await Search.addAllToMinisearch(notes)
  console.log(`Omnisearch - Indexed ${notes.length} notes`)
  console.timeEnd(timerLabel)

  if (!settings.PDFIndexing) {
    return
  }

  // Phase 2: PDFs
  console.time(timerLabel)
  const pdfDocuments = await FileLoader.getPDFFiles()
  await Search.addAllToMinisearch(pdfDocuments)
  console.log(`Omnisearch - Indexed ${pdfDocuments.length} PDFs`)
  console.timeEnd(timerLabel)
}
async function cleanOldCacheFiles() { async function cleanOldCacheFiles() {
const toDelete = [ const toDelete = [
`${app.vault.configDir}/plugins/omnisearch/searchIndex.json`, `${app.vault.configDir}/plugins/omnisearch/searchIndex.json`,
`${app.vault.configDir}/plugins/omnisearch/notesCache.json`, `${app.vault.configDir}/plugins/omnisearch/notesCache.json`,
`${app.vault.configDir}/plugins/omnisearch/pdfCache.data` `${app.vault.configDir}/plugins/omnisearch/notesCache.data`,
`${app.vault.configDir}/plugins/omnisearch/searchIndex.data`,
`${app.vault.configDir}/plugins/omnisearch/pdfCache.data`,
] ]
for (const item of toDelete) { for (const item of toDelete) {
if (await app.vault.adapter.exists(item)) { if (await app.vault.adapter.exists(item)) {
@@ -130,3 +152,11 @@ New beta feature: PDF search 🔎📄
plugin.saveData(settings) plugin.saveData(settings)
} }
/**
 * Publishes the Omnisearch API: once through `registerAPI`, and once as the
 * `api` property of the plugin entry found under `app.plugins.plugins.omnisearch`.
 * A callback is registered on the plugin to delete that `api` property again.
 *
 * @param plugin the Omnisearch plugin instance
 */
function _registerAPI(plugin: OmnisearchPlugin): void {
  // The plugin entry is looked up anew on every call rather than captured once.
  const exposeApi = (): void => {
    ;(app as any).plugins.plugins.omnisearch.api = api
  }
  const removeApi = (): void => {
    delete (app as any).plugins.plugins.omnisearch.api
  }

  registerAPI('omnisearch', api, plugin as any)
  exposeApi()
  plugin.register(removeApi)
}

View File

@@ -1,20 +1,12 @@
import { Notice, TAbstractFile, TFile } from 'obsidian' import { Notice, TAbstractFile, TFile } from 'obsidian'
import { import { isFileIndexable, wait } from './tools/utils'
extractHeadingsFromCache, import { removeAnchors } from './tools/notes'
getAliasesFromMetadata,
getTagsFromMetadata,
isFileIndexable,
removeDiacritics,
wait,
} from './utils'
import { getNonExistingNotes, removeAnchors } from './notes'
import { pdfManager } from './pdf-manager'
import { settings } from './settings' import { settings } from './settings'
import * as Search from './search' import * as Search from './search/search'
// import PQueue from 'p-queue-compat'
import { cacheManager } from './cache-manager' import { cacheManager } from './cache-manager'
import pLimit from 'p-limit' import pLimit from 'p-limit'
import type { IndexedDocument } from './globals' import type { IndexedDocument } from './globals'
import { fileToIndexedDocument } from './file-loader'
/** /**
* Use this processing queue to handle all heavy work * Use this processing queue to handle all heavy work
@@ -33,59 +25,21 @@ export async function addToIndexAndMemCache(
return return
} }
// Check if the file was already indexed as non-existent, // Check if the file was already indexed as non-existent.
// and if so, remove it from the index (before adding it again) // If so, remove it from the index, and add it again as a real note.
if (cacheManager.getNoteFromMemCache(file.path)?.doesNotExist) { if (cacheManager.getDocument(file.path)?.doesNotExist) {
removeFromIndex(file.path) removeFromIndex(file.path)
} }
try { try {
// Look for links that lead to non-existing files, if (cacheManager.getDocument(file.path)) {
// and index them as well
const metadata = app.metadataCache.getFileCache(file)
if (metadata) {
const nonExisting = getNonExistingNotes(file, metadata)
for (const name of nonExisting.filter(
o => !cacheManager.getNoteFromMemCache(o)
)) {
addNonExistingToIndex(name, file.path)
}
}
if (cacheManager.getNoteFromMemCache(file.path)) {
throw new Error(`${file.basename} is already indexed`) throw new Error(`${file.basename} is already indexed`)
} }
let content
if (file.path.endsWith('.pdf')) {
content = removeDiacritics(await pdfManager.getPdfText(file as TFile))
} else {
// Fetch content from the cache to index it as-is
content = removeDiacritics(await app.vault.cachedRead(file))
}
// Make the document and index it // Make the document and index it
const note: IndexedDocument = { const note = await fileToIndexedDocument(file)
basename: removeDiacritics(file.basename), Search.addSingleToMinisearch(note)
content, await cacheManager.updateDocument(note.path, note)
path: file.path,
mtime: file.stat.mtime,
tags: getTagsFromMetadata(metadata),
aliases: getAliasesFromMetadata(metadata).join(''),
headings1: metadata
? extractHeadingsFromCache(metadata, 1).join(' ')
: '',
headings2: metadata
? extractHeadingsFromCache(metadata, 2).join(' ')
: '',
headings3: metadata
? extractHeadingsFromCache(metadata, 3).join(' ')
: '',
}
Search.minisearchInstance.add(note)
cacheManager.addNoteToMemCache(note.path, note)
} catch (e) { } catch (e) {
// console.trace('Error while indexing ' + file.basename) // console.trace('Error while indexing ' + file.basename)
console.error(e) console.error(e)
@@ -101,7 +55,7 @@ export async function addToIndexAndMemCache(
export function addNonExistingToIndex(name: string, parent: string): void { export function addNonExistingToIndex(name: string, parent: string): void {
name = removeAnchors(name) name = removeAnchors(name)
const filename = name + (name.endsWith('.md') ? '' : '.md') const filename = name + (name.endsWith('.md') ? '' : '.md')
if (cacheManager.getNoteFromMemCache(filename)) return if (cacheManager.getDocument(filename)) return
const note: IndexedDocument = { const note: IndexedDocument = {
path: filename, path: filename,
@@ -118,29 +72,30 @@ export function addNonExistingToIndex(name: string, parent: string): void {
doesNotExist: true, doesNotExist: true,
parent, parent,
} }
Search.minisearchInstance.add(note) Search.addSingleToMinisearch(note)
cacheManager.addNoteToMemCache(filename, note) cacheManager.updateDocument(filename, note)
} }
/** /**
* Removes a file from the index, by its path * Removes a file from the index, by its path.
* @param path
*/ */
export function removeFromIndex(path: string): void { export function removeFromIndex(path: string): void {
if (!isFileIndexable(path)) { if (!isFileIndexable(path)) {
console.info(`"${path}" is not an indexable file`) console.info(`"${path}" is not an indexable file`)
return return
} }
const note = cacheManager.getNoteFromMemCache(path) const note = cacheManager.getDocument(path)
if (note) { if (note) {
Search.minisearchInstance.remove(note) Search.removeFromMinisearch(note)
cacheManager.removeNoteFromMemCache(path) cacheManager.deleteDocument(path)
cacheManager
.getNonExistingNotesFromMemCache() // FIXME: only remove non-existing notes if they don't have another parent
.filter(n => n.parent === path) // cacheManager
.forEach(n => { // .getNonExistingNotesFromMemCache()
removeFromIndex(n.path) // .filter(n => n.parent === path)
}) // .forEach(n => {
// removeFromIndex(n.path)
// })
} else { } else {
console.warn(`Omnisearch - Note not found under path ${path}`) console.warn(`Omnisearch - Note not found under path ${path}`)
} }
@@ -148,7 +103,11 @@ export function removeFromIndex(path: string): void {
const notesToReindex = new Set<TAbstractFile>() const notesToReindex = new Set<TAbstractFile>()
export function addNoteToReindex(note: TAbstractFile): void { /**
* Updated notes are not reindexed immediately for performance reasons.
* They're added to a list, and reindex is done the next time we open Omnisearch.
*/
export function markNoteForReindex(note: TAbstractFile): void {
notesToReindex.add(note) notesToReindex.add(note)
} }
@@ -163,35 +122,5 @@ export async function refreshIndex(): Promise<void> {
await wait(0) await wait(0)
} }
notesToReindex.clear() notesToReindex.clear()
await cacheManager.writeMinisearchIndex(Search.minisearchInstance)
}
}
export async function indexPDFs() {
if (settings.PDFIndexing) {
const files = app.vault.getFiles().filter(f => f.path.endsWith('.pdf'))
console.time('PDF Indexing')
console.log(`Omnisearch - Indexing ${files.length} PDFs`)
const input = []
for (const file of files) {
if (cacheManager.getNoteFromMemCache(file.path)) {
removeFromIndex(file.path)
}
input.push(
processQueue(async () => {
await addToIndexAndMemCache(file)
await cacheManager.writeMinisearchIndex(Search.minisearchInstance)
})
)
}
await Promise.all(input)
// await pdfQueue.onEmpty()
console.timeEnd('PDF Indexing')
if (settings.showIndexingNotices) {
new Notice(`Omnisearch - Indexed ${files.length} PDFs`)
}
await pdfManager.cleanCache()
} }
} }

View File

@@ -1,7 +1,7 @@
import type { TFile } from 'obsidian' import type { TFile } from 'obsidian'
import WebWorker from 'web-worker:./pdf-worker.ts' import WebWorker from 'web-worker:./pdf-worker.ts'
import { makeMD5 } from './utils' import { makeMD5 } from '../tools/utils'
import { database } from './database' import { database } from '../database'
const workerTimeout = 120_000 const workerTimeout = 120_000

View File

@@ -1,5 +1,5 @@
import rustPlugin from '../pkg/obsidian_search_bg.wasm' import rustPlugin from '../../pkg/obsidian_search_bg.wasm'
import * as plugin from '../pkg/obsidian_search' import * as plugin from '../../pkg'
const decodedPlugin = decodeBase64(rustPlugin as any) const decodedPlugin = decodeBase64(rustPlugin as any)

View File

@@ -1,6 +1,6 @@
import { settings } from './settings' import { settings } from '../settings'
import { removeDiacritics, stripSurroundingQuotes } from './utils' import { removeDiacritics, stripSurroundingQuotes } from '../tools/utils'
import { parseQuery } from './vendor/parse-query' import { parseQuery } from '../vendor/parse-query'
type QueryToken = { type QueryToken = {
/** /**

View File

@@ -1,4 +1,4 @@
import { historyFilePath } from './globals' import { historyFilePath } from '../globals'
export let searchHistory: string[] = [] export let searchHistory: string[] = []

View File

@@ -1,25 +1,25 @@
import { Notice } from 'obsidian' import MiniSearch, {
import MiniSearch, { type Options, type SearchResult } from 'minisearch' type AsPlainObject,
type Options,
type SearchResult,
} from 'minisearch'
import { import {
chsRegex, chsRegex,
type IndexedDocument, type IndexedDocument,
type ResultNote, type ResultNote,
minisearchCacheFilePath,
type SearchMatch, type SearchMatch,
SPACE_OR_PUNCTUATION, SPACE_OR_PUNCTUATION,
} from './globals' } from '../globals'
import { import {
isFilePlaintext,
removeDiacritics, removeDiacritics,
stringsToRegex, stringsToRegex,
stripMarkdownCharacters, stripMarkdownCharacters,
} from './utils' } from '../tools/utils'
import type { Query } from './query' import type { Query } from './query'
import { settings } from './settings' import { settings } from '../settings'
import * as NotesIndex from './notes-index' import { cacheManager } from '../cache-manager'
import { cacheManager } from './cache-manager'
export let minisearchInstance: MiniSearch<IndexedDocument> let minisearchInstance: MiniSearch<IndexedDocument>
const tokenize = (text: string): string[] => { const tokenize = (text: string): string[] => {
const tokens = text.split(SPACE_OR_PUNCTUATION) const tokens = text.split(SPACE_OR_PUNCTUATION)
@@ -32,98 +32,38 @@ const tokenize = (text: string): string[] => {
} else return tokens } else return tokens
} }
/**
 * MiniSearch configuration used when creating the search instance.
 * Documents are identified by their `path`; the basename, aliases, content
 * and the three heading levels are indexed, and `tags` is kept as a stored field.
 */
const minisearchOptions: Options<IndexedDocument> = {
  idField: 'path',
  fields: [
    'basename',
    'aliases',
    'content',
    'headings1',
    'headings2',
    'headings3',
  ],
  storeFields: ['tags'],
  tokenize,
  // Terms are lowercased, after stripping diacritics when the setting asks for it.
  processTerm: (term: string) => {
    const normalized = settings.ignoreDiacritics ? removeDiacritics(term) : term
    return normalized.toLowerCase()
  },
}
/** /**
* Initializes the MiniSearch instance, * Initializes the MiniSearch instance,
* and adds all the notes to the index * and adds all the notes to the index
*/ */
export async function initGlobalSearchIndex(): Promise<void> { export async function initSearchEngine(): Promise<void> {
const options: Options<IndexedDocument> = {
tokenize,
processTerm: (term: string) =>
(settings.ignoreDiacritics ? removeDiacritics(term) : term).toLowerCase(),
idField: 'path',
fields: [
'basename',
'aliases',
'content',
'headings1',
'headings2',
'headings3',
],
storeFields: ['tags'],
}
// Default instance // Default instance
minisearchInstance = new MiniSearch(options) minisearchInstance = new MiniSearch(minisearchOptions)
}
// Load Minisearch cache, if it exists export async function initSearchEngineFromData(json: string): Promise<void> {
if (await app.vault.adapter.exists(minisearchCacheFilePath)) { try {
try { minisearchInstance = MiniSearch.loadJSON(json, minisearchOptions)
const json = await cacheManager.readMinisearchIndex() console.log('Omnisearch - MiniSearch index loaded from the file')
if (json) { } catch (e) {
// If we have cache data, reload it console.error('Omnisearch - Could not load MiniSearch index from json')
minisearchInstance = MiniSearch.loadJSON(json, options) console.error(e)
}
console.log('Omnisearch - MiniSearch index loaded from the file')
} catch (e) {
console.trace(
'Omnisearch - Could not load MiniSearch index from the file'
)
console.error(e)
}
}
// if (!minisearchInstance) {
// resetNotesCache()
// }
// Index files that are already present
const start = new Date().getTime()
const allFiles = app.vault.getFiles().filter(f => isFilePlaintext(f.path))
let files
let notesSuffix
if (settings.persistCache) {
files = allFiles.filter(file => cacheManager.isNoteInMemCacheOutdated(file))
notesSuffix = 'modified notes'
} else {
files = allFiles
notesSuffix = 'notes'
}
if (files.length > 0) {
console.log(`Omnisearch - Indexing ${files.length} ${notesSuffix}`)
}
// Read and index all the files into the search engine
const input = []
for (const file of files) {
if (cacheManager.getNoteFromMemCache(file.path)) {
NotesIndex.removeFromIndex(file.path)
}
input.push(
NotesIndex.processQueue(() => NotesIndex.addToIndexAndMemCache(file))
)
}
await Promise.all(input)
if (files.length > 0) {
const message = `Omnisearch - Indexed ${files.length} ${notesSuffix} in ${
new Date().getTime() - start
}ms`
console.log(message)
if (settings.showIndexingNotices) {
new Notice(message)
}
await cacheManager.writeMinisearchIndex(minisearchInstance)
// PDFs are indexed later, since they're heavier
await NotesIndex.indexPDFs()
} }
} }
@@ -165,10 +105,9 @@ async function search(query: Query): Promise<SearchResult[]> {
const exactTerms = query.getExactTerms() const exactTerms = query.getExactTerms()
if (exactTerms.length) { if (exactTerms.length) {
results = results.filter(r => { results = results.filter(r => {
const title = const title = cacheManager.getDocument(r.id)?.path.toLowerCase() ?? ''
cacheManager.getNoteFromMemCache(r.id)?.path.toLowerCase() ?? ''
const content = stripMarkdownCharacters( const content = stripMarkdownCharacters(
cacheManager.getNoteFromMemCache(r.id)?.content ?? '' cacheManager.getDocument(r.id)?.content ?? ''
).toLowerCase() ).toLowerCase()
return exactTerms.every(q => content.includes(q) || title.includes(q)) return exactTerms.every(q => content.includes(q) || title.includes(q))
}) })
@@ -179,7 +118,7 @@ async function search(query: Query): Promise<SearchResult[]> {
if (exclusions.length) { if (exclusions.length) {
results = results.filter(r => { results = results.filter(r => {
const content = stripMarkdownCharacters( const content = stripMarkdownCharacters(
cacheManager.getNoteFromMemCache(r.id)?.content ?? '' cacheManager.getDocument(r.id)?.content ?? ''
).toLowerCase() ).toLowerCase()
return exclusions.every(q => !content.includes(q.value)) return exclusions.every(q => !content.includes(q.value))
}) })
@@ -247,7 +186,7 @@ export async function getSuggestions(
// Map the raw results to get usable suggestions // Map the raw results to get usable suggestions
return results.map(result => { return results.map(result => {
const note = cacheManager.getNoteFromMemCache(result.id) const note = cacheManager.getDocument(result.id)
if (!note) { if (!note) {
throw new Error(`Note "${result.id}" not indexed`) throw new Error(`Note "${result.id}" not indexed`)
} }
@@ -286,3 +225,25 @@ export async function getSuggestions(
return resultNote return resultNote
}) })
} }
// #region Read/write minisearch index
/**
 * Serializes the current MiniSearch index.
 *
 * @returns the result of `toJSON()` on the module-level MiniSearch instance
 */
export function getMinisearchIndexJSON(): AsPlainObject {
  const serializedIndex = minisearchInstance.toJSON()
  return serializedIndex
}
/**
 * Adds a batch of documents to the MiniSearch index, using MiniSearch's
 * asynchronous bulk insertion.
 *
 * @param documents the documents to index
 * @returns a promise that settles once `addAllAsync` has completed
 */
export async function addAllToMinisearch(
  documents: IndexedDocument[]
): Promise<void> {
  const pendingIndexing = minisearchInstance.addAllAsync(documents)
  await pendingIndexing
}
/**
 * Adds one document to the MiniSearch index (synchronous).
 *
 * @param document the document to index
 */
export function addSingleToMinisearch(document: IndexedDocument): void {
  const index = minisearchInstance
  index.add(document)
}
/**
 * Removes one document from the MiniSearch index (synchronous).
 *
 * NOTE(review): MiniSearch's `remove` expects the document exactly as it was
 * when indexed - confirm that callers pass the previously indexed version.
 *
 * @param document the document to remove
 */
export function removeFromMinisearch(document: IndexedDocument): void {
  const index = minisearchInstance
  index.remove(document)
}
// #endregion

View File

@@ -6,7 +6,6 @@ import {
SliderComponent, SliderComponent,
} from 'obsidian' } from 'obsidian'
import { writable } from 'svelte/store' import { writable } from 'svelte/store'
import { notesCacheFilePath, minisearchCacheFilePath } from './globals'
import type OmnisearchPlugin from './main' import type OmnisearchPlugin from './main'
interface WeightingSettings { interface WeightingSettings {
@@ -28,7 +27,7 @@ export interface OmnisearchSettings extends WeightingSettings {
/** Max number of spawned processes for background tasks, such as extracting text from PDFs */ /** Max number of spawned processes for background tasks, such as extracting text from PDFs */
backgroundProcesses: number backgroundProcesses: number
/** Write cache files on disk (unrelated to PDFs) */ /** Write cache files on disk (unrelated to PDFs) */
persistCache: boolean // persistCache: boolean
/** Display Omnisearch popup notices over Obsidian */ /** Display Omnisearch popup notices over Obsidian */
showIndexingNotices: boolean showIndexingNotices: boolean
/** Activate the small 🔍 button on Obsidian's ribbon */ /** Activate the small 🔍 button on Obsidian's ribbon */
@@ -141,37 +140,27 @@ export class SettingsTab extends PluginSettingTab {
// }) // })
// }) // })
// Store index // // Store index
const serializedIndexDesc = new DocumentFragment() // const serializedIndexDesc = new DocumentFragment()
serializedIndexDesc.createSpan({}, span => { // serializedIndexDesc.createSpan({}, span => {
span.innerHTML = `This will speedup startup times after the initial indexing. Do not activate it unless indexing is too slow on your device: // span.innerHTML = `This will speedup startup times after the initial indexing. Do not activate it unless indexing is too slow on your device:
<ul> // <ul>
<li>PDF indexing is not affected by this setting</li> // <li>PDF indexing is not affected by this setting</li>
<li>⚠️ The index can become corrupted - if you notice any issue, disable and re-enable this option to clear the cache.</li> // <li>⚠️ The index can become corrupted - if you notice any issue, disable and re-enable this option to clear the cache.</li>
<li>⚠️ Cache files in <code>.obsidian/plugins/omnisearch/*.data</code> must not be synchronized between your devices.</li> // <li>⚠️ Cache files in <code>.obsidian/plugins/omnisearch/*.data</code> must not be synchronized between your devices.</li>
</ul> // </ul>
<strong style="color: var(--text-accent)">Needs a restart to fully take effect.</strong> // <strong style="color: var(--text-accent)">Needs a restart to fully take effect.</strong>
` // `
}) // })
new Setting(containerEl) // new Setting(containerEl)
.setName('Persist cache on disk') // .setName('Persist cache on disk')
.setDesc(serializedIndexDesc) // .setDesc(serializedIndexDesc)
.addToggle(toggle => // .addToggle(toggle =>
toggle.setValue(settings.persistCache).onChange(async v => { // toggle.setValue(settings.persistCache).onChange(async v => {
try { // settings.persistCache = v
await app.vault.adapter.remove(notesCacheFilePath) // await saveSettings(this.plugin)
} catch (e) { // })
console.warn(e) // )
}
try {
await app.vault.adapter.remove(minisearchCacheFilePath)
} catch (e) {
console.warn(e)
}
settings.persistCache = v
await saveSettings(this.plugin)
})
)
// PDF Indexing // PDF Indexing
const indexPDFsDesc = new DocumentFragment() const indexPDFsDesc = new DocumentFragment()
@@ -363,7 +352,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
CtrlJK: false, CtrlJK: false,
CtrlNP: false, CtrlNP: false,
persistCache: false, // persistCache: false,
welcomeMessage: '', welcomeMessage: '',
} as const } as const

View File

@@ -1,6 +1,6 @@
import type { ResultNote, SearchMatch } from './globals' import type { ResultNote, SearchMatch } from '../globals'
import { Query } from './query' import { Query } from '../search/query'
import * as Search from './search' import * as Search from '../search/search'
type ResultNoteApi = { type ResultNoteApi = {
score: number score: number

View File

@@ -1,6 +1,6 @@
import { type CachedMetadata, MarkdownView, TFile } from 'obsidian' import { type CachedMetadata, MarkdownView, TFile } from 'obsidian'
import { stringsToRegex } from './utils' import { stringsToRegex } from './utils'
import type { ResultNote } from './globals' import type { ResultNote } from '../globals'
export async function openNote( export async function openNote(
item: ResultNote, item: ResultNote,

View File

@@ -1,5 +1,5 @@
import { type CachedMetadata, Notice, Platform, Plugin } from 'obsidian' import { type CachedMetadata, Platform } from 'obsidian'
import type { SearchMatch } from './globals' import type { SearchMatch } from '../globals'
import { import {
excerptAfter, excerptAfter,
excerptBefore, excerptBefore,
@@ -8,9 +8,9 @@ import {
regexLineSplit, regexLineSplit,
regexStripQuotes, regexStripQuotes,
regexYaml, regexYaml,
} from './globals' } from '../globals'
import { settings } from './settings' import { settings } from '../settings'
import { createHash, type BinaryLike } from 'crypto' import { type BinaryLike, createHash } from 'crypto'
import { md5 } from 'pure-md5' import { md5 } from 'pure-md5'
export function highlighter(str: string): string { export function highlighter(str: string): string {