Squashed commit of the following:

commit ac82511ddd17d5472ae3cfea9bbad9754f5a4d62
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Sat Oct 22 08:23:42 2022 +0200

    Screw that cache, seriously.

commit 8ba40d1be73daaaffea09e07bc56c339266db9b6
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Fri Oct 21 22:36:48 2022 +0200

    Stuff

commit 27b8fd7dc809be9714a109d3a458eb1276a47e2e
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Fri Oct 21 22:22:20 2022 +0200

    Moved files

commit fb1349c914907e586e103ca54fb04b9ddd45ef5d
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Thu Oct 20 22:25:29 2022 +0200

    Removed duplicate code

commit e7371138e60cbe4155cfd4fb44e3ee1d2e3ee088
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Thu Oct 20 21:50:09 2022 +0200

    Moved a bunch of files

commit 2ee1b2a0e799d4b41ab3a444d8cc44dfff5b5623
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Thu Oct 20 21:32:21 2022 +0200

    Removed useless code

commit 76c530dfb9adbad1bbe9079de2330fe43a044249
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Thu Oct 20 20:44:11 2022 +0200

    Split file reading and indexing
This commit is contained in:
Simon Cambier
2022-10-22 08:25:34 +02:00
parent 1376cea282
commit c2ecdd79ad
25 changed files with 338 additions and 403 deletions

View File

@@ -1,4 +1,4 @@
import { EventBus } from '../event-bus'
import { EventBus } from '../tools/event-bus'
describe('EventBus', () => {
it('should refuse the registering of invalid ctx/event names', () => {

View File

@@ -1,4 +1,4 @@
import { Query } from '../query'
import { Query } from '../search/query'
describe('The Query class', () => {
const stringQuery =

View File

@@ -1,5 +1,5 @@
import type { CachedMetadata } from 'obsidian'
import { getAliasesFromMetadata } from '../utils'
import { getAliasesFromMetadata } from '../tools/utils'
describe('Utils', () => {
describe('getAliasesFromMetadata', () => {

View File

@@ -1,120 +1,50 @@
import { throttle } from 'lodash-es'
import type MiniSearch from 'minisearch'
import type { TFile } from 'obsidian'
import { deflate, inflate } from 'pako'
import {
notesCacheFilePath,
minisearchCacheFilePath,
type IndexedDocument,
} from './globals'
import { settings } from './settings'
import type { IndexedDocument } from './globals'
class CacheManager {
notesCache: Record<string, IndexedDocument> = {}
compress = true
writeInterval = 5_000 // In milliseconds
private documentsCache: Map<string, IndexedDocument> = new Map()
private writeInterval = 10_000 // In milliseconds
//#region Minisearch
/**
* Serializes and writes the Minisearch index on the disk
*/
public writeMinisearchIndex = throttle(
this._writeMinisearchIndex,
this.writeInterval,
{
leading: true,
trailing: true,
}
)
private async _writeMinisearchIndex(minisearch: MiniSearch): Promise<void> {
if (!settings.persistCache) {
return
}
const json = JSON.stringify(minisearch)
const data = this.compress ? deflate(json) : json
await app.vault.adapter.writeBinary(minisearchCacheFilePath, data as any)
console.log('Omnisearch - Minisearch index saved on disk')
public async updateDocument(path: string, note: IndexedDocument) {
this.documentsCache.set(path, note)
}
public async readMinisearchIndex(): Promise<string | null> {
if (!settings.persistCache) {
return null
}
if (await app.vault.adapter.exists(minisearchCacheFilePath)) {
try {
const data = await app.vault.adapter.readBinary(minisearchCacheFilePath)
return (
this.compress ? new TextDecoder('utf8').decode(inflate(data)) : data
) as any
} catch (e) {
console.trace(
'Omnisearch - Could not load MiniSearch index from the file:'
)
console.warn(e)
app.vault.adapter.remove(minisearchCacheFilePath)
}
}
return null
public deleteDocument(key: string): void {
this.documentsCache.delete(key)
}
//#endregion Minisearch
public async loadNotesCache() {
if (!settings.persistCache) {
return null
}
if (await app.vault.adapter.exists(notesCacheFilePath)) {
try {
const data = await app.vault.adapter.readBinary(notesCacheFilePath)
const json = (
this.compress ? new TextDecoder('utf8').decode(inflate(data)) : data
) as any
this.notesCache = JSON.parse(json)
} catch (e) {
console.trace('Omnisearch - Could not load notes cache:')
console.warn(e)
app.vault.adapter.remove(notesCacheFilePath)
}
}
return null
}
public saveNotesCache = throttle(this._saveNotesCache, this.writeInterval, {
leading: true,
trailing: true,
})
private async _saveNotesCache() {
if (!settings.persistCache) {
return
}
const json = JSON.stringify(this.notesCache)
const data = this.compress ? deflate(json) : json
await app.vault.adapter.writeBinary(notesCacheFilePath, data as any)
console.log('Omnisearch - Notes cache saved on disk')
}
public addNoteToMemCache(path: string, note: IndexedDocument) {
this.notesCache[path] = note
this.saveNotesCache()
}
public removeNoteFromMemCache(key: string): void {
delete this.notesCache[key]
}
public getNoteFromMemCache(key: string): IndexedDocument | undefined {
return this.notesCache[key]
public getDocument(key: string): IndexedDocument | undefined {
return this.documentsCache.get(key)
}
/**
 * Lists the cached documents whose `doesNotExist` flag is set.
 *
 * `documentsCache` is a Map, so its entries have to be read through
 * `.values()`. `Object.values()` only enumerates own enumerable properties,
 * which a Map does not have, so it always produced an empty array here.
 *
 * @returns the cached documents flagged as non-existing
 */
public getNonExistingNotesFromMemCache(): IndexedDocument[] {
  return Array.from(this.documentsCache.values()).filter(
    note => note.doesNotExist
  )
}
public isNoteInMemCacheOutdated(file: TFile): boolean {
const indexedNote = this.getNoteFromMemCache(file.path)
public isDocumentOutdated(file: TFile): boolean {
const indexedNote = this.getDocument(file.path)
return !indexedNote || indexedNote.mtime !== file.stat.mtime
}
// private async _writeMinisearchIndex(minisearch: MiniSearch): Promise<void> {
// if (!settings.persistCache) {
// return
// }
// const json = JSON.stringify(minisearch)
// const data = deflate(json)
// await app.vault.adapter.writeBinary(minisearchCacheFilePath, data as any)
// console.log('Omnisearch - Minisearch index saved on disk')
// }
//
// private async _saveNotesCache() {
// if (!settings.persistCache) {
// return
// }
// const json = JSON.stringify(Array.from(this.documentsCache.entries()))
// const data = deflate(json)
// await app.vault.adapter.writeBinary(notesCacheFilePath, data as any)
// console.log('Omnisearch - Notes cache saved on disk')
// }
}
export const cacheManager = new CacheManager()

View File

@@ -10,16 +10,16 @@
type ResultNote,
type SearchMatch,
} from 'src/globals'
import { loopIndex } from 'src/utils'
import { loopIndex } from 'src/tools/utils'
import { onDestroy, onMount, tick } from 'svelte'
import { MarkdownView } from 'obsidian'
import * as Search from 'src/search'
import * as Search from 'src/search/search'
import ModalContainer from './ModalContainer.svelte'
import { OmnisearchInFileModal, OmnisearchVaultModal } from 'src/modals'
import { OmnisearchInFileModal, OmnisearchVaultModal } from 'src/components/modals'
import ResultItemInFile from './ResultItemInFile.svelte'
import { Query } from 'src/query'
import { openNote } from 'src/notes'
import { saveSearchHistory } from '../search-history'
import { Query } from 'src/search/query'
import { openNote } from 'src/tools/notes'
import { saveSearchHistory } from '../search/search-history'
export let modal: OmnisearchInFileModal
export let parent: OmnisearchVaultModal | null = null

View File

@@ -4,13 +4,13 @@
import InputSearch from './InputSearch.svelte'
import ModalContainer from './ModalContainer.svelte'
import { eventBus, type ResultNote } from 'src/globals'
import { createNote, openNote } from 'src/notes'
import * as Search from 'src/search'
import { getCtrlKeyLabel, getExtension, loopIndex } from 'src/utils'
import { OmnisearchInFileModal, type OmnisearchVaultModal } from 'src/modals'
import { createNote, openNote } from 'src/tools/notes'
import * as Search from 'src/search/search'
import { getCtrlKeyLabel, getExtension, loopIndex } from 'src/tools/utils'
import { OmnisearchInFileModal, type OmnisearchVaultModal } from 'src/components/modals'
import ResultItemVault from './ResultItemVault.svelte'
import { Query } from 'src/query'
import { saveSearchHistory, searchHistory } from 'src/search-history'
import { Query } from 'src/search/query'
import { saveSearchHistory, searchHistory } from 'src/search/search-history'
import { settings } from '../settings'
import * as NotesIndex from '../notes-index'

View File

@@ -1,6 +1,6 @@
<script lang="ts">
import type { ResultNote } from '../globals'
import { highlighter, makeExcerpt, stringsToRegex } from '../utils'
import { highlighter, makeExcerpt, stringsToRegex } from '../tools/utils'
import ResultItemContainer from './ResultItemContainer.svelte'
export let offset: number

View File

@@ -2,7 +2,7 @@
import { cacheManager } from 'src/cache-manager'
import { settings, showExcerpt } from 'src/settings'
import type { ResultNote } from '../globals'
import { highlighter, makeExcerpt, stringsToRegex } from '../utils'
import { highlighter, makeExcerpt, stringsToRegex } from '../tools/utils'
import ResultItemContainer from './ResultItemContainer.svelte'
export let selected = false
@@ -10,7 +10,7 @@
$: reg = stringsToRegex(note.foundWords)
$: cleanedContent = makeExcerpt(note.content, note.matches[0]?.offset ?? -1)
$: glyph = cacheManager.getNoteFromMemCache(note.path)?.doesNotExist
$: glyph = cacheManager.getDocument(note.path)?.doesNotExist
$: title = settings.showShortName ? note.basename : note.path
</script>

View File

@@ -1,8 +1,8 @@
import { App, Modal, TFile } from 'obsidian'
import ModalVault from './components/ModalVault.svelte'
import ModalInFile from './components/ModalInFile.svelte'
import {eventBus, EventNames, isInputComposition} from './globals'
import { settings } from './settings'
import ModalVault from './ModalVault.svelte'
import ModalInFile from './ModalInFile.svelte'
import {eventBus, EventNames, isInputComposition} from '../globals'
import { settings } from '../settings'
abstract class OmnisearchModal extends Modal {
protected constructor(app: App) {

View File

@@ -1,15 +1,23 @@
import Dexie from 'dexie'
import type { IndexedDocument } from './globals'
class OmnisearchCache extends Dexie {
pdf!: Dexie.Table<
{ path: string; hash: string; size: number; text: string },
string
>
documents!: Dexie.Table<
{ document: IndexedDocument; path: string; mtime: number },
string
>
minisearch!: Dexie.Table<string>
constructor() {
super(app.appId + '_omnisearch')
this.version(1).stores({
this.version(2).stores({
pdf: 'path, hash, size, text',
documents: 'path, mtime, document',
minisearch: 'data',
})
}
}

92
src/file-loader.ts Normal file
View File

@@ -0,0 +1,92 @@
import { cacheManager } from './cache-manager'
import {
extractHeadingsFromCache,
getAliasesFromMetadata,
getTagsFromMetadata,
isFilePlaintext,
removeDiacritics,
} from './tools/utils'
import * as NotesIndex from './notes-index'
import type { TFile } from 'obsidian'
import type { IndexedDocument } from './globals'
import { pdfManager } from './pdf/pdf-manager'
import { getNonExistingNotes } from './tools/notes'
/**
 * Converts every plaintext file of the vault into an IndexedDocument.
 * Files are handled one at a time; each converted document is also handed
 * to the cache manager before the next file is read.
 *
 * @returns the converted documents, in the order the files were processed
 */
export async function getPlainTextFiles(): Promise<IndexedDocument[]> {
  const documents: IndexedDocument[] = []
  const plaintextFiles = app.vault
    .getFiles()
    .filter(candidate => isFilePlaintext(candidate.path))

  for (const plaintextFile of plaintextFiles) {
    const converted = await fileToIndexedDocument(plaintextFile)
    documents.push(converted)
    await cacheManager.updateDocument(plaintextFile.path, converted)
  }

  return documents
}
/**
 * Return all PDF files of the vault as IndexedDocuments.
 * Each conversion is scheduled through `NotesIndex.processQueue`, and every
 * resulting document is also stored in the cache manager.
 * Text extraction is delegated to `fileToIndexedDocument`.
 *
 * @returns the converted documents, in the order their conversions finished
 */
export async function getPDFFiles(): Promise<IndexedDocument[]> {
  const allFiles = app.vault.getFiles().filter(f => f.path.endsWith('.pdf'))
  const data: IndexedDocument[] = []
  const input = allFiles.map(file =>
    NotesIndex.processQueue(async () => {
      const doc = await fileToIndexedDocument(file)
      // Await the cache update (as getPlainTextFiles does) so that a failure
      // rejects this task instead of becoming an unhandled floating promise.
      await cacheManager.updateDocument(file.path, doc)
      data.push(doc)
    })
  )
  await Promise.all(input)
  return data
}
/**
 * Convert a file into an IndexedDocument.
 *
 * Plaintext files are read with `app.vault.cachedRead`, PDFs through
 * `pdfManager.getPdfText`. As a side effect, links of this file that lead to
 * non-existing notes, and that are not yet in the cache manager, are added
 * to the index through `NotesIndex.addNonExistingToIndex`.
 *
 * @param file the vault file to convert
 * @returns the document built from the file content and its metadata cache
 * @throws Error when the file is neither plaintext nor a PDF
 */
export async function fileToIndexedDocument(
  file: TFile
): Promise<IndexedDocument> {
  // Diacritics are stripped exactly once, in the branch that reads the file.
  // A second pass over the whole content used to follow this block.
  // NOTE(review): dropping it assumes removeDiacritics is idempotent - confirm.
  let content: string
  if (isFilePlaintext(file.path)) {
    content = removeDiacritics(await app.vault.cachedRead(file))
  } else if (file.path.endsWith('.pdf')) {
    content = removeDiacritics(await pdfManager.getPdfText(file))
  } else {
    throw new Error('Invalid file: ' + file.path)
  }

  const metadata = app.metadataCache.getFileCache(file)

  // Look for links that lead to non-existing files,
  // and add them to the index.
  if (metadata) {
    const nonExisting = getNonExistingNotes(file, metadata)
    for (const name of nonExisting.filter(o => !cacheManager.getDocument(o))) {
      NotesIndex.addNonExistingToIndex(name, file.path)
    }
  }

  return {
    basename: removeDiacritics(file.basename),
    content,
    path: file.path,
    mtime: file.stat.mtime,

    tags: getTagsFromMetadata(metadata),
    // NOTE(review): aliases are concatenated without a separator, unlike the
    // headings below - confirm this is intended.
    aliases: getAliasesFromMetadata(metadata).join(''),
    headings1: metadata ? extractHeadingsFromCache(metadata, 1).join(' ') : '',
    headings2: metadata ? extractHeadingsFromCache(metadata, 2).join(' ') : '',
    headings3: metadata ? extractHeadingsFromCache(metadata, 3).join(' ') : '',
  }
}

View File

@@ -1,6 +1,4 @@
import pLimit from 'p-limit'
import { EventBus } from './event-bus'
import { settings } from './settings'
import { EventBus } from './tools/event-bus'
export const regexLineSplit = /\r?\n|\r|((\.|\?|!)( |\r?\n|\r))/g
export const regexYaml = /^---\s*\n(.*?)\n?^---\s?/ms
@@ -14,8 +12,6 @@ export const highlightClass = 'suggestion-highlight omnisearch-highlight'
export const eventBus = new EventBus()
export const minisearchCacheFilePath = `${app.vault.configDir}/plugins/omnisearch/searchIndex.data`
export const notesCacheFilePath = `${app.vault.configDir}/plugins/omnisearch/notesCache.data`
export const historyFilePath = `${app.vault.configDir}/plugins/omnisearch/historyCache.json`
export const EventNames = {

View File

@@ -1,29 +1,26 @@
import { Notice, Plugin, TFile } from 'obsidian'
import * as Search from './search'
import { OmnisearchInFileModal, OmnisearchVaultModal } from './modals'
import * as Search from './search/search'
import {
OmnisearchInFileModal,
OmnisearchVaultModal,
} from './components/modals'
import { loadSettings, settings, SettingsTab, showExcerpt } from './settings'
import { eventBus, EventNames } from './globals'
import { registerAPI } from '@vanakat/plugin-api'
import api from './api'
import { loadSearchHistory } from './search-history'
import { isFilePlaintext } from './utils'
import api from './tools/api'
import { loadSearchHistory } from './search/search-history'
import { isFilePlaintext } from './tools/utils'
import * as NotesIndex from './notes-index'
import { cacheManager } from './cache-manager'
/**
 * Publishes the Omnisearch `api` object.
 * It is registered under the name 'omnisearch' through `registerAPI`, and
 * also attached as `api` on `app.plugins.plugins.omnisearch`. The callback
 * given to `plugin.register` deletes that property again
 * (presumably run when the plugin unloads - confirm against the Obsidian API).
 * @param plugin the plugin instance the API is registered for
 */
function _registerAPI(plugin: OmnisearchPlugin): void {
registerAPI('omnisearch', api, plugin as any)
;(app as any).plugins.plugins.omnisearch.api = api
plugin.register(() => {
delete (app as any).plugins.plugins.omnisearch.api
})
}
import * as FileLoader from './file-loader'
export default class OmnisearchPlugin extends Plugin {
async onload(): Promise<void> {
await cleanOldCacheFiles()
await loadSettings(this)
await loadSearchHistory()
await cacheManager.loadNotesCache()
// Initialize minisearch
await Search.initSearchEngine()
_registerAPI(this)
@@ -69,7 +66,7 @@ export default class OmnisearchPlugin extends Plugin {
)
this.registerEvent(
this.app.vault.on('modify', async file => {
NotesIndex.addNoteToReindex(file)
NotesIndex.markNoteForReindex(file)
})
)
this.registerEvent(
@@ -81,7 +78,7 @@ export default class OmnisearchPlugin extends Plugin {
})
)
await Search.initGlobalSearchIndex()
await populateIndex()
})
// showWelcomeNotice(this)
@@ -99,11 +96,36 @@ export default class OmnisearchPlugin extends Plugin {
}
}
/**
 * Feeds Minisearch with the vault content: plaintext files first, then the
 * PDF files when the `PDFIndexing` setting is enabled.
 * Each phase is timed, and its document count is logged to the console.
 */
async function populateIndex(): Promise<void> {
  // Phase 1: plaintext files
  console.time('Omnisearch - Timing')
  const notes = await FileLoader.getPlainTextFiles()
  await Search.addAllToMinisearch(notes)
  console.log(`Omnisearch - Indexed ${notes.length} notes`)
  console.timeEnd('Omnisearch - Timing')

  // Phase 2: PDFs, only when the user opted in
  if (!settings.PDFIndexing) {
    return
  }
  console.time('Omnisearch - Timing')
  const pdfDocuments = await FileLoader.getPDFFiles()
  await Search.addAllToMinisearch(pdfDocuments)
  console.log(`Omnisearch - Indexed ${pdfDocuments.length} PDFs`)
  console.timeEnd('Omnisearch - Timing')
}
async function cleanOldCacheFiles() {
const toDelete = [
`${app.vault.configDir}/plugins/omnisearch/searchIndex.json`,
`${app.vault.configDir}/plugins/omnisearch/notesCache.json`,
`${app.vault.configDir}/plugins/omnisearch/pdfCache.data`
`${app.vault.configDir}/plugins/omnisearch/notesCache.data`,
`${app.vault.configDir}/plugins/omnisearch/searchIndex.data`,
`${app.vault.configDir}/plugins/omnisearch/pdfCache.data`,
]
for (const item of toDelete) {
if (await app.vault.adapter.exists(item)) {
@@ -130,3 +152,11 @@ New beta feature: PDF search 🔎📄
plugin.saveData(settings)
}
/**
 * Makes the Omnisearch `api` object reachable by other plugins: it is
 * registered as 'omnisearch' via `registerAPI` and stored on
 * `app.plugins.plugins.omnisearch.api`. A cleanup callback that deletes that
 * property is handed to `plugin.register`.
 * @param plugin the plugin instance the API belongs to
 */
function _registerAPI(plugin: OmnisearchPlugin): void {
  registerAPI('omnisearch', api, plugin as any)

  const omnisearchEntry = (app as any).plugins.plugins.omnisearch
  omnisearchEntry.api = api

  const removeApi = (): void => {
    delete (app as any).plugins.plugins.omnisearch.api
  }
  plugin.register(removeApi)
}

View File

@@ -1,20 +1,12 @@
import { Notice, TAbstractFile, TFile } from 'obsidian'
import {
extractHeadingsFromCache,
getAliasesFromMetadata,
getTagsFromMetadata,
isFileIndexable,
removeDiacritics,
wait,
} from './utils'
import { getNonExistingNotes, removeAnchors } from './notes'
import { pdfManager } from './pdf-manager'
import { isFileIndexable, wait } from './tools/utils'
import { removeAnchors } from './tools/notes'
import { settings } from './settings'
import * as Search from './search'
// import PQueue from 'p-queue-compat'
import * as Search from './search/search'
import { cacheManager } from './cache-manager'
import pLimit from 'p-limit'
import type { IndexedDocument } from './globals'
import { fileToIndexedDocument } from './file-loader'
/**
* Use this processing queue to handle all heavy work
@@ -33,59 +25,21 @@ export async function addToIndexAndMemCache(
return
}
// Check if the file was already indexed as non-existent,
// and if so, remove it from the index (before adding it again)
if (cacheManager.getNoteFromMemCache(file.path)?.doesNotExist) {
// Check if the file was already indexed as non-existent.
// If so, remove it from the index, and add it again as a real note.
if (cacheManager.getDocument(file.path)?.doesNotExist) {
removeFromIndex(file.path)
}
try {
// Look for links that lead to non-existing files,
// and index them as well
const metadata = app.metadataCache.getFileCache(file)
if (metadata) {
const nonExisting = getNonExistingNotes(file, metadata)
for (const name of nonExisting.filter(
o => !cacheManager.getNoteFromMemCache(o)
)) {
addNonExistingToIndex(name, file.path)
}
}
if (cacheManager.getNoteFromMemCache(file.path)) {
if (cacheManager.getDocument(file.path)) {
throw new Error(`${file.basename} is already indexed`)
}
let content
if (file.path.endsWith('.pdf')) {
content = removeDiacritics(await pdfManager.getPdfText(file as TFile))
} else {
// Fetch content from the cache to index it as-is
content = removeDiacritics(await app.vault.cachedRead(file))
}
// Make the document and index it
const note: IndexedDocument = {
basename: removeDiacritics(file.basename),
content,
path: file.path,
mtime: file.stat.mtime,
tags: getTagsFromMetadata(metadata),
aliases: getAliasesFromMetadata(metadata).join(''),
headings1: metadata
? extractHeadingsFromCache(metadata, 1).join(' ')
: '',
headings2: metadata
? extractHeadingsFromCache(metadata, 2).join(' ')
: '',
headings3: metadata
? extractHeadingsFromCache(metadata, 3).join(' ')
: '',
}
Search.minisearchInstance.add(note)
cacheManager.addNoteToMemCache(note.path, note)
const note = await fileToIndexedDocument(file)
Search.addSingleToMinisearch(note)
await cacheManager.updateDocument(note.path, note)
} catch (e) {
// console.trace('Error while indexing ' + file.basename)
console.error(e)
@@ -101,7 +55,7 @@ export async function addToIndexAndMemCache(
export function addNonExistingToIndex(name: string, parent: string): void {
name = removeAnchors(name)
const filename = name + (name.endsWith('.md') ? '' : '.md')
if (cacheManager.getNoteFromMemCache(filename)) return
if (cacheManager.getDocument(filename)) return
const note: IndexedDocument = {
path: filename,
@@ -118,29 +72,30 @@ export function addNonExistingToIndex(name: string, parent: string): void {
doesNotExist: true,
parent,
}
Search.minisearchInstance.add(note)
cacheManager.addNoteToMemCache(filename, note)
Search.addSingleToMinisearch(note)
cacheManager.updateDocument(filename, note)
}
/**
* Removes a file from the index, by its path
* @param path
* Removes a file from the index, by its path.
*/
export function removeFromIndex(path: string): void {
if (!isFileIndexable(path)) {
console.info(`"${path}" is not an indexable file`)
return
}
const note = cacheManager.getNoteFromMemCache(path)
const note = cacheManager.getDocument(path)
if (note) {
Search.minisearchInstance.remove(note)
cacheManager.removeNoteFromMemCache(path)
cacheManager
.getNonExistingNotesFromMemCache()
.filter(n => n.parent === path)
.forEach(n => {
removeFromIndex(n.path)
})
Search.removeFromMinisearch(note)
cacheManager.deleteDocument(path)
// FIXME: only remove non-existing notes if they don't have another parent
// cacheManager
// .getNonExistingNotesFromMemCache()
// .filter(n => n.parent === path)
// .forEach(n => {
// removeFromIndex(n.path)
// })
} else {
console.warn(`Omnisearch - Note not found under path ${path}`)
}
@@ -148,7 +103,11 @@ export function removeFromIndex(path: string): void {
const notesToReindex = new Set<TAbstractFile>()
export function addNoteToReindex(note: TAbstractFile): void {
/**
* Updated notes are not reindexed immediately for performance reasons.
* They're added to a list, and reindex is done the next time we open Omnisearch.
*/
export function markNoteForReindex(note: TAbstractFile): void {
notesToReindex.add(note)
}
@@ -163,35 +122,5 @@ export async function refreshIndex(): Promise<void> {
await wait(0)
}
notesToReindex.clear()
await cacheManager.writeMinisearchIndex(Search.minisearchInstance)
}
}
export async function indexPDFs() {
if (settings.PDFIndexing) {
const files = app.vault.getFiles().filter(f => f.path.endsWith('.pdf'))
console.time('PDF Indexing')
console.log(`Omnisearch - Indexing ${files.length} PDFs`)
const input = []
for (const file of files) {
if (cacheManager.getNoteFromMemCache(file.path)) {
removeFromIndex(file.path)
}
input.push(
processQueue(async () => {
await addToIndexAndMemCache(file)
await cacheManager.writeMinisearchIndex(Search.minisearchInstance)
})
)
}
await Promise.all(input)
// await pdfQueue.onEmpty()
console.timeEnd('PDF Indexing')
if (settings.showIndexingNotices) {
new Notice(`Omnisearch - Indexed ${files.length} PDFs`)
}
await pdfManager.cleanCache()
}
}

View File

@@ -1,7 +1,7 @@
import type { TFile } from 'obsidian'
import WebWorker from 'web-worker:./pdf-worker.ts'
import { makeMD5 } from './utils'
import { database } from './database'
import { makeMD5 } from '../tools/utils'
import { database } from '../database'
const workerTimeout = 120_000

View File

@@ -1,5 +1,5 @@
import rustPlugin from '../pkg/obsidian_search_bg.wasm'
import * as plugin from '../pkg/obsidian_search'
import rustPlugin from '../../pkg/obsidian_search_bg.wasm'
import * as plugin from '../../pkg'
const decodedPlugin = decodeBase64(rustPlugin as any)

View File

@@ -1,6 +1,6 @@
import { settings } from './settings'
import { removeDiacritics, stripSurroundingQuotes } from './utils'
import { parseQuery } from './vendor/parse-query'
import { settings } from '../settings'
import { removeDiacritics, stripSurroundingQuotes } from '../tools/utils'
import { parseQuery } from '../vendor/parse-query'
type QueryToken = {
/**

View File

@@ -1,4 +1,4 @@
import { historyFilePath } from './globals'
import { historyFilePath } from '../globals'
export let searchHistory: string[] = []

View File

@@ -1,25 +1,25 @@
import { Notice } from 'obsidian'
import MiniSearch, { type Options, type SearchResult } from 'minisearch'
import MiniSearch, {
type AsPlainObject,
type Options,
type SearchResult,
} from 'minisearch'
import {
chsRegex,
type IndexedDocument,
type ResultNote,
minisearchCacheFilePath,
type SearchMatch,
SPACE_OR_PUNCTUATION,
} from './globals'
} from '../globals'
import {
isFilePlaintext,
removeDiacritics,
stringsToRegex,
stripMarkdownCharacters,
} from './utils'
} from '../tools/utils'
import type { Query } from './query'
import { settings } from './settings'
import * as NotesIndex from './notes-index'
import { cacheManager } from './cache-manager'
import { settings } from '../settings'
import { cacheManager } from '../cache-manager'
export let minisearchInstance: MiniSearch<IndexedDocument>
let minisearchInstance: MiniSearch<IndexedDocument>
const tokenize = (text: string): string[] => {
const tokens = text.split(SPACE_OR_PUNCTUATION)
@@ -32,98 +32,38 @@ const tokenize = (text: string): string[] => {
} else return tokens
}
const minisearchOptions: Options<IndexedDocument> = {
tokenize,
processTerm: (term: string) =>
(settings.ignoreDiacritics ? removeDiacritics(term) : term).toLowerCase(),
idField: 'path',
fields: [
'basename',
'aliases',
'content',
'headings1',
'headings2',
'headings3',
],
storeFields: ['tags'],
}
/**
* Initializes the MiniSearch instance,
* and adds all the notes to the index
*/
export async function initGlobalSearchIndex(): Promise<void> {
const options: Options<IndexedDocument> = {
tokenize,
processTerm: (term: string) =>
(settings.ignoreDiacritics ? removeDiacritics(term) : term).toLowerCase(),
idField: 'path',
fields: [
'basename',
'aliases',
'content',
'headings1',
'headings2',
'headings3',
],
storeFields: ['tags'],
}
export async function initSearchEngine(): Promise<void> {
// Default instance
minisearchInstance = new MiniSearch(options)
minisearchInstance = new MiniSearch(minisearchOptions)
}
// Load Minisearch cache, if it exists
if (await app.vault.adapter.exists(minisearchCacheFilePath)) {
try {
const json = await cacheManager.readMinisearchIndex()
if (json) {
// If we have cache data, reload it
minisearchInstance = MiniSearch.loadJSON(json, options)
}
console.log('Omnisearch - MiniSearch index loaded from the file')
} catch (e) {
console.trace(
'Omnisearch - Could not load MiniSearch index from the file'
)
console.error(e)
}
}
// if (!minisearchInstance) {
// resetNotesCache()
// }
// Index files that are already present
const start = new Date().getTime()
const allFiles = app.vault.getFiles().filter(f => isFilePlaintext(f.path))
let files
let notesSuffix
if (settings.persistCache) {
files = allFiles.filter(file => cacheManager.isNoteInMemCacheOutdated(file))
notesSuffix = 'modified notes'
} else {
files = allFiles
notesSuffix = 'notes'
}
if (files.length > 0) {
console.log(`Omnisearch - Indexing ${files.length} ${notesSuffix}`)
}
// Read and index all the files into the search engine
const input = []
for (const file of files) {
if (cacheManager.getNoteFromMemCache(file.path)) {
NotesIndex.removeFromIndex(file.path)
}
input.push(
NotesIndex.processQueue(() => NotesIndex.addToIndexAndMemCache(file))
)
}
await Promise.all(input)
if (files.length > 0) {
const message = `Omnisearch - Indexed ${files.length} ${notesSuffix} in ${
new Date().getTime() - start
}ms`
console.log(message)
if (settings.showIndexingNotices) {
new Notice(message)
}
await cacheManager.writeMinisearchIndex(minisearchInstance)
// PDFs are indexed later, since they're heavier
await NotesIndex.indexPDFs()
/**
 * Rebuilds the Minisearch instance from a serialized index.
 * Failures are logged to the console and not rethrown; in that case the
 * module-level instance is left as it was.
 * @param json the serialized index, passed as-is to `MiniSearch.loadJSON`
 */
export async function initSearchEngineFromData(json: string): Promise<void> {
  try {
    const restored = MiniSearch.loadJSON(json, minisearchOptions)
    minisearchInstance = restored
    console.log('Omnisearch - MiniSearch index loaded from the file')
  } catch (error) {
    console.error('Omnisearch - Could not load MiniSearch index from json')
    console.error(error)
  }
}
@@ -165,10 +105,9 @@ async function search(query: Query): Promise<SearchResult[]> {
const exactTerms = query.getExactTerms()
if (exactTerms.length) {
results = results.filter(r => {
const title =
cacheManager.getNoteFromMemCache(r.id)?.path.toLowerCase() ?? ''
const title = cacheManager.getDocument(r.id)?.path.toLowerCase() ?? ''
const content = stripMarkdownCharacters(
cacheManager.getNoteFromMemCache(r.id)?.content ?? ''
cacheManager.getDocument(r.id)?.content ?? ''
).toLowerCase()
return exactTerms.every(q => content.includes(q) || title.includes(q))
})
@@ -179,7 +118,7 @@ async function search(query: Query): Promise<SearchResult[]> {
if (exclusions.length) {
results = results.filter(r => {
const content = stripMarkdownCharacters(
cacheManager.getNoteFromMemCache(r.id)?.content ?? ''
cacheManager.getDocument(r.id)?.content ?? ''
).toLowerCase()
return exclusions.every(q => !content.includes(q.value))
})
@@ -247,7 +186,7 @@ export async function getSuggestions(
// Map the raw results to get usable suggestions
return results.map(result => {
const note = cacheManager.getNoteFromMemCache(result.id)
const note = cacheManager.getDocument(result.id)
if (!note) {
throw new Error(`Note "${result.id}" not indexed`)
}
@@ -286,3 +225,25 @@ export async function getSuggestions(
return resultNote
})
}
// #region Read/write minisearch index
/**
 * Serializes the current Minisearch instance.
 * @returns the value produced by the instance's `toJSON()`
 */
export function getMinisearchIndexJSON(): AsPlainObject {
  const serialized = minisearchInstance.toJSON()
  return serialized
}
/**
 * Adds a batch of documents to the Minisearch instance through `addAllAsync`,
 * and resolves once that call has completed.
 * @param documents the documents to index
 */
export async function addAllToMinisearch(
  documents: IndexedDocument[]
): Promise<void> {
  const pendingInsertion = minisearchInstance.addAllAsync(documents)
  await pendingInsertion
}
/**
 * Adds one document to the Minisearch instance.
 * @param document the document to index
 */
export function addSingleToMinisearch(document: IndexedDocument): void {
  const engine = minisearchInstance
  engine.add(document)
}
/**
 * Removes one document from the Minisearch instance.
 * @param document the document to drop from the index
 */
export function removeFromMinisearch(document: IndexedDocument): void {
  const engine = minisearchInstance
  engine.remove(document)
}
// #endregion

View File

@@ -6,7 +6,6 @@ import {
SliderComponent,
} from 'obsidian'
import { writable } from 'svelte/store'
import { notesCacheFilePath, minisearchCacheFilePath } from './globals'
import type OmnisearchPlugin from './main'
interface WeightingSettings {
@@ -28,7 +27,7 @@ export interface OmnisearchSettings extends WeightingSettings {
/** Max number of spawned processes for background tasks, such as extracting text from PDFs */
backgroundProcesses: number
/** Write cache files on disk (unrelated to PDFs) */
persistCache: boolean
// persistCache: boolean
/** Display Omnisearch popup notices over Obsidian */
showIndexingNotices: boolean
/** Activate the small 🔍 button on Obsidian's ribbon */
@@ -141,37 +140,27 @@ export class SettingsTab extends PluginSettingTab {
// })
// })
// Store index
const serializedIndexDesc = new DocumentFragment()
serializedIndexDesc.createSpan({}, span => {
span.innerHTML = `This will speedup startup times after the initial indexing. Do not activate it unless indexing is too slow on your device:
<ul>
<li>PDF indexing is not affected by this setting</li>
<li>⚠️ The index can become corrupted - if you notice any issue, disable and re-enable this option to clear the cache.</li>
<li>⚠️ Cache files in <code>.obsidian/plugins/omnisearch/*.data</code> must not be synchronized between your devices.</li>
</ul>
<strong style="color: var(--text-accent)">Needs a restart to fully take effect.</strong>
`
})
new Setting(containerEl)
.setName('Persist cache on disk')
.setDesc(serializedIndexDesc)
.addToggle(toggle =>
toggle.setValue(settings.persistCache).onChange(async v => {
try {
await app.vault.adapter.remove(notesCacheFilePath)
} catch (e) {
console.warn(e)
}
try {
await app.vault.adapter.remove(minisearchCacheFilePath)
} catch (e) {
console.warn(e)
}
settings.persistCache = v
await saveSettings(this.plugin)
})
)
// // Store index
// const serializedIndexDesc = new DocumentFragment()
// serializedIndexDesc.createSpan({}, span => {
// span.innerHTML = `This will speedup startup times after the initial indexing. Do not activate it unless indexing is too slow on your device:
// <ul>
// <li>PDF indexing is not affected by this setting</li>
// <li>⚠️ The index can become corrupted - if you notice any issue, disable and re-enable this option to clear the cache.</li>
// <li>⚠️ Cache files in <code>.obsidian/plugins/omnisearch/*.data</code> must not be synchronized between your devices.</li>
// </ul>
// <strong style="color: var(--text-accent)">Needs a restart to fully take effect.</strong>
// `
// })
// new Setting(containerEl)
// .setName('Persist cache on disk')
// .setDesc(serializedIndexDesc)
// .addToggle(toggle =>
// toggle.setValue(settings.persistCache).onChange(async v => {
// settings.persistCache = v
// await saveSettings(this.plugin)
// })
// )
// PDF Indexing
const indexPDFsDesc = new DocumentFragment()
@@ -363,7 +352,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
CtrlJK: false,
CtrlNP: false,
persistCache: false,
// persistCache: false,
welcomeMessage: '',
} as const

View File

@@ -1,6 +1,6 @@
import type { ResultNote, SearchMatch } from './globals'
import { Query } from './query'
import * as Search from './search'
import type { ResultNote, SearchMatch } from '../globals'
import { Query } from '../search/query'
import * as Search from '../search/search'
type ResultNoteApi = {
score: number

View File

@@ -1,6 +1,6 @@
import { type CachedMetadata, MarkdownView, TFile } from 'obsidian'
import { stringsToRegex } from './utils'
import type { ResultNote } from './globals'
import type { ResultNote } from '../globals'
export async function openNote(
item: ResultNote,

View File

@@ -1,5 +1,5 @@
import { type CachedMetadata, Notice, Platform, Plugin } from 'obsidian'
import type { SearchMatch } from './globals'
import { type CachedMetadata, Platform } from 'obsidian'
import type { SearchMatch } from '../globals'
import {
excerptAfter,
excerptBefore,
@@ -8,9 +8,9 @@ import {
regexLineSplit,
regexStripQuotes,
regexYaml,
} from './globals'
import { settings } from './settings'
import { createHash, type BinaryLike } from 'crypto'
} from '../globals'
import { settings } from '../settings'
import { type BinaryLike, createHash } from 'crypto'
import { md5 } from 'pure-md5'
export function highlighter(str: string): string {