Squashed commit of the following:

commit 603b9bbde4c6efc90c81032e4e765c64d3075e75
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Tue Oct 11 21:47:03 2022 +0200

    Basic PDF indexing ok

commit 200331bb5c5111493af1e1f6ef8cd4bbfbdbfd4f
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Tue Oct 11 20:56:44 2022 +0200

    Tweaks and comments

commit 434b9662d40c5fea9d8b28d43828b11916db8c94
Author: Simon Cambier <simon.cambier@ores.be>
Date:   Tue Oct 11 16:22:55 2022 +0200

    Refactoring notes & minisearch cache

commit 7253c676c8ed161782ba8e33f0c4c162880925ad
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Tue Oct 11 09:50:33 2022 +0200

    wip

commit 77736e6ef6f28ccfddb64fb768732927d43bbd77
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Mon Oct 10 20:49:02 2022 +0200

    Small rewrites & deps updates

commit 59845fdb89eb6a3ad3f3f9ad75b39e7a3e604c45
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Mon Oct 10 12:22:11 2022 +0200

    wasm + worker ok

commit 1cf3b506e56147586cd0ebcc003642c5230e04cc
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Sun Oct 2 20:04:49 2022 +0200

    no disk access, of course

commit eb3dd9dd4f616a479a53e10856f6c96c6725e911
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Sun Oct 2 19:08:48 2022 +0200

    Rollup build ok

commit 54f2b7e615456c0e1b1504691689d1ba2c72d9e8
Author: Simon Cambier <simon.cambier@protonmail.com>
Date:   Sun Oct 2 16:03:31 2022 +0200

    Rollup build + wasm PoC
This commit is contained in:
Simon Cambier
2022-10-11 21:54:11 +02:00
parent cf7f6af257
commit 7ddae6dc08
28 changed files with 18437 additions and 923 deletions

View File

@@ -1,6 +1,5 @@
import {Notice, TAbstractFile, TFile} from 'obsidian'
import { Notice, TAbstractFile, TFile } from 'obsidian'
import {
canIndexPDFs,
extractHeadingsFromCache,
getAliasesFromMetadata,
getTagsFromMetadata,
@@ -9,36 +8,33 @@ import {
removeDiacritics,
wait,
} from './utils'
import {
addNoteToCache,
getNonExistingNotes,
getNonExistingNotesFromCache,
getNoteFromCache,
removeAnchors,
removeNoteFromCache,
saveNotesCacheToFile,
} from './notes'
import {getPdfText} from './pdf-parser'
import type {IndexedNote} from './globals'
import {searchIndexFilePath} from './globals'
import {settings} from './settings'
import {minisearchInstance} from './search'
import { getNonExistingNotes, removeAnchors } from './notes'
import * as PDF from './pdf-manager'
import type { IndexedNote } from './globals'
import { settings } from './settings'
import * as Search from './search'
import PQueue from 'p-queue-compat'
import { cacheManager } from './cache-manager'
let isIndexChanged: boolean
export const pdfQueue = new PQueue({
concurrency: settings.backgroundProcesses,
})
/**
* Adds a file to the index
* @param file
* @returns
*/
export async function addToIndex(file: TAbstractFile): Promise<void> {
export async function addToIndexAndCache(file: TAbstractFile): Promise<void> {
if (!(file instanceof TFile) || !isFileIndexable(file.path)) {
return
}
// Check if the file was already indexed as non-existent,
// and if so, remove it from the index (before adding it again)
if (getNoteFromCache(file.path)?.doesNotExist) {
if (cacheManager.getNoteFromCache(file.path)?.doesNotExist) {
removeFromIndex(file.path)
}
@@ -50,18 +46,20 @@ export async function addToIndex(file: TAbstractFile): Promise<void> {
const metadata = app.metadataCache.getFileCache(file)
if (metadata) {
const nonExisting = getNonExistingNotes(file, metadata)
for (const name of nonExisting.filter(o => !getNoteFromCache(o))) {
for (const name of nonExisting.filter(
o => !cacheManager.getNoteFromCache(o)
)) {
addNonExistingToIndex(name, file.path)
}
}
if (getNoteFromCache(file.path)) {
if (cacheManager.getNoteFromCache(file.path)) {
throw new Error(`${file.basename} is already indexed`)
}
let content
if (file.path.endsWith('.pdf')) {
content = removeDiacritics(await getPdfText(file as TFile))
content = removeDiacritics(await PDF.pdfManager.getPdfText(file as TFile))
} else {
// Fetch content from the cache to index it as-is
content = removeDiacritics(await app.vault.cachedRead(file))
@@ -87,9 +85,9 @@ export async function addToIndex(file: TAbstractFile): Promise<void> {
: '',
}
minisearchInstance.add(note)
Search.minisearchInstance.add(note)
isIndexChanged = true
addNoteToCache(note.path, note)
cacheManager.addNoteToCache(note.path, note)
} catch (e) {
console.trace('Error while indexing ' + file.basename)
console.error(e)
@@ -105,7 +103,7 @@ export async function addToIndex(file: TAbstractFile): Promise<void> {
export function addNonExistingToIndex(name: string, parent: string): void {
name = removeAnchors(name)
const filename = name + (name.endsWith('.md') ? '' : '.md')
if (getNoteFromCache(filename)) return
if (cacheManager.getNoteFromCache(filename)) return
const note = {
path: filename,
@@ -121,9 +119,9 @@ export function addNonExistingToIndex(name: string, parent: string): void {
doesNotExist: true,
parent,
} as IndexedNote
minisearchInstance.add(note)
Search.minisearchInstance.add(note)
isIndexChanged = true
addNoteToCache(filename, note)
cacheManager.addNoteToCache(filename, note)
}
/**
@@ -135,18 +133,19 @@ export function removeFromIndex(path: string): void {
console.info(`"${path}" is not an indexable file`)
return
}
const note = getNoteFromCache(path)
const note = cacheManager.getNoteFromCache(path)
if (note) {
minisearchInstance.remove(note)
Search.minisearchInstance.remove(note)
isIndexChanged = true
removeNoteFromCache(path)
getNonExistingNotesFromCache()
cacheManager.removeNoteFromCache(path)
cacheManager
.getNonExistingNotesFromCache()
.filter(n => n.parent === path)
.forEach(n => {
removeFromIndex(n.path)
})
} else {
console.warn(`not not found under path ${path}`)
console.warn(`Omnisearch - Note not found under path ${path}`)
}
}
@@ -157,54 +156,40 @@ export function addNoteToReindex(note: TAbstractFile): void {
}
export async function refreshIndex(): Promise<void> {
if (settings.showIndexingNotices && notesToReindex.size > 0) {
new Notice(`Omnisearch - Reindexing ${notesToReindex.size} notes`, 2000)
}
for (const note of notesToReindex) {
removeFromIndex(note.path)
await addToIndex(note)
await wait(0)
}
notesToReindex.clear()
await saveIndexToFile()
}
export async function saveIndexToFile(): Promise<void> {
if (settings.storeIndexInFile && minisearchInstance && isIndexChanged) {
const json = JSON.stringify(minisearchInstance)
await app.vault.adapter.write(searchIndexFilePath, json)
console.log('Omnisearch - Index saved on disk')
await saveNotesCacheToFile()
isIndexChanged = false
if (notesToReindex.size > 0) {
if (settings.showIndexingNotices) {
new Notice(`Omnisearch - Reindexing ${notesToReindex.size} notes`, 2000)
}
for (const note of notesToReindex) {
removeFromIndex(note.path)
await addToIndexAndCache(note)
await wait(0)
}
notesToReindex.clear()
await cacheManager.writeMinisearchIndex(Search.minisearchInstance)
}
}
export async function indexPDFs() {
if (canIndexPDFs()) {
const start = new Date().getTime()
if (settings.PDFIndexing) {
const files = app.vault.getFiles().filter(f => f.path.endsWith('.pdf'))
if (files.length > 50) {
new Notice(`⚠️ Omnisearch is indexing ${files.length} PDFs. You can experience slowdowns while this work is in progress.`)
}
const promises: Promise<void>[] = []
console.time('PDF Indexing')
console.log(`Omnisearch - Indexing ${files.length} PDFs`)
for (const file of files) {
if (getNoteFromCache(file.path)) {
if (cacheManager.getNoteFromCache(file.path)) {
removeFromIndex(file.path)
}
promises.push(addToIndex(file))
pdfQueue.add(async () => {
await addToIndexAndCache(file)
await cacheManager.writeMinisearchIndex(Search.minisearchInstance)
})
}
await Promise.all(promises)
// Notice & log
const message = `Omnisearch - Indexed ${files.length} PDFs in ${
new Date().getTime() - start
}ms`
await pdfQueue.onEmpty()
console.timeEnd('PDF Indexing')
if (settings.showIndexingNotices) {
new Notice(message)
new Notice(`Omnisearch - Indexed ${files.length} PDFs`)
}
console.log(message)
}
}
}