Squashed commit of the following:
commit 603b9bbde4c6efc90c81032e4e765c64d3075e75 Author: Simon Cambier <simon.cambier@protonmail.com> Date: Tue Oct 11 21:47:03 2022 +0200 Basic PDF indexing ok commit 200331bb5c5111493af1e1f6ef8cd4bbfbdbfd4f Author: Simon Cambier <simon.cambier@protonmail.com> Date: Tue Oct 11 20:56:44 2022 +0200 Tweaks and comments commit 434b9662d40c5fea9d8b28d43828b11916db8c94 Author: Simon Cambier <simon.cambier@ores.be> Date: Tue Oct 11 16:22:55 2022 +0200 Refactoring notes & minisearch cache commit 7253c676c8ed161782ba8e33f0c4c162880925ad Author: Simon Cambier <simon.cambier@protonmail.com> Date: Tue Oct 11 09:50:33 2022 +0200 wip commit 77736e6ef6f28ccfddb64fb768732927d43bbd77 Author: Simon Cambier <simon.cambier@protonmail.com> Date: Mon Oct 10 20:49:02 2022 +0200 Small rewrites & deps updates commit 59845fdb89eb6a3ad3f3f9ad75b39e7a3e604c45 Author: Simon Cambier <simon.cambier@protonmail.com> Date: Mon Oct 10 12:22:11 2022 +0200 wasm + worker ok commit 1cf3b506e56147586cd0ebcc003642c5230e04cc Author: Simon Cambier <simon.cambier@protonmail.com> Date: Sun Oct 2 20:04:49 2022 +0200 no disk access, of course commit eb3dd9dd4f616a479a53e10856f6c96c6725e911 Author: Simon Cambier <simon.cambier@protonmail.com> Date: Sun Oct 2 19:08:48 2022 +0200 Rollup build ok commit 54f2b7e615456c0e1b1504691689d1ba2c72d9e8 Author: Simon Cambier <simon.cambier@protonmail.com> Date: Sun Oct 2 16:03:31 2022 +0200 Rollup build + wasm PoC
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
import {Notice, TAbstractFile, TFile} from 'obsidian'
|
||||
import { Notice, TAbstractFile, TFile } from 'obsidian'
|
||||
import {
|
||||
canIndexPDFs,
|
||||
extractHeadingsFromCache,
|
||||
getAliasesFromMetadata,
|
||||
getTagsFromMetadata,
|
||||
@@ -9,36 +8,33 @@ import {
|
||||
removeDiacritics,
|
||||
wait,
|
||||
} from './utils'
|
||||
import {
|
||||
addNoteToCache,
|
||||
getNonExistingNotes,
|
||||
getNonExistingNotesFromCache,
|
||||
getNoteFromCache,
|
||||
removeAnchors,
|
||||
removeNoteFromCache,
|
||||
saveNotesCacheToFile,
|
||||
} from './notes'
|
||||
import {getPdfText} from './pdf-parser'
|
||||
import type {IndexedNote} from './globals'
|
||||
import {searchIndexFilePath} from './globals'
|
||||
import {settings} from './settings'
|
||||
import {minisearchInstance} from './search'
|
||||
import { getNonExistingNotes, removeAnchors } from './notes'
|
||||
import * as PDF from './pdf-manager'
|
||||
import type { IndexedNote } from './globals'
|
||||
import { settings } from './settings'
|
||||
import * as Search from './search'
|
||||
import PQueue from 'p-queue-compat'
|
||||
import { cacheManager } from './cache-manager'
|
||||
|
||||
let isIndexChanged: boolean
|
||||
|
||||
export const pdfQueue = new PQueue({
|
||||
concurrency: settings.backgroundProcesses,
|
||||
})
|
||||
|
||||
/**
|
||||
* Adds a file to the index
|
||||
* @param file
|
||||
* @returns
|
||||
*/
|
||||
export async function addToIndex(file: TAbstractFile): Promise<void> {
|
||||
export async function addToIndexAndCache(file: TAbstractFile): Promise<void> {
|
||||
if (!(file instanceof TFile) || !isFileIndexable(file.path)) {
|
||||
return
|
||||
}
|
||||
|
||||
// Check if the file was already indexed as non-existent,
|
||||
// and if so, remove it from the index (before adding it again)
|
||||
if (getNoteFromCache(file.path)?.doesNotExist) {
|
||||
if (cacheManager.getNoteFromCache(file.path)?.doesNotExist) {
|
||||
removeFromIndex(file.path)
|
||||
}
|
||||
|
||||
@@ -50,18 +46,20 @@ export async function addToIndex(file: TAbstractFile): Promise<void> {
|
||||
const metadata = app.metadataCache.getFileCache(file)
|
||||
if (metadata) {
|
||||
const nonExisting = getNonExistingNotes(file, metadata)
|
||||
for (const name of nonExisting.filter(o => !getNoteFromCache(o))) {
|
||||
for (const name of nonExisting.filter(
|
||||
o => !cacheManager.getNoteFromCache(o)
|
||||
)) {
|
||||
addNonExistingToIndex(name, file.path)
|
||||
}
|
||||
}
|
||||
|
||||
if (getNoteFromCache(file.path)) {
|
||||
if (cacheManager.getNoteFromCache(file.path)) {
|
||||
throw new Error(`${file.basename} is already indexed`)
|
||||
}
|
||||
|
||||
let content
|
||||
if (file.path.endsWith('.pdf')) {
|
||||
content = removeDiacritics(await getPdfText(file as TFile))
|
||||
content = removeDiacritics(await PDF.pdfManager.getPdfText(file as TFile))
|
||||
} else {
|
||||
// Fetch content from the cache to index it as-is
|
||||
content = removeDiacritics(await app.vault.cachedRead(file))
|
||||
@@ -87,9 +85,9 @@ export async function addToIndex(file: TAbstractFile): Promise<void> {
|
||||
: '',
|
||||
}
|
||||
|
||||
minisearchInstance.add(note)
|
||||
Search.minisearchInstance.add(note)
|
||||
isIndexChanged = true
|
||||
addNoteToCache(note.path, note)
|
||||
cacheManager.addNoteToCache(note.path, note)
|
||||
} catch (e) {
|
||||
console.trace('Error while indexing ' + file.basename)
|
||||
console.error(e)
|
||||
@@ -105,7 +103,7 @@ export async function addToIndex(file: TAbstractFile): Promise<void> {
|
||||
export function addNonExistingToIndex(name: string, parent: string): void {
|
||||
name = removeAnchors(name)
|
||||
const filename = name + (name.endsWith('.md') ? '' : '.md')
|
||||
if (getNoteFromCache(filename)) return
|
||||
if (cacheManager.getNoteFromCache(filename)) return
|
||||
|
||||
const note = {
|
||||
path: filename,
|
||||
@@ -121,9 +119,9 @@ export function addNonExistingToIndex(name: string, parent: string): void {
|
||||
doesNotExist: true,
|
||||
parent,
|
||||
} as IndexedNote
|
||||
minisearchInstance.add(note)
|
||||
Search.minisearchInstance.add(note)
|
||||
isIndexChanged = true
|
||||
addNoteToCache(filename, note)
|
||||
cacheManager.addNoteToCache(filename, note)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -135,18 +133,19 @@ export function removeFromIndex(path: string): void {
|
||||
console.info(`"${path}" is not an indexable file`)
|
||||
return
|
||||
}
|
||||
const note = getNoteFromCache(path)
|
||||
const note = cacheManager.getNoteFromCache(path)
|
||||
if (note) {
|
||||
minisearchInstance.remove(note)
|
||||
Search.minisearchInstance.remove(note)
|
||||
isIndexChanged = true
|
||||
removeNoteFromCache(path)
|
||||
getNonExistingNotesFromCache()
|
||||
cacheManager.removeNoteFromCache(path)
|
||||
cacheManager
|
||||
.getNonExistingNotesFromCache()
|
||||
.filter(n => n.parent === path)
|
||||
.forEach(n => {
|
||||
removeFromIndex(n.path)
|
||||
})
|
||||
} else {
|
||||
console.warn(`not not found under path ${path}`)
|
||||
console.warn(`Omnisearch - Note not found under path ${path}`)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -157,54 +156,40 @@ export function addNoteToReindex(note: TAbstractFile): void {
|
||||
}
|
||||
|
||||
export async function refreshIndex(): Promise<void> {
|
||||
if (settings.showIndexingNotices && notesToReindex.size > 0) {
|
||||
new Notice(`Omnisearch - Reindexing ${notesToReindex.size} notes`, 2000)
|
||||
}
|
||||
for (const note of notesToReindex) {
|
||||
removeFromIndex(note.path)
|
||||
await addToIndex(note)
|
||||
await wait(0)
|
||||
}
|
||||
notesToReindex.clear()
|
||||
|
||||
await saveIndexToFile()
|
||||
}
|
||||
|
||||
export async function saveIndexToFile(): Promise<void> {
|
||||
if (settings.storeIndexInFile && minisearchInstance && isIndexChanged) {
|
||||
const json = JSON.stringify(minisearchInstance)
|
||||
await app.vault.adapter.write(searchIndexFilePath, json)
|
||||
console.log('Omnisearch - Index saved on disk')
|
||||
|
||||
await saveNotesCacheToFile()
|
||||
isIndexChanged = false
|
||||
if (notesToReindex.size > 0) {
|
||||
if (settings.showIndexingNotices) {
|
||||
new Notice(`Omnisearch - Reindexing ${notesToReindex.size} notes`, 2000)
|
||||
}
|
||||
for (const note of notesToReindex) {
|
||||
removeFromIndex(note.path)
|
||||
await addToIndexAndCache(note)
|
||||
await wait(0)
|
||||
}
|
||||
notesToReindex.clear()
|
||||
await cacheManager.writeMinisearchIndex(Search.minisearchInstance)
|
||||
}
|
||||
}
|
||||
|
||||
export async function indexPDFs() {
|
||||
if (canIndexPDFs()) {
|
||||
const start = new Date().getTime()
|
||||
if (settings.PDFIndexing) {
|
||||
const files = app.vault.getFiles().filter(f => f.path.endsWith('.pdf'))
|
||||
if (files.length > 50) {
|
||||
new Notice(`⚠️ Omnisearch is indexing ${files.length} PDFs. You can experience slowdowns while this work is in progress.`)
|
||||
}
|
||||
|
||||
const promises: Promise<void>[] = []
|
||||
console.time('PDF Indexing')
|
||||
console.log(`Omnisearch - Indexing ${files.length} PDFs`)
|
||||
for (const file of files) {
|
||||
if (getNoteFromCache(file.path)) {
|
||||
if (cacheManager.getNoteFromCache(file.path)) {
|
||||
removeFromIndex(file.path)
|
||||
}
|
||||
promises.push(addToIndex(file))
|
||||
pdfQueue.add(async () => {
|
||||
await addToIndexAndCache(file)
|
||||
await cacheManager.writeMinisearchIndex(Search.minisearchInstance)
|
||||
})
|
||||
}
|
||||
await Promise.all(promises)
|
||||
|
||||
// Notice & log
|
||||
const message = `Omnisearch - Indexed ${files.length} PDFs in ${
|
||||
new Date().getTime() - start
|
||||
}ms`
|
||||
await pdfQueue.onEmpty()
|
||||
console.timeEnd('PDF Indexing')
|
||||
|
||||
if (settings.showIndexingNotices) {
|
||||
new Notice(message)
|
||||
new Notice(`Omnisearch - Indexed ${files.length} PDFs`)
|
||||
}
|
||||
console.log(message)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user