Embedding PDF.js in Omnisearch to avoid crashes
This commit is contained in:
@@ -37,7 +37,8 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@vanakat/plugin-api": "^0.1.0",
|
||||
"minisearch": "^5.0.0"
|
||||
"minisearch": "^5.0.0",
|
||||
"pdfjs-dist": "^2.16.105"
|
||||
},
|
||||
"pnpm": {
|
||||
"overrides": {
|
||||
|
||||
23
pnpm-lock.yaml
generated
23
pnpm-lock.yaml
generated
@@ -19,6 +19,7 @@ specifiers:
|
||||
jest: ^27.5.1
|
||||
minisearch: ^5.0.0
|
||||
obsidian: latest
|
||||
pdfjs-dist: ^2.16.105
|
||||
prettier: ^2.7.1
|
||||
prettier-plugin-svelte: ^2.7.0
|
||||
svelte: ^3.50.1
|
||||
@@ -30,6 +31,7 @@ specifiers:
|
||||
dependencies:
|
||||
'@vanakat/plugin-api': 0.1.0
|
||||
minisearch: 5.0.0
|
||||
pdfjs-dist: 2.16.105
|
||||
|
||||
devDependencies:
|
||||
'@babel/preset-env': 7.19.0
|
||||
@@ -2436,6 +2438,10 @@ packages:
|
||||
webidl-conversions: 5.0.0
|
||||
dev: true
|
||||
|
||||
/dommatrix/1.0.3:
|
||||
resolution: {integrity: sha512-l32Xp/TLgWb8ReqbVJAFIvXmY7go4nTxxlWiAFyhoQw9RKEOHBZNnyGvJWqDVSPmq3Y9HlM4npqF/T6VMOXhww==}
|
||||
dev: false
|
||||
|
||||
/electron-to-chromium/1.4.247:
|
||||
resolution: {integrity: sha512-FLs6R4FQE+1JHM0hh3sfdxnYjKvJpHZyhQDjc2qFq/xFvmmRt/TATNToZhrcGUFzpF2XjeiuozrA8lI0PZmYYw==}
|
||||
dev: true
|
||||
@@ -3920,6 +3926,18 @@ packages:
|
||||
resolution: {integrity: sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==}
|
||||
engines: {node: '>=8'}
|
||||
|
||||
/pdfjs-dist/2.16.105:
|
||||
resolution: {integrity: sha512-J4dn41spsAwUxCpEoVf6GVoz908IAA3mYiLmNxg8J9kfRXc2jxpbUepcP0ocp0alVNLFthTAM8DZ1RaHh8sU0A==}
|
||||
peerDependencies:
|
||||
worker-loader: ^3.0.8
|
||||
peerDependenciesMeta:
|
||||
worker-loader:
|
||||
optional: true
|
||||
dependencies:
|
||||
dommatrix: 1.0.3
|
||||
web-streams-polyfill: 3.2.1
|
||||
dev: false
|
||||
|
||||
/picocolors/1.0.0:
|
||||
resolution: {integrity: sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==}
|
||||
dev: true
|
||||
@@ -4559,6 +4577,11 @@ packages:
|
||||
makeerror: 1.0.12
|
||||
dev: true
|
||||
|
||||
/web-streams-polyfill/3.2.1:
|
||||
resolution: {integrity: sha512-e0MO3wdXWKrLbL0DgGnUV7WHVuw9OUvL4hjgnPkIeEvESk74gAITi5G606JtZPp39cd8HA9VQzCIvA49LpPN5Q==}
|
||||
engines: {node: '>= 8'}
|
||||
dev: false
|
||||
|
||||
/webidl-conversions/5.0.0:
|
||||
resolution: {integrity: sha512-VlZwKPCkYKxQgeSbH5EyngOmRp7Ww7I9rQLERETtf5ofd9pGeswWiOtogpEO850jziPRarreGxn5QIiTqpb2wA==}
|
||||
engines: {node: '>=8'}
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
import { Notice, TAbstractFile, TFile } from 'obsidian'
|
||||
import {Notice, TAbstractFile, TFile} from 'obsidian'
|
||||
import {
|
||||
canIndexPDFs,
|
||||
extractHeadingsFromCache,
|
||||
getAliasesFromMetadata,
|
||||
getTagsFromMetadata, isFileIndexable,
|
||||
getTagsFromMetadata,
|
||||
isFileIndexable,
|
||||
isFilePlaintext,
|
||||
removeDiacritics,
|
||||
wait,
|
||||
@@ -16,11 +18,11 @@ import {
|
||||
removeNoteFromCache,
|
||||
saveNotesCacheToFile,
|
||||
} from './notes'
|
||||
import { getPdfText } from './pdf-parser'
|
||||
import type { IndexedNote } from './globals'
|
||||
import { searchIndexFilePath } from './globals'
|
||||
import { settings } from './settings'
|
||||
import { minisearchInstance } from './search'
|
||||
import {getPdfText} from './pdf-parser'
|
||||
import type {IndexedNote} from './globals'
|
||||
import {searchIndexFilePath} from './globals'
|
||||
import {settings} from './settings'
|
||||
import {minisearchInstance} from './search'
|
||||
|
||||
let isIndexChanged: boolean
|
||||
|
||||
@@ -178,3 +180,31 @@ export async function saveIndexToFile(): Promise<void> {
|
||||
isIndexChanged = false
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Adds every PDF of the vault to the search index.
 *
 * Does nothing unless `canIndexPDFs()` returns true. Files are selected by
 * their path ending in `.pdf`. A file that is already present in the notes
 * cache is first removed from the index, then added again.
 *
 * All `addToIndex` calls are started up front and awaited together through
 * `Promise.all`, so a single rejection rejects this function and skips the
 * final notice/log.
 *
 * A warning notice is displayed when more than 50 PDFs are found. Once done,
 * a summary with the elapsed time is logged to the console, and also shown
 * as a notice when `settings.showIndexingNotices` is truthy.
 */
export async function indexPDFs() {
  if (!canIndexPDFs()) {
    return
  }

  const startedAt = new Date().getTime()
  const pdfFiles = app.vault.getFiles().filter(f => f.path.endsWith('.pdf'))
  if (pdfFiles.length > 50) {
    new Notice(`⚠️ Omnisearch is indexing ${pdfFiles.length} PDFs. You can experience slowdowns while this work is in progress.`)
  }

  // Kick off every indexing job first, then wait for all of them at once
  const pending: Promise<void>[] = pdfFiles.map(pdf => {
    if (getNoteFromCache(pdf.path)) {
      // Drop the stale entry so the file is not indexed twice
      removeFromIndex(pdf.path)
    }
    return addToIndex(pdf)
  })
  await Promise.all(pending)

  // Notice & log
  const elapsed = new Date().getTime() - startedAt
  const message = `Omnisearch - Indexed ${pdfFiles.length} PDFs in ${elapsed}ms`
  if (settings.showIndexingNotices) {
    new Notice(message)
  }
  console.log(message)
}
|
||||
@@ -1,11 +1,11 @@
|
||||
import type { TFile } from 'obsidian'
|
||||
import { loadPdfJs } from 'obsidian'
|
||||
import PDFJs from 'pdfjs-dist'
|
||||
import pdfjsWorker from 'pdfjs-dist/build/pdf.worker.entry'
|
||||
|
||||
let PDFJs: any = null
|
||||
PDFJs.GlobalWorkerOptions.workerSrc = pdfjsWorker
|
||||
|
||||
// https://stackoverflow.com/a/59929946
|
||||
export async function getPdfText(file: TFile): Promise<string> {
|
||||
PDFJs = PDFJs ?? (await loadPdfJs())
|
||||
const data = await app.vault.readBinary(file)
|
||||
const doc = await PDFJs.getDocument(data).promise
|
||||
const pageTexts = Array.from({ length: doc.numPages }, async (v, i) => {
|
||||
|
||||
@@ -24,7 +24,7 @@ import {
|
||||
loadNotesCache,
|
||||
resetNotesCache,
|
||||
} from './notes'
|
||||
import { addToIndex, removeFromIndex, saveIndexToFile } from './notes-index'
|
||||
import {addToIndex, indexPDFs, removeFromIndex, saveIndexToFile} from './notes-index'
|
||||
|
||||
export let minisearchInstance: MiniSearch<IndexedNote>
|
||||
|
||||
@@ -134,38 +134,6 @@ export async function initGlobalSearchIndex(): Promise<void> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Adds the vault's PDF files to the search index, flushing the pending work
 * at regular intervals.
 *
 * Does nothing unless `canIndexPDFs()` returns true. Files are selected by
 * their path ending in `.pdf`. A file already present in the notes cache is
 * removed from the index before being added again.
 *
 * Pending `addToIndex` promises are awaited each time the file position is a
 * multiple of 10 (position 0 included, so the first flush covers a single
 * file), after a call to `wait(1)`. Whatever is left after the loop is
 * awaited at the end.
 *
 * A summary with the elapsed time is logged to the console, and also shown
 * as a notice when `settings.showIndexingNotices` is truthy.
 */
async function indexPDFs() {
  if (!canIndexPDFs()) {
    return
  }

  const startedAt = new Date().getTime()
  console.warn(
    "Omnisearch - Warnings on 'pdf.worker.min' are due to some issues while reading PDFs file and can usually be ignored."
  )
  const pdfFiles = app.vault.getFiles().filter(f => f.path.endsWith('.pdf'))

  let batch: Promise<void>[] = []
  let position = 0
  for (const pdf of pdfFiles) {
    if (getNoteFromCache(pdf.path)) {
      removeFromIndex(pdf.path)
    }
    batch.push(addToIndex(pdf))

    // Flush on positions 0, 10, 20, ...
    if (position % 10 === 0) {
      // NOTE(review): presumably yields for a short delay; confirm the unit of `wait`
      await wait(1)
      await Promise.all(batch)
      batch = []
    }
    position += 1
  }
  // Wait for the files queued since the last flush
  await Promise.all(batch)

  // Notice & log
  const elapsed = new Date().getTime() - startedAt
  const message = `Omnisearch - Indexed ${pdfFiles.length} PDFs in ${elapsed}ms`
  if (settings.showIndexingNotices) {
    new Notice(message)
  }
  console.log(message)
}
|
||||
|
||||
/**
|
||||
* Searches the index for the given query,
|
||||
* and returns an array of raw results
|
||||
|
||||
@@ -106,23 +106,23 @@ export class SettingsTab extends PluginSettingTab {
|
||||
})
|
||||
})
|
||||
|
||||
// // Index PDFs
|
||||
// const indexPDFsDesc = new DocumentFragment()
|
||||
// indexPDFsDesc.createSpan({}, span => {
|
||||
// span.innerHTML = `Omnisearch will index your PDFs, and return them in search results.
|
||||
// This feature is currently a work-in-progress, please report slowdowns or issues that you might experience.<br>
|
||||
// PDFs being quite slow to index, <strong style="color: var(--text-accent)">it is strongly recommended to also enable "Store index in file"</strong>.<br>
|
||||
// <strong>Needs a restart to fully take effect.</strong>`
|
||||
// })
|
||||
// new Setting(containerEl)
|
||||
// .setName('BETA - Index PDFs')
|
||||
// .setDesc(indexPDFsDesc)
|
||||
// .addToggle(toggle =>
|
||||
// toggle.setValue(settings.indexPDFs).onChange(async v => {
|
||||
// settings.indexPDFs = v
|
||||
// await saveSettings(this.plugin)
|
||||
// })
|
||||
// )
|
||||
// Index PDFs
|
||||
const indexPDFsDesc = new DocumentFragment()
|
||||
indexPDFsDesc.createSpan({}, span => {
|
||||
span.innerHTML = `Omnisearch will index your PDFs, and return them in search results.
|
||||
This feature is currently a work-in-progress, please report slowdowns or issues that you might experience.<br>
|
||||
PDFs being quite slow to index, <strong style="color: var(--text-accent)">it is strongly recommended to also enable "Store index in file"</strong>.<br>
|
||||
<strong>Needs a restart to fully take effect.</strong>`
|
||||
})
|
||||
new Setting(containerEl)
|
||||
.setName('BETA - Index PDFs')
|
||||
.setDesc(indexPDFsDesc)
|
||||
.addToggle(toggle =>
|
||||
toggle.setValue(settings.indexPDFs).onChange(async v => {
|
||||
settings.indexPDFs = v
|
||||
await saveSettings(this.plugin)
|
||||
})
|
||||
)
|
||||
|
||||
// Store index
|
||||
const serializedIndexDesc = new DocumentFragment()
|
||||
|
||||
@@ -173,7 +173,7 @@ export function getCtrlKeyLabel(): 'ctrl' | '⌘' {
|
||||
}
|
||||
|
||||
/**
 * Tells whether PDF files should be indexed.
 *
 * The answer follows the user's "Index PDFs" setting. A leading
 * `return false` previously made that lookup unreachable, so the toggle
 * had no effect.
 *
 * @returns the current value of `settings.indexPDFs`
 */
export function canIndexPDFs(): boolean {
  return settings.indexPDFs
}
|
||||
|
||||
export function isFileIndexable(path: string): boolean {
|
||||
|
||||
Reference in New Issue
Block a user