Embedding PDF.js in Omnisearch to avoid crashes
This commit is contained in:
@@ -37,7 +37,8 @@
|
|||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@vanakat/plugin-api": "^0.1.0",
|
"@vanakat/plugin-api": "^0.1.0",
|
||||||
"minisearch": "^5.0.0"
|
"minisearch": "^5.0.0",
|
||||||
|
"pdfjs-dist": "^2.16.105"
|
||||||
},
|
},
|
||||||
"pnpm": {
|
"pnpm": {
|
||||||
"overrides": {
|
"overrides": {
|
||||||
|
|||||||
23
pnpm-lock.yaml
generated
23
pnpm-lock.yaml
generated
@@ -19,6 +19,7 @@ specifiers:
|
|||||||
jest: ^27.5.1
|
jest: ^27.5.1
|
||||||
minisearch: ^5.0.0
|
minisearch: ^5.0.0
|
||||||
obsidian: latest
|
obsidian: latest
|
||||||
|
pdfjs-dist: ^2.16.105
|
||||||
prettier: ^2.7.1
|
prettier: ^2.7.1
|
||||||
prettier-plugin-svelte: ^2.7.0
|
prettier-plugin-svelte: ^2.7.0
|
||||||
svelte: ^3.50.1
|
svelte: ^3.50.1
|
||||||
@@ -30,6 +31,7 @@ specifiers:
|
|||||||
dependencies:
|
dependencies:
|
||||||
'@vanakat/plugin-api': 0.1.0
|
'@vanakat/plugin-api': 0.1.0
|
||||||
minisearch: 5.0.0
|
minisearch: 5.0.0
|
||||||
|
pdfjs-dist: 2.16.105
|
||||||
|
|
||||||
devDependencies:
|
devDependencies:
|
||||||
'@babel/preset-env': 7.19.0
|
'@babel/preset-env': 7.19.0
|
||||||
@@ -2436,6 +2438,10 @@ packages:
|
|||||||
webidl-conversions: 5.0.0
|
webidl-conversions: 5.0.0
|
||||||
dev: true
|
dev: true
|
||||||
|
|
||||||
|
/dommatrix/1.0.3:
|
||||||
|
resolution: {integrity: sha512-l32Xp/TLgWb8ReqbVJAFIvXmY7go4nTxxlWiAFyhoQw9RKEOHBZNnyGvJWqDVSPmq3Y9HlM4npqF/T6VMOXhww==}
|
||||||
|
dev: false
|
||||||
|
|
||||||
/electron-to-chromium/1.4.247:
|
/electron-to-chromium/1.4.247:
|
||||||
resolution: {integrity: sha512-FLs6R4FQE+1JHM0hh3sfdxnYjKvJpHZyhQDjc2qFq/xFvmmRt/TATNToZhrcGUFzpF2XjeiuozrA8lI0PZmYYw==}
|
resolution: {integrity: sha512-FLs6R4FQE+1JHM0hh3sfdxnYjKvJpHZyhQDjc2qFq/xFvmmRt/TATNToZhrcGUFzpF2XjeiuozrA8lI0PZmYYw==}
|
||||||
dev: true
|
dev: true
|
||||||
@@ -3920,6 +3926,18 @@ packages:
|
|||||||
resolution: {integrity: sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==}
|
resolution: {integrity: sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==}
|
||||||
engines: {node: '>=8'}
|
engines: {node: '>=8'}
|
||||||
|
|
||||||
|
/pdfjs-dist/2.16.105:
|
||||||
|
resolution: {integrity: sha512-J4dn41spsAwUxCpEoVf6GVoz908IAA3mYiLmNxg8J9kfRXc2jxpbUepcP0ocp0alVNLFthTAM8DZ1RaHh8sU0A==}
|
||||||
|
peerDependencies:
|
||||||
|
worker-loader: ^3.0.8
|
||||||
|
peerDependenciesMeta:
|
||||||
|
worker-loader:
|
||||||
|
optional: true
|
||||||
|
dependencies:
|
||||||
|
dommatrix: 1.0.3
|
||||||
|
web-streams-polyfill: 3.2.1
|
||||||
|
dev: false
|
||||||
|
|
||||||
/picocolors/1.0.0:
|
/picocolors/1.0.0:
|
||||||
resolution: {integrity: sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==}
|
resolution: {integrity: sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==}
|
||||||
dev: true
|
dev: true
|
||||||
@@ -4559,6 +4577,11 @@ packages:
|
|||||||
makeerror: 1.0.12
|
makeerror: 1.0.12
|
||||||
dev: true
|
dev: true
|
||||||
|
|
||||||
|
/web-streams-polyfill/3.2.1:
|
||||||
|
resolution: {integrity: sha512-e0MO3wdXWKrLbL0DgGnUV7WHVuw9OUvL4hjgnPkIeEvESk74gAITi5G606JtZPp39cd8HA9VQzCIvA49LpPN5Q==}
|
||||||
|
engines: {node: '>= 8'}
|
||||||
|
dev: false
|
||||||
|
|
||||||
/webidl-conversions/5.0.0:
|
/webidl-conversions/5.0.0:
|
||||||
resolution: {integrity: sha512-VlZwKPCkYKxQgeSbH5EyngOmRp7Ww7I9rQLERETtf5ofd9pGeswWiOtogpEO850jziPRarreGxn5QIiTqpb2wA==}
|
resolution: {integrity: sha512-VlZwKPCkYKxQgeSbH5EyngOmRp7Ww7I9rQLERETtf5ofd9pGeswWiOtogpEO850jziPRarreGxn5QIiTqpb2wA==}
|
||||||
engines: {node: '>=8'}
|
engines: {node: '>=8'}
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
import {Notice, TAbstractFile, TFile} from 'obsidian'
|
import {Notice, TAbstractFile, TFile} from 'obsidian'
|
||||||
import {
|
import {
|
||||||
|
canIndexPDFs,
|
||||||
extractHeadingsFromCache,
|
extractHeadingsFromCache,
|
||||||
getAliasesFromMetadata,
|
getAliasesFromMetadata,
|
||||||
getTagsFromMetadata, isFileIndexable,
|
getTagsFromMetadata,
|
||||||
|
isFileIndexable,
|
||||||
isFilePlaintext,
|
isFilePlaintext,
|
||||||
removeDiacritics,
|
removeDiacritics,
|
||||||
wait,
|
wait,
|
||||||
@@ -178,3 +180,31 @@ export async function saveIndexToFile(): Promise<void> {
|
|||||||
isIndexChanged = false
|
isIndexChanged = false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Indexes every PDF file of the vault, provided PDF indexing is enabled.
 *
 * Returns immediately when `canIndexPDFs()` is false. Otherwise, each vault
 * file whose path ends with `.pdf` is removed from the index if it is already
 * present in the notes cache, then handed to `addToIndex`. All additions are
 * started without waiting on one another and are awaited together, so a single
 * rejection makes this function reject before the summary is produced.
 *
 * A warning notice is shown up front when more than 50 PDFs are found. The
 * summary message is always logged to the console, and is also shown as a
 * notice when `settings.showIndexingNotices` is set.
 */
export async function indexPDFs() {
  if (!canIndexPDFs()) {
    return
  }

  const startedAt = Date.now()
  const pdfFiles = app.vault.getFiles().filter(f => f.path.endsWith('.pdf'))
  const total = pdfFiles.length

  if (total > 50) {
    new Notice(`⚠️ Omnisearch is indexing ${total} PDFs. You can experience slowdowns while this work is in progress.`)
  }

  // Kick off every (re-)indexing job first, then wait for all of them at once.
  const pending: Promise<void>[] = pdfFiles.map(pdf => {
    // A cached entry means the file was indexed before: drop the stale entry.
    if (getNoteFromCache(pdf.path)) {
      removeFromIndex(pdf.path)
    }
    return addToIndex(pdf)
  })
  await Promise.all(pending)

  // Notice & log
  const elapsedMs = Date.now() - startedAt
  const summary = `Omnisearch - Indexed ${total} PDFs in ${elapsedMs}ms`
  if (settings.showIndexingNotices) {
    new Notice(summary)
  }
  console.log(summary)
}
|
||||||
@@ -1,11 +1,11 @@
|
|||||||
import type { TFile } from 'obsidian'
|
import type { TFile } from 'obsidian'
|
||||||
import { loadPdfJs } from 'obsidian'
|
import PDFJs from 'pdfjs-dist'
|
||||||
|
import pdfjsWorker from 'pdfjs-dist/build/pdf.worker.entry'
|
||||||
|
|
||||||
let PDFJs: any = null
|
PDFJs.GlobalWorkerOptions.workerSrc = pdfjsWorker
|
||||||
|
|
||||||
// https://stackoverflow.com/a/59929946
|
// https://stackoverflow.com/a/59929946
|
||||||
export async function getPdfText(file: TFile): Promise<string> {
|
export async function getPdfText(file: TFile): Promise<string> {
|
||||||
PDFJs = PDFJs ?? (await loadPdfJs())
|
|
||||||
const data = await app.vault.readBinary(file)
|
const data = await app.vault.readBinary(file)
|
||||||
const doc = await PDFJs.getDocument(data).promise
|
const doc = await PDFJs.getDocument(data).promise
|
||||||
const pageTexts = Array.from({ length: doc.numPages }, async (v, i) => {
|
const pageTexts = Array.from({ length: doc.numPages }, async (v, i) => {
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ import {
|
|||||||
loadNotesCache,
|
loadNotesCache,
|
||||||
resetNotesCache,
|
resetNotesCache,
|
||||||
} from './notes'
|
} from './notes'
|
||||||
import { addToIndex, removeFromIndex, saveIndexToFile } from './notes-index'
|
import {addToIndex, indexPDFs, removeFromIndex, saveIndexToFile} from './notes-index'
|
||||||
|
|
||||||
export let minisearchInstance: MiniSearch<IndexedNote>
|
export let minisearchInstance: MiniSearch<IndexedNote>
|
||||||
|
|
||||||
@@ -134,38 +134,6 @@ export async function initGlobalSearchIndex(): Promise<void> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Indexes the vault's PDF files in small batches, provided PDF indexing is
 * enabled.
 *
 * Returns immediately when `canIndexPDFs()` is false. Otherwise, a console
 * warning about 'pdf.worker.min' messages is emitted, then each vault file
 * whose path ends with `.pdf` is removed from the index if it is already
 * present in the notes cache and handed to `addToIndex`. Pending additions are
 * awaited whenever the file's position is a multiple of 10, and once more
 * after the loop for whatever is left.
 *
 * The summary message is always logged to the console, and is also shown as a
 * notice when `settings.showIndexingNotices` is set.
 */
async function indexPDFs() {
  if (!canIndexPDFs()) {
    return
  }

  const startedAt = Date.now()
  console.warn(
    "Omnisearch - Warnings on 'pdf.worker.min' are due to some issues while reading PDFs file and can usually be ignored."
  )

  const pdfFiles = app.vault.getFiles().filter(f => f.path.endsWith('.pdf'))
  let batch: Promise<void>[] = []
  let position = 0
  for (const pdf of pdfFiles) {
    // A cached entry means the file was indexed before: drop the stale entry.
    if (getNoteFromCache(pdf.path)) {
      removeFromIndex(pdf.path)
    }
    batch.push(addToIndex(pdf))

    // Flush at positions 0, 10, 20, ... — the very first file is therefore
    // awaited on its own, and later batches hold up to 10 files.
    if (position % 10 === 0) {
      // NOTE(review): presumably a short pause to let the UI breathe —
      // confirm against the definition of `wait`.
      await wait(1)
      await Promise.all(batch)
      batch = []
    }
    position += 1
  }
  // Wait for the files queued since the last flush.
  await Promise.all(batch)

  // Notice & log
  const elapsedMs = Date.now() - startedAt
  const summary = `Omnisearch - Indexed ${pdfFiles.length} PDFs in ${elapsedMs}ms`
  if (settings.showIndexingNotices) {
    new Notice(summary)
  }
  console.log(summary)
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Searches the index for the given query,
|
* Searches the index for the given query,
|
||||||
* and returns an array of raw results
|
* and returns an array of raw results
|
||||||
|
|||||||
@@ -106,23 +106,23 @@ export class SettingsTab extends PluginSettingTab {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
// // Index PDFs
|
// Index PDFs
|
||||||
// const indexPDFsDesc = new DocumentFragment()
|
const indexPDFsDesc = new DocumentFragment()
|
||||||
// indexPDFsDesc.createSpan({}, span => {
|
indexPDFsDesc.createSpan({}, span => {
|
||||||
// span.innerHTML = `Omnisearch will index your PDFs, and return them in search results.
|
span.innerHTML = `Omnisearch will index your PDFs, and return them in search results.
|
||||||
// This feature is currently a work-in-progress, please report slowdowns or issues that you might experience.<br>
|
This feature is currently a work-in-progress, please report slowdowns or issues that you might experience.<br>
|
||||||
// PDFs being quite slow to index, <strong style="color: var(--text-accent)">it is strongly recommended to also enable "Store index in file"</strong>.<br>
|
PDFs being quite slow to index, <strong style="color: var(--text-accent)">it is strongly recommended to also enable "Store index in file"</strong>.<br>
|
||||||
// <strong>Needs a restart to fully take effect.</strong>`
|
<strong>Needs a restart to fully take effect.</strong>`
|
||||||
// })
|
})
|
||||||
// new Setting(containerEl)
|
new Setting(containerEl)
|
||||||
// .setName('BETA - Index PDFs')
|
.setName('BETA - Index PDFs')
|
||||||
// .setDesc(indexPDFsDesc)
|
.setDesc(indexPDFsDesc)
|
||||||
// .addToggle(toggle =>
|
.addToggle(toggle =>
|
||||||
// toggle.setValue(settings.indexPDFs).onChange(async v => {
|
toggle.setValue(settings.indexPDFs).onChange(async v => {
|
||||||
// settings.indexPDFs = v
|
settings.indexPDFs = v
|
||||||
// await saveSettings(this.plugin)
|
await saveSettings(this.plugin)
|
||||||
// })
|
})
|
||||||
// )
|
)
|
||||||
|
|
||||||
// Store index
|
// Store index
|
||||||
const serializedIndexDesc = new DocumentFragment()
|
const serializedIndexDesc = new DocumentFragment()
|
||||||
|
|||||||
@@ -173,7 +173,7 @@ export function getCtrlKeyLabel(): 'ctrl' | '⌘' {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function canIndexPDFs(): boolean {
|
export function canIndexPDFs(): boolean {
|
||||||
return false
|
return settings.indexPDFs
|
||||||
}
|
}
|
||||||
|
|
||||||
export function isFileIndexable(path: string): boolean {
|
export function isFileIndexable(path: string): boolean {
|
||||||
|
|||||||
Reference in New Issue
Block a user