Using PDFjs directly from Obsidian

This commit is contained in:
Simon Cambier
2022-09-30 20:49:49 +02:00
parent 30b77fa4d8
commit 3f9d3c4f80
3 changed files with 6 additions and 28 deletions

View File

@@ -1,12 +1,14 @@
import PDFJs from 'pdfjs-dist'
import pdfjsWorker from 'pdfjs-dist/build/pdf.worker.entry'
// import PDFJs from 'pdfjs-dist'
// import pdfjsWorker from 'pdfjs-dist/build/pdf.worker.entry'
import type { TextItem } from 'pdfjs-dist/types/src/display/api'
import type { TFile } from 'obsidian'
import {loadPdfJs} from "obsidian";
// NOTE(review): this statement relies on the bundled `pdfjs-dist` default
// import and `pdfjsWorker`, both of which are commented out above — it looks
// like the pre-change side of the diff; confirm it is not present in the
// final file.
PDFJs.GlobalWorkerOptions.workerSrc = pdfjsWorker
// Module-level cache for the PDF.js library object. It starts as null and is
// assigned inside getPdfText on first use from Obsidian's `loadPdfJs()`, so
// subsequent calls reuse the already-loaded value instead of loading again.
let PDFJs: any = null
// https://stackoverflow.com/a/59929946
export async function getPdfText(file: TFile): Promise<string> {
PDFJs = PDFJs ?? await loadPdfJs()
const data = await app.vault.readBinary(file)
const doc = await PDFJs.getDocument(data).promise
const pageTexts = Array.from({ length: doc.numPages }, async (v, i) => {