PDF extraction looks OK

This commit is contained in:
Simon Cambier
2022-10-12 20:58:56 +02:00
parent 416ccab125
commit 64e47f13f3
3 changed files with 13 additions and 10 deletions

View File

@@ -2,16 +2,14 @@ use js_sys::Uint8Array;
use pdf_extract::extract_text_from_mem;
use wasm_bindgen::prelude::*;
mod obsidian;
// mod obsidian;
#[wasm_bindgen]
pub fn extract_pdf_text(arr: Uint8Array) -> String {
// FIXME: return a Result<> here, to throw in JS in case of an error
let txt = match extract_text_from_mem(&arr.to_vec()) {
Ok(txt) => txt,
Err(e) => e.to_string(),
pub fn extract_pdf_text(arr: Uint8Array) -> Result<String, JsError> {
match extract_text_from_mem(&arr.to_vec()) {
Ok(txt) => return Ok(txt),
Err(e) => return Err(JsError::new(&e.to_string())),
};
txt
}
// #[wasm_bindgen]

View File

@@ -32,7 +32,7 @@ class PDFManager {
const worker = new PDFWorker({ name: 'PDF Text Extractor' })
return new Promise(async (resolve, reject) => {
// @ts-ignore
worker.postMessage({ data })
worker.postMessage({ data, name: file.basename })
worker.onmessage = (evt: any) => {
const txt = evt.data.text
this.updatePDFCache(hash, txt)

View File

@@ -6,8 +6,13 @@ const decodedPlugin = decodeBase64(rustPlugin as any)
onmessage = async evt => {
const buffer = Uint8Array.from(decodedPlugin, c => c.charCodeAt(0))
await plugin.default(Promise.resolve(buffer))
const text = plugin.extract_pdf_text(evt.data.data as Uint8Array)
self.postMessage({ text })
try {
const text = plugin.extract_pdf_text(evt.data.data as Uint8Array)
self.postMessage({ text })
} catch (e) {
console.warn('Omnisearch - Could not extract text from ' + evt.data.name)
self.postMessage({ text: '' })
}
}
function decodeBase64(data: string) {