#58 - Experimental PDF support
This commit is contained in:
@@ -37,6 +37,12 @@
|
|||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@vanakat/plugin-api": "^0.1.0",
|
"@vanakat/plugin-api": "^0.1.0",
|
||||||
"minisearch": "^5.0.0"
|
"minisearch": "^5.0.0",
|
||||||
|
"pdfjs-dist": "^2.16.105"
|
||||||
|
},
|
||||||
|
"pnpm": {
|
||||||
|
"overrides": {
|
||||||
|
"moment@>=2.18.0 <2.29.4": ">=2.29.4"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
42
pnpm-lock.yaml
generated
42
pnpm-lock.yaml
generated
@@ -1,5 +1,8 @@
|
|||||||
lockfileVersion: 5.3
|
lockfileVersion: 5.3
|
||||||
|
|
||||||
|
overrides:
|
||||||
|
moment@>=2.18.0 <2.29.4: '>=2.29.4'
|
||||||
|
|
||||||
specifiers:
|
specifiers:
|
||||||
'@babel/preset-env': ^7.19.0
|
'@babel/preset-env': ^7.19.0
|
||||||
'@babel/preset-typescript': ^7.18.6
|
'@babel/preset-typescript': ^7.18.6
|
||||||
@@ -12,10 +15,13 @@ specifiers:
|
|||||||
builtin-modules: ^3.3.0
|
builtin-modules: ^3.3.0
|
||||||
esbuild: 0.13.12
|
esbuild: 0.13.12
|
||||||
esbuild-plugin-copy: ^1.3.0
|
esbuild-plugin-copy: ^1.3.0
|
||||||
|
esbuild-plugin-wasm: ^1.0.0
|
||||||
esbuild-svelte: ^0.7.1
|
esbuild-svelte: ^0.7.1
|
||||||
|
extract-pdf: C:\Dev\rust\extract-pdf\pkg
|
||||||
jest: ^27.5.1
|
jest: ^27.5.1
|
||||||
minisearch: ^5.0.0
|
minisearch: ^5.0.0
|
||||||
obsidian: latest
|
obsidian: latest
|
||||||
|
pdfjs-dist: ^2.16.105
|
||||||
prettier: ^2.7.1
|
prettier: ^2.7.1
|
||||||
prettier-plugin-svelte: ^2.7.0
|
prettier-plugin-svelte: ^2.7.0
|
||||||
svelte: ^3.50.1
|
svelte: ^3.50.1
|
||||||
@@ -26,7 +32,9 @@ specifiers:
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
'@vanakat/plugin-api': 0.1.0
|
'@vanakat/plugin-api': 0.1.0
|
||||||
|
extract-pdf: link:../../rust/extract-pdf/pkg
|
||||||
minisearch: 5.0.0
|
minisearch: 5.0.0
|
||||||
|
pdfjs-dist: 2.16.105
|
||||||
|
|
||||||
devDependencies:
|
devDependencies:
|
||||||
'@babel/preset-env': 7.19.0
|
'@babel/preset-env': 7.19.0
|
||||||
@@ -39,6 +47,7 @@ devDependencies:
|
|||||||
builtin-modules: 3.3.0
|
builtin-modules: 3.3.0
|
||||||
esbuild: 0.13.12
|
esbuild: 0.13.12
|
||||||
esbuild-plugin-copy: 1.3.0_esbuild@0.13.12
|
esbuild-plugin-copy: 1.3.0_esbuild@0.13.12
|
||||||
|
esbuild-plugin-wasm: 1.0.0
|
||||||
esbuild-svelte: 0.7.1_esbuild@0.13.12+svelte@3.50.1
|
esbuild-svelte: 0.7.1_esbuild@0.13.12+svelte@3.50.1
|
||||||
jest: 27.5.1
|
jest: 27.5.1
|
||||||
obsidian: 0.16.3
|
obsidian: 0.16.3
|
||||||
@@ -2433,6 +2442,10 @@ packages:
|
|||||||
webidl-conversions: 5.0.0
|
webidl-conversions: 5.0.0
|
||||||
dev: true
|
dev: true
|
||||||
|
|
||||||
|
/dommatrix/1.0.3:
|
||||||
|
resolution: {integrity: sha512-l32Xp/TLgWb8ReqbVJAFIvXmY7go4nTxxlWiAFyhoQw9RKEOHBZNnyGvJWqDVSPmq3Y9HlM4npqF/T6VMOXhww==}
|
||||||
|
dev: false
|
||||||
|
|
||||||
/electron-to-chromium/1.4.247:
|
/electron-to-chromium/1.4.247:
|
||||||
resolution: {integrity: sha512-FLs6R4FQE+1JHM0hh3sfdxnYjKvJpHZyhQDjc2qFq/xFvmmRt/TATNToZhrcGUFzpF2XjeiuozrA8lI0PZmYYw==}
|
resolution: {integrity: sha512-FLs6R4FQE+1JHM0hh3sfdxnYjKvJpHZyhQDjc2qFq/xFvmmRt/TATNToZhrcGUFzpF2XjeiuozrA8lI0PZmYYw==}
|
||||||
dev: true
|
dev: true
|
||||||
@@ -2571,6 +2584,11 @@ packages:
|
|||||||
globby: 11.1.0
|
globby: 11.1.0
|
||||||
dev: true
|
dev: true
|
||||||
|
|
||||||
|
/esbuild-plugin-wasm/1.0.0:
|
||||||
|
resolution: {integrity: sha512-iXIf3hwfqorExG66/eNr3U8JakIZuge70nMNQtinvxbzdljQ/RjvwaBiGPqF/DvuIumUApbe3zj2kqHLVyc7uQ==}
|
||||||
|
engines: {node: '>=0.10.0'}
|
||||||
|
dev: true
|
||||||
|
|
||||||
/esbuild-sunos-64/0.13.12:
|
/esbuild-sunos-64/0.13.12:
|
||||||
resolution: {integrity: sha512-jBsF+e0woK3miKI8ufGWKG3o3rY9DpHvCVRn5eburMIIE+2c+y3IZ1srsthKyKI6kkXLvV4Cf/E7w56kLipMXw==}
|
resolution: {integrity: sha512-jBsF+e0woK3miKI8ufGWKG3o3rY9DpHvCVRn5eburMIIE+2c+y3IZ1srsthKyKI6kkXLvV4Cf/E7w56kLipMXw==}
|
||||||
cpu: [x64]
|
cpu: [x64]
|
||||||
@@ -3768,13 +3786,8 @@ packages:
|
|||||||
minimist: 1.2.6
|
minimist: 1.2.6
|
||||||
dev: true
|
dev: true
|
||||||
|
|
||||||
/moment/2.29.2:
|
|
||||||
resolution: {integrity: sha512-UgzG4rvxYpN15jgCmVJwac49h9ly9NurikMWGPdVxm8GZD6XjkKPxDTjQQ43gtGgnV3X0cAyWDdP2Wexoquifg==}
|
|
||||||
dev: false
|
|
||||||
|
|
||||||
/moment/2.29.4:
|
/moment/2.29.4:
|
||||||
resolution: {integrity: sha512-5LC9SOxjSc2HF6vO2CyuTDNivEdoz2IvyJJGj6X8DJ0eFyfszE0QiEd+iXmBvUP3WHxSjFH/vIsA0EN00cgr8w==}
|
resolution: {integrity: sha512-5LC9SOxjSc2HF6vO2CyuTDNivEdoz2IvyJJGj6X8DJ0eFyfszE0QiEd+iXmBvUP3WHxSjFH/vIsA0EN00cgr8w==}
|
||||||
dev: true
|
|
||||||
|
|
||||||
/ms/2.1.2:
|
/ms/2.1.2:
|
||||||
resolution: {integrity: sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==}
|
resolution: {integrity: sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==}
|
||||||
@@ -3828,7 +3841,7 @@ packages:
|
|||||||
'@codemirror/state': 0.19.9
|
'@codemirror/state': 0.19.9
|
||||||
'@codemirror/view': 0.19.48
|
'@codemirror/view': 0.19.48
|
||||||
'@types/codemirror': 0.0.108
|
'@types/codemirror': 0.0.108
|
||||||
moment: 2.29.2
|
moment: 2.29.4
|
||||||
dev: false
|
dev: false
|
||||||
|
|
||||||
/obsidian/0.16.3:
|
/obsidian/0.16.3:
|
||||||
@@ -3922,6 +3935,18 @@ packages:
|
|||||||
resolution: {integrity: sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==}
|
resolution: {integrity: sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==}
|
||||||
engines: {node: '>=8'}
|
engines: {node: '>=8'}
|
||||||
|
|
||||||
|
/pdfjs-dist/2.16.105:
|
||||||
|
resolution: {integrity: sha512-J4dn41spsAwUxCpEoVf6GVoz908IAA3mYiLmNxg8J9kfRXc2jxpbUepcP0ocp0alVNLFthTAM8DZ1RaHh8sU0A==}
|
||||||
|
peerDependencies:
|
||||||
|
worker-loader: ^3.0.8
|
||||||
|
peerDependenciesMeta:
|
||||||
|
worker-loader:
|
||||||
|
optional: true
|
||||||
|
dependencies:
|
||||||
|
dommatrix: 1.0.3
|
||||||
|
web-streams-polyfill: 3.2.1
|
||||||
|
dev: false
|
||||||
|
|
||||||
/picocolors/1.0.0:
|
/picocolors/1.0.0:
|
||||||
resolution: {integrity: sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==}
|
resolution: {integrity: sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==}
|
||||||
dev: true
|
dev: true
|
||||||
@@ -4561,6 +4586,11 @@ packages:
|
|||||||
makeerror: 1.0.12
|
makeerror: 1.0.12
|
||||||
dev: true
|
dev: true
|
||||||
|
|
||||||
|
/web-streams-polyfill/3.2.1:
|
||||||
|
resolution: {integrity: sha512-e0MO3wdXWKrLbL0DgGnUV7WHVuw9OUvL4hjgnPkIeEvESk74gAITi5G606JtZPp39cd8HA9VQzCIvA49LpPN5Q==}
|
||||||
|
engines: {node: '>= 8'}
|
||||||
|
dev: false
|
||||||
|
|
||||||
/webidl-conversions/5.0.0:
|
/webidl-conversions/5.0.0:
|
||||||
resolution: {integrity: sha512-VlZwKPCkYKxQgeSbH5EyngOmRp7Ww7I9rQLERETtf5ofd9pGeswWiOtogpEO850jziPRarreGxn5QIiTqpb2wA==}
|
resolution: {integrity: sha512-VlZwKPCkYKxQgeSbH5EyngOmRp7Ww7I9rQLERETtf5ofd9pGeswWiOtogpEO850jziPRarreGxn5QIiTqpb2wA==}
|
||||||
engines: {node: '>=8'}
|
engines: {node: '>=8'}
|
||||||
|
|||||||
@@ -27,7 +27,7 @@
|
|||||||
|
|
||||||
let groupedOffsets: number[] = []
|
let groupedOffsets: number[] = []
|
||||||
let selectedIndex = 0
|
let selectedIndex = 0
|
||||||
let note: ResultNote | null = null
|
let note: ResultNote | undefined
|
||||||
let query: Query
|
let query: Query
|
||||||
|
|
||||||
onMount(() => {
|
onMount(() => {
|
||||||
|
|||||||
18
src/pdf-parser.ts
Normal file
18
src/pdf-parser.ts
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
import PDFJs from 'pdfjs-dist'
|
||||||
|
import pdfjsWorker from 'pdfjs-dist/build/pdf.worker.entry'
|
||||||
|
import type { TextItem } from 'pdfjs-dist/types/src/display/api'
|
||||||
|
import type { TFile } from 'obsidian'
|
||||||
|
|
||||||
|
PDFJs.GlobalWorkerOptions.workerSrc = pdfjsWorker
|
||||||
|
|
||||||
|
// https://stackoverflow.com/a/59929946
|
||||||
|
/**
 * Extracts the plain text of a PDF file stored in the vault.
 *
 * The file is read as binary, opened with pdf.js, and the text items of every
 * page are concatenated in page order. Pages are fetched concurrently;
 * `Promise.all` preserves their order in the result.
 *
 * Line ends reported by pdf.js (`hasEOL`) and page boundaries are emitted as
 * `\n` so that the last word of one line/page is not fused with the first word
 * of the next.
 *
 * @param file - The vault file to read; expected to be a PDF.
 * @returns The concatenated text of all pages.
 * @throws Rejects if the file cannot be read or pdf.js fails to parse it.
 */
export async function getPdfText(file: TFile): Promise<string> {
  const data = await app.vault.readBinary(file)
  const doc = await PDFJs.getDocument(data).promise
  try {
    const pageTexts = Array.from({ length: doc.numPages }, async (_, i) => {
      // pdf.js page numbers are 1-based
      const page = await doc.getPage(i + 1)
      const content = await page.getTextContent()
      return (content.items as TextItem[])
        .map(token => (token.hasEOL ? `${token.str}\n` : token.str))
        .join('')
    })
    return (await Promise.all(pageTexts)).join('\n')
  } finally {
    // Release the document's resources whether or not extraction succeeded
    await doc.destroy()
  }
}
|
||||||
@@ -19,7 +19,7 @@ import {
|
|||||||
wait,
|
wait,
|
||||||
} from './utils'
|
} from './utils'
|
||||||
import type { Query } from './query'
|
import type { Query } from './query'
|
||||||
import { settings } from './settings'
|
import { settings } from './settings'
|
||||||
import {
|
import {
|
||||||
removeNoteFromCache,
|
removeNoteFromCache,
|
||||||
getNoteFromCache,
|
getNoteFromCache,
|
||||||
@@ -32,6 +32,7 @@ import {
|
|||||||
saveNotesCacheToFile,
|
saveNotesCacheToFile,
|
||||||
isCacheOutdated,
|
isCacheOutdated,
|
||||||
} from './notes'
|
} from './notes'
|
||||||
|
import { getPdfText } from './pdf-parser'
|
||||||
|
|
||||||
let minisearchInstance: MiniSearch<IndexedNote>
|
let minisearchInstance: MiniSearch<IndexedNote>
|
||||||
let isIndexChanged: boolean
|
let isIndexChanged: boolean
|
||||||
@@ -326,8 +327,13 @@ export async function addToIndex(file: TAbstractFile): Promise<void> {
|
|||||||
throw new Error(`${file.basename} is already indexed`)
|
throw new Error(`${file.basename} is already indexed`)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fetch content from the cache to index it as-is
|
let content
|
||||||
const content = removeDiacritics(await app.vault.cachedRead(file))
|
if (file.path.endsWith('.pdf')) {
|
||||||
|
content = removeDiacritics(await getPdfText(file as TFile))
|
||||||
|
} else {
|
||||||
|
// Fetch content from the cache to index it as-is
|
||||||
|
content = removeDiacritics(await app.vault.cachedRead(file))
|
||||||
|
}
|
||||||
|
|
||||||
// Make the document and index it
|
// Make the document and index it
|
||||||
const note: IndexedNote = {
|
const note: IndexedNote = {
|
||||||
|
|||||||
24
src/types-obsidian.d.ts
vendored
Normal file
24
src/types-obsidian.d.ts
vendored
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
import type { MetadataCache, ViewState, Vault } from 'obsidian'
|
||||||
|
|
||||||
|
/**
 * Module augmentation for the 'obsidian' package: merges extra members into
 * interfaces that the package already declares.
 * NOTE(review): presumably these are members missing from the official
 * typings (internal/undocumented API) — confirm against the Obsidian version
 * in use.
 */
declare module 'obsidian' {
  interface MetadataCache {
    /** Optional: may be absent at runtime, so check for existence before calling. */
    isUserIgnored?(path: string): boolean
  }

  interface FrontMatterCache {
    /** Either a single string or a list of strings. */
    aliases?: string[] | string
    /** Either a single string or a list of strings. */
    tags?: string[] | string
  }

  interface ViewState {
    state?: {
      file?: string
    }
  }

  interface Vault {
    /**
     * Reads a configuration value by key. The result is `unknown` and must be
     * narrowed by the caller before use.
     */
    getConfig(key: string): unknown
  }
}
|
||||||
|
|
||||||
|
|
||||||
23
src/types.d.ts
vendored
23
src/types.d.ts
vendored
@@ -1,22 +1 @@
|
|||||||
import type { MetadataCache, ViewState, Vault } from 'obsidian'
|
declare module 'pdfjs-dist/build/pdf.worker.entry';
|
||||||
|
|
||||||
declare module 'obsidian' {
|
|
||||||
interface MetadataCache {
|
|
||||||
isUserIgnored?(path: string): boolean
|
|
||||||
}
|
|
||||||
|
|
||||||
interface FrontMatterCache {
|
|
||||||
aliases?: string[] | string
|
|
||||||
tags?: string[] | string
|
|
||||||
}
|
|
||||||
|
|
||||||
interface ViewState {
|
|
||||||
state?: {
|
|
||||||
file?: string
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
interface Vault {
|
|
||||||
getConfig(string): unknown
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -174,7 +174,7 @@ export function getCtrlKeyLabel(): 'ctrl' | '⌘' {
|
|||||||
|
|
||||||
export function isFileIndexable(path: string): boolean {
|
export function isFileIndexable(path: string): boolean {
|
||||||
return (
|
return (
|
||||||
path.endsWith('.md') ||
|
path.endsWith('.md') || path.endsWith('.pdf') ||
|
||||||
settings.indexedFileTypes.some(t => path.endsWith(`.${t}`))
|
settings.indexedFileTypes.some(t => path.endsWith(`.${t}`))
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user