#58 - Experimental PDF support
This commit is contained in:
@@ -27,7 +27,7 @@
|
||||
|
||||
let groupedOffsets: number[] = []
|
||||
let selectedIndex = 0
|
||||
let note: ResultNote | null = null
|
||||
let note: ResultNote | undefined
|
||||
let query: Query
|
||||
|
||||
onMount(() => {
|
||||
|
||||
18
src/pdf-parser.ts
Normal file
18
src/pdf-parser.ts
Normal file
@@ -0,0 +1,18 @@
|
||||
import PDFJs from 'pdfjs-dist'
|
||||
import pdfjsWorker from 'pdfjs-dist/build/pdf.worker.entry'
|
||||
import type { TextItem } from 'pdfjs-dist/types/src/display/api'
|
||||
import type { TFile } from 'obsidian'
|
||||
|
||||
// Hand pdf.js the imported worker entry as the source it loads its worker from.
PDFJs.GlobalWorkerOptions.workerSrc = pdfjsWorker
|
||||
|
||||
// https://stackoverflow.com/a/59929946
|
||||
/**
 * Extracts the plain text of a PDF file stored in the vault.
 *
 * The file is read as binary, parsed with pdf.js, and the text fragments of
 * every page are concatenated. A newline is emitted wherever pdf.js reports an
 * end of line and between pages, so that words on either side of a line or
 * page break are not fused into a single token.
 *
 * @param file - The PDF file to read from the vault.
 * @returns The text of all pages, in page order.
 * @throws Whatever the vault read or pdf.js throws if the file cannot be read
 *   or parsed.
 */
export async function getPdfText(file: TFile): Promise<string> {
  const data = await app.vault.readBinary(file)
  const doc = await PDFJs.getDocument(data).promise
  try {
    // Kick off every page extraction at once; Promise.all keeps page order.
    const pageTexts = Array.from({ length: doc.numPages }, async (_, i) => {
      // pdf.js page numbers are 1-based.
      const page = await doc.getPage(i + 1)
      const content = await page.getTextContent()
      return (
        content.items
          // Marked-content entries carry no `str`; keep only real text items.
          .filter((item): item is TextItem => 'str' in item)
          .map(item => (item.hasEOL ? `${item.str}\n` : item.str))
          .join('')
      )
    })
    return (await Promise.all(pageTexts)).join('\n')
  } finally {
    // Release the parsed document and its worker; otherwise every indexed PDF
    // stays resident in memory.
    await doc.destroy()
  }
}
|
||||
@@ -19,7 +19,7 @@ import {
|
||||
wait,
|
||||
} from './utils'
|
||||
import type { Query } from './query'
|
||||
import { settings } from './settings'
|
||||
import { settings } from './settings'
|
||||
import {
|
||||
removeNoteFromCache,
|
||||
getNoteFromCache,
|
||||
@@ -32,6 +32,7 @@ import {
|
||||
saveNotesCacheToFile,
|
||||
isCacheOutdated,
|
||||
} from './notes'
|
||||
import { getPdfText } from './pdf-parser'
|
||||
|
||||
let minisearchInstance: MiniSearch<IndexedNote>
|
||||
let isIndexChanged: boolean
|
||||
@@ -326,8 +327,13 @@ export async function addToIndex(file: TAbstractFile): Promise<void> {
|
||||
throw new Error(`${file.basename} is already indexed`)
|
||||
}
|
||||
|
||||
// Fetch content from the cache to index it as-is
|
||||
const content = removeDiacritics(await app.vault.cachedRead(file))
|
||||
let content
|
||||
if (file.path.endsWith('.pdf')) {
|
||||
content = removeDiacritics(await getPdfText(file as TFile))
|
||||
} else {
|
||||
// Fetch content from the cache to index it as-is
|
||||
content = removeDiacritics(await app.vault.cachedRead(file))
|
||||
}
|
||||
|
||||
// Make the document and index it
|
||||
const note: IndexedNote = {
|
||||
|
||||
24
src/types-obsidian.d.ts
vendored
Normal file
24
src/types-obsidian.d.ts
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
import type { MetadataCache, ViewState, Vault } from 'obsidian'
|
||||
|
||||
/**
 * Module augmentation for 'obsidian': declares extra members on a few of its
 * interfaces so they can be used with type checking.
 * NOTE(review): presumably these members are absent from the published
 * typings — confirm against the installed `obsidian` package.
 */
declare module 'obsidian' {
  interface MetadataCache {
    /** Optional method: takes a path and returns a boolean. */
    isUserIgnored?(path: string): boolean
  }

  interface FrontMatterCache {
    /** Either a single string or a list of strings. */
    aliases?: string[] | string
    /** Either a single string or a list of strings. */
    tags?: string[] | string
  }

  interface ViewState {
    state?: {
      file?: string
    }
  }

  interface Vault {
    /**
     * The parameter is explicitly named and typed; a bare `getConfig(string)`
     * would declare a parameter *called* `string` of implicit `any` type.
     */
    getConfig(key: string): unknown
  }
}
|
||||
|
||||
|
||||
23
src/types.d.ts
vendored
23
src/types.d.ts
vendored
@@ -1,22 +1 @@
|
||||
import type { MetadataCache, ViewState, Vault } from 'obsidian'
|
||||
|
||||
/**
 * Module augmentation for 'obsidian': declares extra members on a few of its
 * interfaces so they can be used with type checking.
 * NOTE(review): presumably these members are absent from the published
 * typings — confirm against the installed `obsidian` package.
 */
declare module 'obsidian' {
  interface MetadataCache {
    /** Optional method: takes a path and returns a boolean. */
    isUserIgnored?(path: string): boolean
  }

  interface FrontMatterCache {
    /** Either a single string or a list of strings. */
    aliases?: string[] | string
    /** Either a single string or a list of strings. */
    tags?: string[] | string
  }

  interface ViewState {
    state?: {
      file?: string
    }
  }

  interface Vault {
    /**
     * The parameter is explicitly named and typed; a bare `getConfig(string)`
     * would declare a parameter *called* `string` of implicit `any` type.
     */
    getConfig(key: string): unknown
  }
}
|
||||
// Shorthand ambient module declaration: allows the pdf.js worker entry to be
// imported even though no typings are declared for it here (its imports are
// typed as `any`).
declare module 'pdfjs-dist/build/pdf.worker.entry';
|
||||
@@ -174,7 +174,7 @@ export function getCtrlKeyLabel(): 'ctrl' | '⌘' {
|
||||
|
||||
/**
 * Tells whether the file at `path` is one that gets indexed.
 *
 * A path qualifies when it ends in `.md` or `.pdf`, or in `.` followed by one
 * of the entries of `settings.indexedFileTypes`.
 *
 * @param path - Path of the file to check.
 * @returns `true` if the path has an indexable extension, `false` otherwise.
 */
export function isFileIndexable(path: string): boolean {
  // Extensions that are always indexed, regardless of user settings.
  const builtInExtensions = ['.md', '.pdf']
  if (builtInExtensions.some(ext => path.endsWith(ext))) {
    return true
  }
  // Otherwise fall back to the configured list of extra file types.
  return settings.indexedFileTypes.some(t => path.endsWith(`.${t}`))
}
|
||||
|
||||
Reference in New Issue
Block a user