Add support for opening PDFs at the correct page for their results. (#507)

This relies on the `# Page N^page=N` marker format returned by Text Extractor (see https://github.com/scambier/obsidian-text-extractor/issues/75). This commit is an attempt to implement https://github.com/scambier/obsidian-omnisearch/issues/100.
2025-12-26 09:24:03 +00:00
parent 5638846b18
commit cc23f87f08
1 changed files with 56 additions and 1 deletions
--- a/src/tools/notes.ts
+++ b/src/tools/notes.ts
@@ -1,6 +1,30 @@
 import { type App, type CachedMetadata, MarkdownView, TFile } from 'obsidian'
 import type { ResultNote } from '../globals'

+/**
+ * Returns the page number N of the last `# Page N^page=N` marker line found in `content` before `offset`, or null when no such marker precedes the offset or `offset` is greater than `content.length`.
+ */
+function getPdfPageFromOffset(content: string, offset: number): number | null {
+  // Bail out early when the content has no page markers at all (e.g. text extractor does not emit them) or the offset lies beyond the end of the content
+  if (!content.includes('# Page ') || offset > content.length) return null
+
+  // Only markers that precede the match are relevant, so restrict the search to the text before the offset
+  const textBeforeOffset = content.substring(0, offset)
+
+  // Scan every marker line (one per line, `m` flag); the \1 backreference requires both numbers on the line to be identical
+  const regex = /^# Page ([0-9]+)\^page=\1$/gm
+  let lastMatch: RegExpExecArray | null = null
+  let match: RegExpExecArray | null
+  while (
+    (match = regex.exec(textBeforeOffset)) !== null
+  ) {
+    lastMatch = match // overwritten on each hit, so it ends up holding the marker closest to the offset
+  }
+
+  // The last marker before the offset names the page the result appears on; null if none was found
+  return lastMatch ? parseInt(lastMatch[1], 10) : null
+}
+
 export async function openNote(
  app: App,
  item: ResultNote,
@@ -8,6 +32,25 @@ export async function openNote(
  newPane = false,
  newLeaf = false
 ): Promise<void> {
+  // We don't have a way to switch pages on a PDF view, so we must open a new pane for PDF results to trigger page navigation
+  // We should only trigger this behaviour if we know the page number for the result
+  // This code runs before the normal implementation because we don't want to trigger activation of an existing pane for this PDF and then open a new one on top
+  const isPdf = item.path.toLowerCase().endsWith('.pdf')
+  if (isPdf) {
+    const pdfPage = isPdf ? getPdfPageFromOffset(item.content, offset) : null
+    if (pdfPage !== null) {
+      // Obsidian also supports &selection= but this takes page content id references
+      const linkPath = `${item.path}#page=${pdfPage}`
+
+      await app.workspace.openLinkText(
+        linkPath,
+        '',
+        newLeaf ? 'split' : newPane
+      )
+      return
+    }
+  }
+
  // Check if the note is already open,
  // to avoid opening it twice if the first one is pinned
  let alreadyOpenAndPinned = false
@@ -25,7 +68,19 @@ export async function openNote(
  })

  if (!alreadyOpenAndPinned) {
-    // Open the note normally
+    // For PDFs, extract page number and append to path
+    // TODO if we knew the view type for PDF could we reuse an existing view?
+    let linkPath = item.path
+
+    if (isPdf && offset > 0) {
+      // If this PDF extract has page headings, use them
+      const pageNum = getPdfPageFromOffset(item.content, offset)
+
+      if (pageNum !== null) {
+        linkPath = `${item.path}#page=${pageNum}`
+      }
+    }
+
    await app.workspace.openLinkText(item.path, '', newLeaf ? 'split' : newPane)
  }