Merge branch 'master' into feature/ocr

# Conflicts:
# src/database.ts
# src/file-loader.ts
# src/main.ts
# src/pdf/pdf-manager.ts
2022-11-05 18:16:16 +01:00
parent 1d02c8617a 087ec5cc99
commit 74db2844a9
12 changed files with 310 additions and 196 deletions
--- a/src/file-loader.ts
+++ b/src/file-loader.ts
@@ -3,7 +3,6 @@ import {
  extractHeadingsFromCache,
  getAliasesFromMetadata,
  getTagsFromMetadata,
-  isFileImage,
  isFilePlaintext,
  removeDiacritics,
 } from './tools/utils'
@@ -11,6 +10,7 @@ import * as NotesIndex from './notes-index'
 import type { TFile } from 'obsidian'
 import type { IndexedDocument } from './globals'
 import { getNonExistingNotes } from './tools/notes'
+import { database } from './database'
 import { getImageText, getPdfText } from 'obsidian-text-extract'

 /**
@@ -22,7 +22,7 @@ export async function getPlainTextFiles(): Promise<IndexedDocument[]> {
  for (const file of allFiles) {
    const doc = await fileToIndexedDocument(file)
    data.push(doc)
-    await cacheManager.updateDocument(file.path, doc)
+    await cacheManager.updateLiveDocument(file.path, doc)
  }
  return data
 }
@@ -32,44 +32,19 @@ export async function getPlainTextFiles(): Promise<IndexedDocument[]> {
 * If a PDF isn't cached, it will be read from the disk and added to the IndexedDB
 */
 export async function getPDFFiles(): Promise<IndexedDocument[]> {
-  const allFiles = app.vault.getFiles().filter(f => f.path.endsWith('.pdf'))
-  const data: IndexedDocument[] = []
+  const fromDisk = app.vault.getFiles().filter(f => f.path.endsWith('.pdf'))
+  const fromDb = await database.pdf.toArray()

+  const data: IndexedDocument[] = []
  const input = []
-  for (const file of allFiles) {
+  for (const file of fromDisk) {
    input.push(
      NotesIndex.processQueue(async () => {
-        const doc = await fileToIndexedDocument(file)
-        await cacheManager.updateDocument(file.path, doc)
-        data.push(doc)
-      })
-    )
-  }
-  await Promise.all(input)
-  return data
-}
-
-/**
- * Return all Image files as IndexedDocuments.
- * If a PDF isn't cached, it will be read from the disk and added to the IndexedDB
- */
-export async function getImageFiles(): Promise<IndexedDocument[]> {
-  const allFiles = app.vault
-    .getFiles()
-    .filter(
-      f =>
-        f.path.endsWith('.png') ||
-        f.path.endsWith('.jpg') ||
-        f.path.endsWith('.jpeg')
-    )
-  const data: IndexedDocument[] = []
-
-  const input = []
-  for (const file of allFiles) {
-    input.push(
-      NotesIndex.processQueue(async () => {
-        const doc = await fileToIndexedDocument(file)
-        await cacheManager.updateDocument(file.path, doc)
+        const doc = await fileToIndexedDocument(
+          file,
+          fromDb.find(o => o.path === file.path)?.text
+        )
+        await cacheManager.updateLiveDocument(file.path, doc)
        data.push(doc)
      })
    )
@@ -82,41 +57,45 @@ export async function getImageFiles(): Promise<IndexedDocument[]> {
 * Convert a file into an IndexedDocument.
 * Will use the cache if possible.
 * @param file
+ * @param content Optional pre-extracted text; when provided (non-empty), the file is not read or parsed again
 */
 export async function fileToIndexedDocument(
-  file: TFile
+  file: TFile,
+  content?: string
 ): Promise<IndexedDocument> {
-  let content: string
-  if (isFilePlaintext(file.path)) {
-    content = removeDiacritics(await app.vault.cachedRead(file))
-  } else if (file.path.endsWith('.pdf')) {
-    content = removeDiacritics(await getPdfText(file))
-  } else if (isFileImage(file.path)) {
-    content = removeDiacritics(await getImageText(file))
-  } else {
-    throw new Error('Invalid file: ' + file.path)
+  if (!content) {
+    if (isFilePlaintext(file.path)) {
+      content = await app.vault.cachedRead(file)
+    } else if (file.path.endsWith('.pdf')) {
+      content = await getPdfText(file)
+    } else {
+      throw new Error('Invalid file: ' + file.path)
+    }
  }

  content = removeDiacritics(content)
  const metadata = app.metadataCache.getFileCache(file)

-  // EXCALIDRAW
-  // Remove the json code
-  if (metadata?.frontmatter?.['excalidraw-plugin']) {
-    const comments = metadata.sections?.filter(s => s.type === 'comment') ?? []
-    for (const { start, end } of comments.map(c => c.position)) {
-      content =
-        content.substring(0, start.offset - 1) + content.substring(end.offset)
-    }
-  }
-
  // Look for links that lead to non-existing files,
  // and add them to the index.
  if (metadata) {
    const nonExisting = getNonExistingNotes(file, metadata)
-    for (const name of nonExisting.filter(o => !cacheManager.getDocument(o))) {
+    for (const name of nonExisting.filter(
+      o => !cacheManager.getLiveDocument(o)
+    )) {
      NotesIndex.addNonExistingToIndex(name, file.path)
    }
+
+    // EXCALIDRAW
+    // Remove the json code
+    if (metadata.frontmatter?.['excalidraw-plugin']) {
+      const comments =
+        metadata.sections?.filter(s => s.type === 'comment') ?? []
+      for (const { start, end } of comments.map(c => c.position)) {
+        content =
+          content.substring(0, start.offset - 1) + content.substring(end.offset)
+      }
+    }
  }

  return {