#157 - Basic canvas support

This commit is contained in:
Simon Cambier
2023-01-06 12:58:46 +01:00
parent d1b12f9142
commit 7881120abd
2 changed files with 44 additions and 12 deletions

View File

@@ -1,16 +1,15 @@
import { Notice } from 'obsidian'
import {
getTextExtractor,
type DocumentRef,
getTextExtractor,
type IndexedDocument,
} from './globals'
import { database } from './database'
import type { AsPlainObject } from 'minisearch'
import type MiniSearch from 'minisearch'
import {
extractHeadingsFromCache,
getAliasesFromMetadata,
getTagsFromMetadata,
isFileCanvas,
isFileImage,
isFilePDF,
isFilePlaintext,
@@ -18,18 +17,50 @@ import {
removeDiacritics,
} from './tools/utils'
import { getImageText, getPdfText } from 'obsidian-text-extract'
import type { CanvasData } from 'obsidian/canvas'
import type { AsPlainObject } from 'minisearch'
import type MiniSearch from 'minisearch'
async function getIndexedDocument(path: string): Promise<IndexedDocument> {
/**
 * Extracts the text content of the file located at `path` and returns it
 * as an `IndexedDocument` object.
 * @param path Vault path of the file to index
 * @throws Error if no file in the vault matches `path`
 */
async function getAndMapIndexedDocument(
path: string
): Promise<IndexedDocument> {
const file = app.vault.getFiles().find(f => f.path === path)
if (!file) throw new Error(`Invalid file path: "${path}"`)
let content: string | null = null
const extractor = getTextExtractor()
// Plain text
// ** Plain text **
// Just read the file content
if (isFilePlaintext(path)) {
content = await app.vault.cachedRead(file)
}
// Image or PDF, with the text-extractor plugin
// ** Canvas **
// Extract the searchable text (text cards, linked file paths, edge labels)
// from the canvas JSON
else if (isFileCanvas(path)) {
  const raw = await app.vault.cachedRead(file)
  // A canvas file may be empty, and `nodes` / `edges` may be absent from the
  // JSON: treat anything missing as "no content" instead of throwing, so the
  // file is still indexed (with empty content) rather than dropped.
  const canvas: Partial<CanvasData> = raw.trim()
    ? (JSON.parse(raw) ?? {})
    : {}
  const texts: string[] = []
  // Concatenate text from the canvas fields
  for (const node of canvas.nodes ?? []) {
    if (node.type === 'text') {
      texts.push(node.text)
    } else if (node.type === 'file') {
      texts.push(node.file)
    }
  }
  // Only labelled edges carry text worth indexing
  for (const edge of canvas.edges ?? []) {
    if (edge.label) {
      texts.push(edge.label)
    }
  }
  content = texts.join('\r\n')
}
// a) ** Image or PDF ** with Text Extractor
else if (extractor) {
if (extractor.canFileBeExtracted(path)) {
content = await extractor.extractText(file)
@@ -37,7 +68,7 @@ async function getIndexedDocument(path: string): Promise<IndexedDocument> {
throw new Error('Invalid file format: ' + file.path)
}
}
// Image or PDF, without the text-extractor plugin
// b) ** Image or PDF ** without the text-extractor plugin
else {
if (isFilePDF(path)) {
content = await getPdfText(file)
@@ -106,7 +137,7 @@ class CacheManager {
public async addToLiveCache(path: string): Promise<void> {
try {
const doc = await getIndexedDocument(path)
const doc = await getAndMapIndexedDocument(path)
this.documents.set(path, doc)
} catch (e) {
console.warn('Omnisearch: Error while adding to live cache', e)

View File

@@ -7,9 +7,9 @@ import {
} from 'obsidian'
import type { SearchMatch } from '../globals'
import {
getChsSegmenter,
excerptAfter,
excerptBefore,
getChsSegmenter,
highlightClass,
isSearchMatch,
regexLineSplit,
@@ -215,6 +215,7 @@ export function getCtrlKeyLabel(): 'ctrl' | '⌘' {
export function isFileIndexable(path: string): boolean {
return (
isFilePlaintext(path) ||
isFileCanvas(path) ||
(!Platform.isMobileApp && settings.PDFIndexing && isFilePDF(path)) ||
(!Platform.isMobileApp && settings.imagesIndexing && isFileImage(path))
)
@@ -231,11 +232,11 @@ export function isFilePDF(path: string): boolean {
}
/**
 * Tells whether `path` ends with `.md` or with a dot followed by one of the
 * extensions listed in `settings.indexedFileTypes`.
 * The check is a plain, case-sensitive suffix match on the path string.
 */
export function isFilePlaintext(path: string): boolean {
// NOTE(review): two consecutive `return` statements — presumably the removed
// and the added line of this diff view; only the first one can execute.
// TODO confirm which one belongs to the final file.
return getPlaintextExtensions().some(t => path.endsWith(`.${t}`))
return [...settings.indexedFileTypes, 'md'].some(t => path.endsWith(`.${t}`))
}
export function getPlaintextExtensions(): string[] {
return [...settings.indexedFileTypes, 'md']
/**
 * Tells whether `path` designates a canvas file.
 * The decision relies only on the path string: it must end with the
 * `.canvas` suffix (case-sensitive); the file itself is never read.
 */
export function isFileCanvas(path: string): boolean {
  const canvasSuffix = '.canvas'
  const hasCanvasSuffix = path.endsWith(canvasSuffix)
  return hasCanvasSuffix
}
export function getExtension(path: string): string {