feat(#245): in results list, show where an image/pdf is embedded

* Add an index of embeds for reference in results

* Notes embedding images are now shown in results

* Updated dependencies

* Correctly referencing all embeds

* Updated docs

* Basic embedded feature ok
This commit is contained in:
Simon Cambier
2024-09-25 20:47:27 +02:00
committed by GitHub
parent 437b7e2b9a
commit 1b442d1f24
14 changed files with 280 additions and 101 deletions

View File

@@ -22,7 +22,6 @@ export class CacheManager {
* Show an empty input field next time the user opens Omnisearch modal
*/
private nextQueryIsEmpty = false
/**
* The "live cache", containing all indexed vault files
* in the form of IndexedDocuments
@@ -45,6 +44,7 @@ export class CacheManager {
return
}
this.documents.set(path, doc)
this.plugin.embedsRepository.refreshEmbeds(path)
} catch (e) {
console.warn(`Omnisearch: Error while adding "${path}" to live cache`, e)
// Shouldn't be needed, but...
@@ -163,16 +163,22 @@ export class CacheManager {
else if (
isFileImage(path) &&
((this.plugin.settings.imagesIndexing &&
extractor?.canFileBeExtracted(path)) ||
(this.plugin.settings.aiImageIndexing &&
aiImageAnalyzer?.canBeAnalyzed(file)))
extractor?.canFileBeExtracted(path)) ||
(this.plugin.settings.aiImageIndexing &&
aiImageAnalyzer?.canBeAnalyzed(file)))
) {
if (this.plugin.settings.imagesIndexing && extractor?.canFileBeExtracted(path)){
if (
this.plugin.settings.imagesIndexing &&
extractor?.canFileBeExtracted(path)
) {
content = await extractor.extractText(file)
}
if (this.plugin.settings.aiImageIndexing && aiImageAnalyzer?.canBeAnalyzed(file)) {
content = await aiImageAnalyzer.analyzeImage(file) + (content ?? '')
if (
this.plugin.settings.aiImageIndexing &&
aiImageAnalyzer?.canBeAnalyzed(file)
) {
content = (await aiImageAnalyzer.analyzeImage(file)) + (content ?? '')
}
}
// ** PDF **
@@ -230,7 +236,8 @@ export class CacheManager {
}
}
}
const displayTitle = metadata?.frontmatter?.[this.plugin.settings.displayTitle] ?? ''
const displayTitle =
metadata?.frontmatter?.[this.plugin.settings.displayTitle] ?? ''
const tags = getTagsFromMetadata(metadata)
return {
basename: file.basename,

View File

@@ -4,11 +4,12 @@
export let id: string
export let selected = false
export let glyph = false
export let cssClass = ''
</script>
<div
data-result-id={id}
class="suggestion-item omnisearch-result"
class="suggestion-item omnisearch-result {cssClass}"
class:is-selected={selected}
on:mousemove
on:click

View File

@@ -3,7 +3,8 @@
import type { ResultNote } from '../globals'
import {
getExtension,
isFileCanvas, isFileExcalidraw,
isFileCanvas,
isFileExcalidraw,
isFileImage,
isFilePDF,
pathWithoutFilename,
@@ -11,6 +12,7 @@
import ResultItemContainer from './ResultItemContainer.svelte'
import { TFile, setIcon } from 'obsidian'
import type OmnisearchPlugin from '../main'
import { SvelteComponent } from 'svelte'
export let selected = false
export let note: ResultNote
@@ -21,6 +23,7 @@
let notePath = ''
let elFolderPathIcon: HTMLElement
let elFilePathIcon: HTMLElement
let elEmbedIcon: HTMLElement
$: {
imagePath = null
@@ -32,8 +35,14 @@
}
}
$: matchesTitle = plugin.textProcessor.getMatches(title, note.foundWords)
$: matchesNotePath = plugin.textProcessor.getMatches(notePath, note.foundWords)
$: cleanedContent = plugin.textProcessor.makeExcerpt(note.content, note.matches[0]?.offset ?? -1)
$: matchesNotePath = plugin.textProcessor.getMatches(
notePath,
note.foundWords
)
$: cleanedContent = plugin.textProcessor.makeExcerpt(
note.content,
note.matches[0]?.offset ?? -1
)
$: glyph = false //cacheManager.getLiveDocument(note.path)?.doesNotExist
$: {
title = note.displayTitle || note.basename
@@ -46,23 +55,24 @@
if (elFilePathIcon) {
if (isFileImage(note.path)) {
setIcon(elFilePathIcon, 'image')
}
else if (isFilePDF(note.path)) {
} else if (isFilePDF(note.path)) {
setIcon(elFilePathIcon, 'file-text')
}
else if (isFileCanvas(note.path) || isFileExcalidraw(note.path)) {
} else if (isFileCanvas(note.path) || isFileExcalidraw(note.path)) {
setIcon(elFilePathIcon, 'layout-dashboard')
}
else {
} else {
setIcon(elFilePathIcon, 'file')
}
}
if (elEmbedIcon) {
setIcon(elEmbedIcon, 'corner-down-right')
}
}
</script>
<ResultItemContainer
glyph="{glyph}"
id="{note.path}"
cssClass=" {note.isEmbed ? 'omnisearch-result__embed' : ''}"
on:auxclick
on:click
on:mousemove
@@ -70,8 +80,14 @@
<div>
<div class="omnisearch-result__title-container">
<span class="omnisearch-result__title">
<span bind:this="{elFilePathIcon}"></span>
<span>{@html plugin.textProcessor.highlightText(title, matchesTitle)}</span>
{#if note.isEmbed}
<span bind:this="{elEmbedIcon}" title="The document above is embedded in this note"></span>
{:else}
<span bind:this="{elFilePathIcon}"></span>
{/if}
<span>
{@html plugin.textProcessor.highlightText(title, matchesTitle)}
</span>
<span class="omnisearch-result__extension">
.{getExtension(note.path)}
</span>
@@ -91,23 +107,33 @@
{#if notePath}
<div class="omnisearch-result__folder-path">
<span bind:this="{elFolderPathIcon}"></span>
<span>{@html plugin.textProcessor.highlightText(notePath, matchesNotePath)}</span>
<span>
{@html plugin.textProcessor.highlightText(
notePath,
matchesNotePath
)}</span>
</div>
{/if}
<div style="display: flex; flex-direction: row;">
{#if $showExcerpt}
<div class="omnisearch-result__body">
{@html plugin.textProcessor.highlightText(cleanedContent, note.matches)}
</div>
{/if}
<!-- Do not display the excerpt for embedding references -->
{#if !note.isEmbed}
<div style="display: flex; flex-direction: row;">
{#if $showExcerpt}
<div class="omnisearch-result__body">
{@html plugin.textProcessor.highlightText(
cleanedContent,
note.matches
)}
</div>
{/if}
<!-- Image -->
{#if imagePath}
<div class="omnisearch-result__image-container">
<img style="width: 100px" src="{imagePath}" alt="" />
</div>
{/if}
</div>
<!-- Image -->
{#if imagePath}
<div class="omnisearch-result__image-container">
<img style="width: 100px" src="{imagePath}" alt="" />
</div>
{/if}
</div>
{/if}
</div>
</ResultItemContainer>

View File

@@ -1,12 +1,11 @@
import Dexie from 'dexie'
import type MiniSearch from 'minisearch'
import type { AsPlainObject } from 'minisearch'
import type { DocumentRef } from './globals'
import { Notice } from 'obsidian'
import type OmnisearchPlugin from './main'
export class Database extends Dexie {
public static readonly dbVersion = 8
public static readonly dbVersion = 9
searchHistory!: Dexie.Table<{ id?: number; query: string }, number>
minisearch!: Dexie.Table<
{
@@ -16,6 +15,7 @@ export class Database extends Dexie {
},
string
>
embeds!: Dexie.Table<{ embedded: string; references: string[] }, string>
constructor(private plugin: OmnisearchPlugin) {
super(Database.getDbName(plugin.app.appId))
@@ -23,6 +23,7 @@ export class Database extends Dexie {
this.version(Database.dbVersion).stores({
searchHistory: '++id',
minisearch: 'date',
embeds: 'embedded',
})
}
@@ -49,17 +50,15 @@ export class Database extends Dexie {
}
}
public async writeMinisearchCache(
minisearch: MiniSearch,
indexed: Map<string, number>
): Promise<void> {
const paths = Array.from(indexed).map(([k, v]) => ({ path: k, mtime: v }))
public async writeMinisearchCache(): Promise<void> {
const minisearchJson = this.plugin.searchEngine.getSerializedMiniSearch()
const paths = this.plugin.searchEngine.getSerializedIndexedDocuments()
const database = this.plugin.database
await database.minisearch.clear()
await database.minisearch.add({
date: new Date().toISOString(),
paths,
data: minisearch.toJSON(),
data: minisearchJson,
})
console.log('Omnisearch - Search cache written')
}
@@ -85,7 +84,8 @@ export class Database extends Dexie {
}
/**
 * Empties the persisted caches (the `minisearch` and `embeds` tables),
 * then tells the user that a restart is needed.
 *
 * The notice is only shown once both tables have actually been cleared,
 * so it is never displayed for a clear operation that failed midway.
 */
public async clearCache(): Promise<void> {
  await this.minisearch.clear()
  await this.embeds.clear()
  new Notice('Omnisearch - Cache cleared. Please restart Obsidian.')
}
}

View File

@@ -81,6 +81,7 @@ export type ResultNote = {
content: string
foundWords: string[]
matches: SearchMatch[]
isEmbed: boolean
}
let inComposition = false

View File

@@ -28,6 +28,7 @@ import { CacheManager } from './cache-manager'
import { logDebug } from './tools/utils'
import { NotesIndexer } from './notes-indexer'
import { TextProcessor } from './tools/text-processing'
import { EmbedsRepository } from './repositories/embeds-repository'
export default class OmnisearchPlugin extends Plugin {
// FIXME: fix the type
@@ -42,6 +43,8 @@ export default class OmnisearchPlugin extends Plugin {
public readonly textProcessor = new TextProcessor(this)
public readonly searchEngine = new SearchEngine(this)
public readonly embedsRepository = new EmbedsRepository(this)
private ribbonButton?: HTMLElement
private refreshIndexCallback?: () => void
@@ -109,6 +112,7 @@ export default class OmnisearchPlugin extends Plugin {
if (this.notesIndexer.isFileIndexable(file.path)) {
logDebug('Indexing new file', file.path)
searchEngine.addFromPaths([file.path])
this.embedsRepository.refreshEmbeds(file.path)
}
})
)
@@ -117,6 +121,7 @@ export default class OmnisearchPlugin extends Plugin {
logDebug('Removing file', file.path)
this.cacheManager.removeFromLiveCache(file.path)
searchEngine.removeFromPaths([file.path])
this.embedsRepository.refreshEmbeds(file.path)
})
)
this.registerEvent(
@@ -124,6 +129,7 @@ export default class OmnisearchPlugin extends Plugin {
if (this.notesIndexer.isFileIndexable(file.path)) {
this.notesIndexer.flagNoteForReindex(file)
}
this.embedsRepository.refreshEmbeds(file.path)
})
)
this.registerEvent(
@@ -240,7 +246,7 @@ export default class OmnisearchPlugin extends Plugin {
}
}
const diff = searchEngine.getDiff(
const diff = searchEngine.getDocumentsToReindex(
files.map(f => ({ path: f.path, mtime: f.stat.mtime }))
)
@@ -281,7 +287,8 @@ export default class OmnisearchPlugin extends Plugin {
}
// Write the cache
await searchEngine.writeToCache()
await this.database.writeMinisearchCache()
await this.embedsRepository.writeToCache()
// Re-enable settings.caching
if (cacheEnabled) {

View File

@@ -0,0 +1,77 @@
import { getLinkpath } from 'obsidian'
import type OmnisearchPlugin from '../main'
import { logDebug } from '../tools/utils'
/**
 * In-memory index of embedded files (images, PDFs, ...) and the notes that
 * embed them, with load/save helpers for the database's `embeds` table.
 */
export class EmbedsRepository {
  /** Map<image or pdf, notes where embedded> */
  private embeds: Map<string, Set<string>> = new Map()

  constructor(private plugin: OmnisearchPlugin) {}

  /**
   * Records that `notePath` embeds the file located at `embed`.
   * @param embed path of the embedded file
   * @param notePath path of the note containing the embed
   */
  public addEmbed(embed: string, notePath: string): void {
    let references = this.embeds.get(embed)
    if (!references) {
      references = new Set()
      this.embeds.set(embed, references)
    }
    references.add(notePath)
  }

  /**
   * Rebuilds the index entries for `notePath`: forgets every embed
   * previously recorded for that note, then re-reads its embeds from
   * the metadata cache.
   * @param notePath path of the note to refresh
   */
  public refreshEmbeds(notePath: string): void {
    for (const [embedded, references] of this.embeds) {
      // Drop entries that no longer have any referencing note, so that
      // stale keys neither accumulate in memory nor get written to the
      // cache as rows with an empty `references` list.
      // (Deleting the current key while iterating a Map is safe.)
      if (references.delete(notePath) && references.size === 0) {
        this.embeds.delete(embedded)
      }
    }
    this.addEmbeds(notePath)
  }

  /**
   * @param pathEmbedded path of an embedded file
   * @returns the paths of the notes that embed it (empty if none are known)
   */
  public getEmbeds(pathEmbedded: string): string[] {
    const references = this.embeds.get(pathEmbedded)
    return references ? [...references] : []
  }

  /**
   * Replaces the content of the `embeds` table with the in-memory index.
   */
  public async writeToCache(): Promise<void> {
    logDebug('Writing embeds to cache')
    const database = this.plugin.database
    const data: { embedded: string; references: string[] }[] = []
    for (const [path, embedsList] of this.embeds) {
      data.push({ embedded: path, references: [...embedsList] })
    }
    await database.embeds.clear()
    await database.embeds.bulkAdd(data)
  }

  /**
   * Adds every row of the `embeds` table to the in-memory index.
   * Does nothing if the table is not available.
   */
  public async loadFromCache(): Promise<void> {
    const database = this.plugin.database
    if (!database.embeds) {
      logDebug('No embeds in cache')
      return
    }
    logDebug('Loading embeds from cache')
    const embedsArr = await database.embeds.toArray()
    for (const { embedded: path, references: embeds } of embedsArr) {
      for (const embed of embeds) {
        this.addEmbed(path, embed)
      }
    }
  }

  /**
   * Reads the embeds of `notePath` from the metadata cache and indexes
   * each one that resolves to an actual file.
   */
  private addEmbeds(notePath: string): void {
    const metadataCache = this.plugin.app.metadataCache
    const embeds = metadataCache.getCache(notePath)?.embeds ?? []
    for (const embed of embeds) {
      // Resolve the link to a file to get the real path
      const file = metadataCache.getFirstLinkpathDest(
        getLinkpath(embed.link),
        notePath
      )
      // Unresolved links (no destination file) are ignored
      if (file) {
        this.addEmbed(file.path, notePath)
      }
    }
  }
}

View File

@@ -1,4 +1,8 @@
import MiniSearch, { type Options, type SearchResult } from 'minisearch'
import MiniSearch, {
type AsPlainObject,
type Options,
type SearchResult,
} from 'minisearch'
import type { DocumentRef, IndexedDocument, ResultNote } from '../globals'
import { chunkArray, logDebug, removeDiacritics } from '../tools/utils'
@@ -13,6 +17,7 @@ export class SearchEngine {
private minisearch: MiniSearch
/** Map<path, mtime> */
private indexedDocuments: Map<string, number> = new Map()
// private previousResults: SearchResult[] = []
// private previousQuery: Query | null = null
@@ -25,6 +30,7 @@ export class SearchEngine {
* Return true if the cache is valid
*/
async loadCache(): Promise<boolean> {
await this.plugin.embedsRepository.loadFromCache()
const cache = await this.plugin.database.getMinisearchCache()
if (cache) {
this.minisearch = await MiniSearch.loadJSAsync(
@@ -39,10 +45,11 @@ export class SearchEngine {
}
/**
* Returns the list of documents that need to be reindexed
* Returns the list of documents that need to be reindexed or removed,
* either because they are new, have been modified, or have been deleted
* @param docs
*/
getDiff(docs: DocumentRef[]): {
getDocumentsToReindex(docs: DocumentRef[]): {
toAdd: DocumentRef[]
toRemove: DocumentRef[]
} {
@@ -264,9 +271,9 @@ export class SearchEngine {
}
}
// Boost custom properties
const metadata = this.plugin.app.metadataCache.getCache(path)
if (metadata) {
// Boost custom properties
for (const { name, weight } of settings.weightCustomProperties) {
const values = metadata?.frontmatter?.[name]
if (values && result.terms.some(t => values.includes(t))) {
@@ -372,6 +379,28 @@ export class SearchEngine {
)
)
// Inject embeds for images, documents, and PDFs
for (let i = 0; i < documents.length; i++) {
const doc = documents[i]
const embeds = this.plugin.embedsRepository
.getEmbeds(doc.path)
// Limit to 5 embeds
.slice(0, 5)
for (const embed of embeds) {
// Inject the embed in the content after index i
documents[++i] = await this.plugin.cacheManager.getDocument(embed)
results[i] = {
id: documents[i].path,
score: 0,
terms: [],
queryTerms: [],
match: {},
isEmbed: true,
}
// console.log(documents[i])
}
}
// Map the raw results to get usable suggestions
const resultNotes = results.map(result => {
logDebug('Locating matches for', result.id)
@@ -412,6 +441,7 @@ export class SearchEngine {
score: result.score,
foundWords,
matches,
isEmbed: result.isEmbed,
...note,
}
return resultNote
@@ -419,11 +449,21 @@ export class SearchEngine {
return resultNotes
}
public async writeToCache(): Promise<void> {
await this.plugin.database.writeMinisearchCache(
this.minisearch,
this.indexedDocuments
)
/**
 * For cache saving.
 * @returns the result of calling `toJSON()` on the MiniSearch instance
 */
public getSerializedMiniSearch(): AsPlainObject {
return this.minisearch.toJSON()
}
/**
 * For cache saving.
 * @returns one `{ path, mtime }` object per entry of the
 * `indexedDocuments` map (Map<path, mtime>), in the map's iteration order
 */
public getSerializedIndexedDocuments(): { path: string; mtime: number }[] {
  const serialized: { path: string; mtime: number }[] = []
  for (const [path, mtime] of this.indexedDocuments) {
    serialized.push({ path, mtime })
  }
  return serialized
}
private getOptions(): Options<IndexedDocument> {

View File

@@ -151,7 +151,7 @@ export function getCtrlKeyLabel(): 'ctrl' | '⌘' {
/**
 * Tells whether `path` designates an image, based on the extension
 * returned by `getExtension`.
 *
 * Recognized extensions: png, jpg, jpeg, webp, gif.
 * @param path path of the file to check
 */
export function isFileImage(path: string): boolean {
  const ext = getExtension(path)
  return (
    ext === 'png' ||
    ext === 'jpg' ||
    ext === 'jpeg' ||
    ext === 'webp' ||
    ext === 'gif'
  )
}
export function isFilePDF(path: string): boolean {