feat(#245): in results list, show where an image/pdf is embedded

* Add an index of embeds for reference in results

* Notes embedding images are now shown in results

* Updated dependencies

* Correctly referencing all embeds

* Updated docs

* Basic embedded feature ok
This commit is contained in:
Simon Cambier
2024-09-25 20:47:27 +02:00
committed by GitHub
parent 437b7e2b9a
commit 1b442d1f24
14 changed files with 280 additions and 101 deletions

View File

@@ -1,4 +1,8 @@
import MiniSearch, { type Options, type SearchResult } from 'minisearch'
import MiniSearch, {
type AsPlainObject,
type Options,
type SearchResult,
} from 'minisearch'
import type { DocumentRef, IndexedDocument, ResultNote } from '../globals'
import { chunkArray, logDebug, removeDiacritics } from '../tools/utils'
@@ -13,6 +17,7 @@ export class SearchEngine {
private minisearch: MiniSearch
/** Map<path, mtime> */
private indexedDocuments: Map<string, number> = new Map()
// private previousResults: SearchResult[] = []
// private previousQuery: Query | null = null
@@ -25,6 +30,7 @@ export class SearchEngine {
* Return true if the cache is valid
*/
async loadCache(): Promise<boolean> {
await this.plugin.embedsRepository.loadFromCache()
const cache = await this.plugin.database.getMinisearchCache()
if (cache) {
this.minisearch = await MiniSearch.loadJSAsync(
@@ -39,10 +45,11 @@ export class SearchEngine {
}
/**
* Returns the list of documents that need to be reindexed
* Returns the list of documents that need to be reindexed or removed,
* either because they are new, have been modified, or have been deleted
* @param docs
*/
getDiff(docs: DocumentRef[]): {
getDocumentsToReindex(docs: DocumentRef[]): {
toAdd: DocumentRef[]
toRemove: DocumentRef[]
} {
@@ -264,9 +271,9 @@ export class SearchEngine {
}
}
// Boost custom properties
const metadata = this.plugin.app.metadataCache.getCache(path)
if (metadata) {
// Boost custom properties
for (const { name, weight } of settings.weightCustomProperties) {
const values = metadata?.frontmatter?.[name]
if (values && result.terms.some(t => values.includes(t))) {
@@ -372,6 +379,28 @@ export class SearchEngine {
)
)
// Inject embeds for images, documents, and PDFs
for (let i = 0; i < documents.length; i++) {
const doc = documents[i]
const embeds = this.plugin.embedsRepository
.getEmbeds(doc.path)
// Limit to 5 embeds
.slice(0, 5)
for (const embed of embeds) {
// Inject the embed in the content after index i
documents[++i] = await this.plugin.cacheManager.getDocument(embed)
results[i] = {
id: documents[i].path,
score: 0,
terms: [],
queryTerms: [],
match: {},
isEmbed: true,
}
// console.log(documents[i])
}
}
// Map the raw results to get usable suggestions
const resultNotes = results.map(result => {
logDebug('Locating matches for', result.id)
@@ -412,6 +441,7 @@ export class SearchEngine {
score: result.score,
foundWords,
matches,
isEmbed: result.isEmbed,
...note,
}
return resultNote
@@ -419,11 +449,21 @@ export class SearchEngine {
return resultNotes
}
public async writeToCache(): Promise<void> {
await this.plugin.database.writeMinisearchCache(
this.minisearch,
this.indexedDocuments
)
/**
* For cache saving
*/
public getSerializedMiniSearch(): AsPlainObject {
return this.minisearch.toJSON()
}
/**
* For cache saving
*/
public getSerializedIndexedDocuments(): { path: string; mtime: number }[] {
return Array.from(this.indexedDocuments).map(([path, mtime]) => ({
path,
mtime,
}))
}
private getOptions(): Options<IndexedDocument> {