WIP document mapping
@@ -1,9 +1,74 @@
-import { Notice } from 'obsidian'
+import { Notice, TFile } from 'obsidian'
 import type { DocumentRef, IndexedDocument } from './globals'
 import { database } from './database'
 import type { AsPlainObject } from 'minisearch'
 import type MiniSearch from 'minisearch'
-import { makeMD5 } from './tools/utils'
+import {
+  extractHeadingsFromCache,
+  getAliasesFromMetadata,
+  getTagsFromMetadata,
+  isFileImage,
+  isFilePDF,
+  isFilePlaintext,
+  makeMD5,
+  removeDiacritics
+} from './tools/utils'
+import { getImageText, getPdfText } from "obsidian-text-extract";
+
+async function getIndexedDocument(
+  path: string
+): Promise<IndexedDocument> {
+  const file = app.vault.getFiles().find(f => f.path === path)
+  if (!file) throw new Error(`Invalid file path: "${path}"`)
+  let content: string
+  if (isFilePlaintext(path)) {
+    content = await app.vault.cachedRead(file)
+  } else if (isFilePDF(path)) {
+    content = await getPdfText(file)
+  } else if (isFileImage(file.path)) {
+    content = await getImageText(file)
+  } else {
+    throw new Error('Invalid file format: ' + file.path)
+  }
+  content = removeDiacritics(content)
+  const metadata = app.metadataCache.getFileCache(file)
+
+  // Look for links that lead to non-existing files,
+  // and add them to the index.
+  if (metadata) {
+    // // FIXME: https://github.com/scambier/obsidian-omnisearch/issues/129
+    // const nonExisting = getNonExistingNotes(file, metadata)
+    // for (const name of nonExisting.filter(
+    //   o => !cacheManager.getLiveDocument(o)
+    // )) {
+    //   NotesIndex.addNonExistingToIndex(name, file.path)
+    // }
+
+    // EXCALIDRAW
+    // Remove the json code
+    if (metadata.frontmatter?.['excalidraw-plugin']) {
+      const comments =
+        metadata.sections?.filter(s => s.type === 'comment') ?? []
+      for (const { start, end } of comments.map(c => c.position)) {
+        content =
+          content.substring(0, start.offset - 1) + content.substring(end.offset)
+      }
+    }
+  }
+
+  return {
+    basename: removeDiacritics(file.basename),
+    content,
+    path: file.path,
+    mtime: file.stat.mtime,
+
+    tags: getTagsFromMetadata(metadata),
+    aliases: getAliasesFromMetadata(metadata).join(''),
+    headings1: metadata ? extractHeadingsFromCache(metadata, 1).join(' ') : '',
+    headings2: metadata ? extractHeadingsFromCache(metadata, 2).join(' ') : '',
+    headings3: metadata ? extractHeadingsFromCache(metadata, 3).join(' ') : '',
+  }
+}
+
 class CacheManager {
   /**
@@ -11,6 +76,22 @@ class CacheManager {
   */
  private nextQueryIsEmpty = false

+  private documents: Map<string, IndexedDocument> = new Map()
+
+  public async addToLiveCache(path: string): Promise<void> {
+    const doc = await getIndexedDocument(path)
+    this.documents.set(path, doc)
+    console.log(path)
+  }
+
+  public async getDocument(path: string): Promise<IndexedDocument> {
+    if (this.documents.has(path)) {
+      return this.documents.get(path)!
+    }
+    await this.addToLiveCache(path)
+    return this.documents.get(path)!
+  }
+
  public async addToSearchHistory(query: string): Promise<void> {
    if (!query) {
      this.nextQueryIsEmpty = true
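For readers skimming the diff: the two methods added above turn CacheManager into a read-through cache over `getIndexedDocument()`. A minimal usage sketch follows; it is illustrative only and not part of the commit, assuming nothing beyond the exported `cacheManager` singleton that the later hunks import.

```ts
import { cacheManager } from './cache-manager'

// Hypothetical helper, not in the codebase: shows the memoization behavior.
async function demoGetTwice(path: string): Promise<void> {
  // First call misses the Map, so addToLiveCache() builds the document
  // (file read, PDF/image text extraction, metadata) and stores it.
  const first = await cacheManager.getDocument(path)

  // Second call hits the Map and returns the same object.
  const second = await cacheManager.getDocument(path)
  console.log(first === second) // true until the entry is rebuilt
}
```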
@@ -10,6 +10,7 @@ import {
 import type { TFile } from 'obsidian'
 import type { IndexedDocument } from './globals'
 import { getImageText, getPdfText } from 'obsidian-text-extract'
+import { cacheManager } from "./cache-manager";

 /**
  * Return all plaintext files as IndexedDocuments
@@ -18,7 +19,7 @@ export async function getPlainTextFiles(): Promise<IndexedDocument[]> {
   const allFiles = app.vault.getFiles().filter(f => isFilePlaintext(f.path))
   const data: IndexedDocument[] = []
   for (const file of allFiles) {
-    const doc = await getIndexedDocument(file.path)
+    const doc = await cacheManager.getDocument(file.path)
     data.push(doc)
     // await cacheManager.updateLiveDocument(file.path, doc)
   }
@@ -47,7 +48,7 @@ async function getBinaryFiles(files: TFile[]): Promise<IndexedDocument[]> {
   for (const file of files) {
     input.push(
       new Promise(async (resolve, _reject) => {
-        const doc = await getIndexedDocument(file.path)
+        const doc = await cacheManager.getDocument(file.path)
         data.push(doc)
         return resolve(null)
       })
@@ -57,61 +58,6 @@ async function getBinaryFiles(files: TFile[]): Promise<IndexedDocument[]> {
   return data
 }

-export async function getIndexedDocument(
-  path: string
-): Promise<IndexedDocument> {
-  const file = app.vault.getFiles().find(f => f.path === path)
-  if (!file) throw new Error(`Invalid file path: "${path}"`)
-  let content: string
-  if (isFilePlaintext(path)) {
-    content = await app.vault.cachedRead(file)
-  } else if (isFilePDF(path)) {
-    content = await getPdfText(file)
-  } else if (isFileImage(file.path)) {
-    content = await getImageText(file)
-  } else {
-    throw new Error('Invalid file format: ' + file.path)
-  }
-  content = removeDiacritics(content)
-  const metadata = app.metadataCache.getFileCache(file)
-
-  // Look for links that lead to non-existing files,
-  // and add them to the index.
-  if (metadata) {
-    // // FIXME: https://github.com/scambier/obsidian-omnisearch/issues/129
-    // const nonExisting = getNonExistingNotes(file, metadata)
-    // for (const name of nonExisting.filter(
-    //   o => !cacheManager.getLiveDocument(o)
-    // )) {
-    //   NotesIndex.addNonExistingToIndex(name, file.path)
-    // }
-
-    // EXCALIDRAW
-    // Remove the json code
-    if (metadata.frontmatter?.['excalidraw-plugin']) {
-      const comments =
-        metadata.sections?.filter(s => s.type === 'comment') ?? []
-      for (const { start, end } of comments.map(c => c.position)) {
-        content =
-          content.substring(0, start.offset - 1) + content.substring(end.offset)
-      }
-    }
-  }
-
-  return {
-    basename: removeDiacritics(file.basename),
-    content,
-    path: file.path,
-    mtime: file.stat.mtime,
-
-    tags: getTagsFromMetadata(metadata),
-    aliases: getAliasesFromMetadata(metadata).join(''),
-    headings1: metadata ? extractHeadingsFromCache(metadata, 1).join(' ') : '',
-    headings2: metadata ? extractHeadingsFromCache(metadata, 2).join(' ') : '',
-    headings3: metadata ? extractHeadingsFromCache(metadata, 3).join(' ') : '',
-  }
-}
-
 /**
  * Convert a file into an IndexedDocument.
  * Will use the cache if possible.
src/main.ts | 17
@@ -16,6 +16,7 @@ import {
 import { OmnisearchCache } from './database'
 import * as NotesIndex from './notes-index'
 import { searchEngine } from './search/omnisearch'
+import { cacheManager } from './cache-manager'

 export default class OmnisearchPlugin extends Plugin {
   private ribbonButton?: HTMLElement
@@ -110,16 +111,20 @@ export default class OmnisearchPlugin extends Plugin {
  */
 async function populateIndex(): Promise<void> {
   console.time('Omnisearch - Indexing total time')
+  indexingStep.set(IndexingStepType.ReadingFiles)
+  const files = app.vault.getFiles().filter(f => isFileIndexable(f.path))
+
+  // Map documents in the background
+  files.forEach(f => cacheManager.addToLiveCache(f.path))
+
   if (!Platform.isIosApp) {
+    console.time('Omnisearch - Loading index from cache')
     await searchEngine.loadCache()
+    console.timeEnd('Omnisearch - Loading index from cache')
   }

-  indexingStep.set(IndexingStepType.ReadingFiles)
   const diff = searchEngine.getDiff(
-    app.vault
-      .getFiles()
-      .filter(f => isFileIndexable(f.path))
-      .map(f => ({ path: f.path, mtime: f.stat.mtime }))
+    files.map(f => ({ path: f.path, mtime: f.stat.mtime }))
   )

   console.log(
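The forEach added above does not await the addToLiveCache() promises; the in-code comment frames this as background mapping. A small sketch of why that appears safe, assuming isFileIndexable is exported from './tools/utils' like the other helpers (the function name here is hypothetical):

```ts
import { cacheManager } from './cache-manager'
import { isFileIndexable } from './tools/utils'

async function sketchPrewarm(): Promise<void> {
  const files = app.vault.getFiles().filter(f => isFileIndexable(f.path))

  // Fire-and-forget pre-warming of the live cache.
  files.forEach(f => cacheManager.addToLiveCache(f.path))

  // A document requested before its background mapping has finished is
  // simply built on demand by getDocument(), so the pre-warming is an
  // optimization rather than a correctness requirement.
  if (files.length) {
    const doc = await cacheManager.getDocument(files[0].path)
    console.log(doc.path)
  }
}
```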
@@ -137,7 +142,7 @@ async function populateIndex(): Promise<void> {
   }

   indexingStep.set(IndexingStepType.IndexingFiles)
-  await searchEngine.removeFromPaths(diff.toRemove.map(o => o.path))
+  searchEngine.removeFromPaths(diff.toRemove.map(o => o.path))
   await searchEngine.addFromPaths(
     diff.toAdd.map(o => o.path),
     true
@@ -23,7 +23,6 @@ import {
   stripMarkdownCharacters,
 } from '../tools/utils'
 import { Notice, Platform } from 'obsidian'
-import { getIndexedDocument } from '../file-loader'
 import type { Query } from './query'
 import { cacheManager } from '../cache-manager'

@@ -112,7 +111,7 @@ export class Omnisearch {
     writeToCache: boolean
   ): Promise<void> {
     let documents = await Promise.all(
-      paths.map(async path => await getIndexedDocument(path))
+      paths.map(async path => await cacheManager.getDocument(path))
     )

     // If a document is already added, discard it
@@ -185,7 +184,7 @@ export class Omnisearch {
     }

     const documents = await Promise.all(
-      results.map(async result => await getIndexedDocument(result.id))
+      results.map(async result => await cacheManager.getDocument(result.id))
     )

     // If the search query contains quotes, filter out results that don't have the exact match
@@ -292,7 +291,7 @@ export class Omnisearch {

     // TODO: this already called in search(), pass each document in its SearchResult instead?
     const documents = await Promise.all(
-      results.map(async result => await getIndexedDocument(result.id))
+      results.map(async result => await cacheManager.getDocument(result.id))
     )

     // Map the raw results to get usable suggestions