Refactored code to split search and index

This commit is contained in:
Simon Cambier
2022-09-30 21:33:44 +02:00
parent 342e36aa08
commit d47309f576
6 changed files with 197 additions and 195 deletions

View File

@@ -5,13 +5,14 @@
import ModalContainer from './ModalContainer.svelte'
import { eventBus, type ResultNote } from 'src/globals'
import { createNote, openNote } from 'src/notes'
import { getSuggestions, reindexNotes } from 'src/search'
import { getSuggestions } from 'src/search'
import { getCtrlKeyLabel, getExtension, loopIndex } from 'src/utils'
import { OmnisearchInFileModal, type OmnisearchVaultModal } from 'src/modals'
import ResultItemVault from './ResultItemVault.svelte'
import { Query } from 'src/query'
import { saveSearchHistory, searchHistory } from 'src/search-history'
import { settings } from '../settings'
import { reindexNotes } from '../notes-index'
export let modal: OmnisearchVaultModal
let selectedIndex = 0

View File

@@ -1,10 +1,5 @@
import { Plugin, TFile } from 'obsidian'
import {
addNoteToReindex,
addToIndex,
initGlobalSearchIndex,
removeFromIndex,
} from './search'
import { initGlobalSearchIndex } from './search'
import { OmnisearchInFileModal, OmnisearchVaultModal } from './modals'
import { loadSettings, settings, SettingsTab, showContext } from './settings'
import { eventBus } from './globals'
@@ -12,6 +7,7 @@ import { registerAPI } from '@vanakat/plugin-api'
import api from './api'
import { loadSearchHistory } from './search-history'
import { isFileIndexable } from './utils'
import { addNoteToReindex, addToIndex, removeFromIndex } from './notes-index'
function _registerAPI(plugin: OmnisearchPlugin): void {
registerAPI('omnisearch', api, plugin as any)

180
src/notes-index.ts Normal file
View File

@@ -0,0 +1,180 @@
import { Notice, TAbstractFile, TFile } from 'obsidian'
import {
extractHeadingsFromCache,
getAliasesFromMetadata,
getTagsFromMetadata,
isFileIndexable,
removeDiacritics,
wait,
} from './utils'
import {
addNoteToCache,
getNonExistingNotes,
getNonExistingNotesFromCache,
getNoteFromCache,
removeAnchors,
removeNoteFromCache,
saveNotesCacheToFile,
} from './notes'
import { getPdfText } from './pdf-parser'
import type { IndexedNote } from './globals'
import { searchIndexFilePath } from './globals'
import { settings } from './settings'
import { minisearchInstance } from './search'
let isIndexChanged: boolean
/**
 * Adds a file to the search index and registers it in the notes cache.
 *
 * Nothing happens when `file` is not a `TFile` or when its path is not
 * indexable. Errors raised while indexing (including the file already being
 * present in the cache) are logged to the console and not rethrown.
 *
 * @param file The vault entry to index
 * @returns A promise that resolves once the file has been processed
 */
export async function addToIndex(file: TAbstractFile): Promise<void> {
  if (!(file instanceof TFile) || !isFileIndexable(file.path)) {
    return
  }

  // Check if the file was already indexed as non-existent,
  // and if so, remove it from the index (before adding it again)
  if (getNoteFromCache(file.path)?.doesNotExist) {
    removeFromIndex(file.path)
  }

  try {
    // console.log(`Omnisearch - adding ${file.path} to index`)

    // Look for links that lead to non-existing files,
    // and index them as well
    const metadata = app.metadataCache.getFileCache(file)
    if (metadata) {
      const nonExisting = getNonExistingNotes(file, metadata)
      for (const name of nonExisting.filter(o => !getNoteFromCache(o))) {
        addNonExistingToIndex(name, file.path)
      }
    }

    if (getNoteFromCache(file.path)) {
      throw new Error(`${file.basename} is already indexed`)
    }

    // PDFs go through the PDF text extractor; any other file is read
    // through the vault's cached read and indexed as-is
    const rawContent: string = file.path.endsWith('.pdf')
      ? await getPdfText(file)
      : await app.vault.cachedRead(file)
    const content = removeDiacritics(rawContent)

    // Make the document and index it
    const note: IndexedNote = {
      basename: removeDiacritics(file.basename),
      content,
      path: file.path,
      mtime: file.stat.mtime,
      tags: getTagsFromMetadata(metadata),
      // Separate aliases with a space (like the headings below): joining
      // them with '' would glue distinct aliases into a single word
      aliases: getAliasesFromMetadata(metadata).join(' '),
      headings1: metadata
        ? extractHeadingsFromCache(metadata, 1).join(' ')
        : '',
      headings2: metadata
        ? extractHeadingsFromCache(metadata, 2).join(' ')
        : '',
      headings3: metadata
        ? extractHeadingsFromCache(metadata, 3).join(' ')
        : '',
    }

    minisearchInstance.add(note)
    isIndexChanged = true
    addNoteToCache(note.path, note)
  } catch (e) {
    console.trace('Error while indexing ' + file.basename)
    console.error(e)
  }
}
/**
 * Indexes a placeholder for a note that does not exist (yet).
 * This makes internal links that currently lead nowhere searchable.
 *
 * The link anchors are removed from `name`, and `.md` is appended to build
 * the path unless the name already ends with it. Nothing is added when a
 * note is already cached under that path.
 *
 * @param name The link target, as written in the referencing note
 * @param parent The path of the note referencing the non-existing note
 */
export function addNonExistingToIndex(name: string, parent: string): void {
  const basename = removeAnchors(name)
  const path = basename.endsWith('.md') ? basename : basename + '.md'

  if (getNoteFromCache(path)) {
    return
  }

  const placeholder = {
    path,
    basename,
    mtime: 0,

    content: '',
    aliases: '',
    headings1: '',
    headings2: '',
    headings3: '',

    doesNotExist: true,
    parent,
  } as IndexedNote

  minisearchInstance.add(placeholder)
  isIndexChanged = true
  addNoteToCache(path, placeholder)
}
/**
 * Removes a note from the search index and from the notes cache, by its path.
 *
 * Cached non-existing notes whose `parent` is this path are removed too.
 * A path that is not indexable, or that has no cached note, is only reported
 * on the console.
 *
 * @param path The vault path of the note to remove
 */
export function removeFromIndex(path: string): void {
  if (!isFileIndexable(path)) {
    console.info(`"${path}" is not an indexable file`)
    return
  }

  const note = getNoteFromCache(path)
  if (!note) {
    console.warn(`note not found under path ${path}`)
    return
  }

  minisearchInstance.remove(note)
  isIndexChanged = true
  removeNoteFromCache(path)

  // Also drop the placeholders that were indexed for this note's
  // links to non-existing notes
  getNonExistingNotesFromCache()
    .filter(n => n.parent === path)
    .forEach(n => {
      removeFromIndex(n.path)
    })
}
/** Files waiting to be reindexed by the next `reindexNotes()` call. */
const notesToReindex: Set<TAbstractFile> = new Set()

/**
 * Queues a file for reindexing. Since the queue is a `Set`, queuing the
 * same object several times keeps a single entry.
 *
 * @param note The vault entry to reindex later
 */
export function addNoteToReindex(note: TAbstractFile): void {
  notesToReindex.add(note)
}
/**
 * Reindexes every queued file: each one is removed from the index, added
 * again, and followed by a `wait(0)` before the next one. The queue is then
 * emptied and `saveIndexToFile()` is called.
 *
 * A notice with the number of queued notes is shown first when
 * `settings.showIndexingNotices` is enabled and the queue is not empty.
 */
export async function reindexNotes(): Promise<void> {
  const queued = notesToReindex.size
  if (settings.showIndexingNotices && queued > 0) {
    new Notice(`Omnisearch - Reindexing ${queued} notes`, 2000)
  }

  for (const entry of notesToReindex) {
    removeFromIndex(entry.path)
    await addToIndex(entry)
    await wait(0)
  }

  notesToReindex.clear()
  await saveIndexToFile()
}
/**
 * Writes the serialized search index to `searchIndexFilePath`, then saves
 * the notes cache and resets the "index changed" flag.
 *
 * Does nothing unless `settings.storeIndexInFile` is enabled, the MiniSearch
 * instance exists, and the index was changed since the flag was last reset.
 */
export async function saveIndexToFile(): Promise<void> {
  const shouldSave =
    settings.storeIndexInFile && minisearchInstance && isIndexChanged
  if (!shouldSave) {
    return
  }

  const serialized = JSON.stringify(minisearchInstance)
  await app.vault.adapter.write(searchIndexFilePath, serialized)
  console.log('Omnisearch - Index saved on disk')

  await saveNotesCacheToFile()
  isIndexChanged = false
}

View File

@@ -1,15 +1,10 @@
import { type CachedMetadata, MarkdownView, TFile } from 'obsidian'
import {
MarkdownView,
TFile,
WorkspaceLeaf,
type CachedMetadata,
} from 'obsidian'
import {
notesCacheFilePath,
type IndexedNote,
notesCacheFilePath,
type ResultNote,
} from './globals'
import { stringsToRegex, wait } from './utils'
import { stringsToRegex } from './utils'
import { settings } from './settings'
/**
@@ -37,10 +32,7 @@ export async function loadNotesCache(): Promise<void> {
console.error(e)
}
}
if (!notesCache) {
notesCache = {}
}
notesCache ||= {}
}
export function getNoteFromCache(key: string): IndexedNote | undefined {

View File

@@ -1,6 +1,3 @@
// import PDFJs from 'pdfjs-dist'
// import pdfjsWorker from 'pdfjs-dist/build/pdf.worker.entry'
import type { TextItem } from 'pdfjs-dist/types/src/display/api'
import type { TFile } from 'obsidian'
import {loadPdfJs} from "obsidian";
@@ -14,7 +11,7 @@ export async function getPdfText(file: TFile): Promise<string> {
const pageTexts = Array.from({ length: doc.numPages }, async (v, i) => {
const page = await doc.getPage(i + 1)
const content = await page.getTextContent()
return (content.items as TextItem[]).map(token => token.str).join('')
return (content.items as any[]).map(token => token.str).join('')
})
return (await Promise.all(pageTexts)).join('')
}

View File

@@ -1,17 +1,14 @@
import { Notice, TAbstractFile, TFile } from 'obsidian'
import { Notice } from 'obsidian'
import MiniSearch, { type Options, type SearchResult } from 'minisearch'
import {
chsRegex,
searchIndexFilePath,
SPACE_OR_PUNCTUATION,
type IndexedNote,
type ResultNote,
searchIndexFilePath,
type SearchMatch,
SPACE_OR_PUNCTUATION,
} from './globals'
import {
extractHeadingsFromCache,
getAliasesFromMetadata,
getTagsFromMetadata,
isFileIndexable,
removeDiacritics,
stringsToRegex,
@@ -21,21 +18,15 @@ import {
import type { Query } from './query'
import { settings } from './settings'
import {
removeNoteFromCache,
getNoteFromCache,
getNonExistingNotes,
resetNotesCache,
addNoteToCache,
removeAnchors,
getNonExistingNotesFromCache,
loadNotesCache,
saveNotesCacheToFile,
isCacheOutdated,
loadNotesCache,
resetNotesCache,
} from './notes'
import { getPdfText } from './pdf-parser'
import { addToIndex, removeFromIndex, saveIndexToFile } from './notes-index'
export let minisearchInstance: MiniSearch<IndexedNote>
let minisearchInstance: MiniSearch<IndexedNote>
let isIndexChanged: boolean
const tokenize = (text: string): string[] => {
const tokens = text.split(SPACE_OR_PUNCTUATION)
const chsSegmenter = (app as any).plugins.plugins['cm-chs-patch']
@@ -293,158 +284,3 @@ export async function getSuggestions(
return resultNote
})
}
/**
 * Adds a file to the search index and registers it in the notes cache.
 *
 * Nothing happens when `file` is not a `TFile` or when its path is not
 * indexable. Errors raised while indexing (including the file already being
 * present in the cache) are logged to the console and not rethrown.
 *
 * @param file The vault entry to index
 * @returns A promise that resolves once the file has been processed
 */
export async function addToIndex(file: TAbstractFile): Promise<void> {
  if (!(file instanceof TFile) || !isFileIndexable(file.path)) {
    return
  }

  // Check if the file was already indexed as non-existent,
  // and if so, remove it from the index (before adding it again)
  if (getNoteFromCache(file.path)?.doesNotExist) {
    removeFromIndex(file.path)
  }

  try {
    // console.log(`Omnisearch - adding ${file.path} to index`)

    // Look for links that lead to non-existing files,
    // and index them as well
    const metadata = app.metadataCache.getFileCache(file)
    if (metadata) {
      const nonExisting = getNonExistingNotes(file, metadata)
      for (const name of nonExisting.filter(o => !getNoteFromCache(o))) {
        addNonExistingToIndex(name, file.path)
      }
    }

    if (getNoteFromCache(file.path)) {
      throw new Error(`${file.basename} is already indexed`)
    }

    // PDFs go through the PDF text extractor; any other file is read
    // through the vault's cached read and indexed as-is
    const rawContent: string = file.path.endsWith('.pdf')
      ? await getPdfText(file)
      : await app.vault.cachedRead(file)
    const content = removeDiacritics(rawContent)

    // Make the document and index it
    const note: IndexedNote = {
      basename: removeDiacritics(file.basename),
      content,
      path: file.path,
      mtime: file.stat.mtime,
      tags: getTagsFromMetadata(metadata),
      // Separate aliases with a space (like the headings below): joining
      // them with '' would glue distinct aliases into a single word
      aliases: getAliasesFromMetadata(metadata).join(' '),
      headings1: metadata
        ? extractHeadingsFromCache(metadata, 1).join(' ')
        : '',
      headings2: metadata
        ? extractHeadingsFromCache(metadata, 2).join(' ')
        : '',
      headings3: metadata
        ? extractHeadingsFromCache(metadata, 3).join(' ')
        : '',
    }

    minisearchInstance.add(note)
    isIndexChanged = true
    addNoteToCache(note.path, note)
  } catch (e) {
    console.trace('Error while indexing ' + file.basename)
    console.error(e)
  }
}
/**
 * Indexes a placeholder for a note that does not exist (yet).
 * This makes internal links that currently lead nowhere searchable.
 *
 * The link anchors are removed from `name`, and `.md` is appended to build
 * the path unless the name already ends with it. Nothing is added when a
 * note is already cached under that path.
 *
 * @param name The link target, as written in the referencing note
 * @param parent The path of the note referencing the non-existing note
 */
export function addNonExistingToIndex(name: string, parent: string): void {
  const basename = removeAnchors(name)
  const path = basename.endsWith('.md') ? basename : basename + '.md'

  if (getNoteFromCache(path)) {
    return
  }

  const placeholder = {
    path,
    basename,
    mtime: 0,

    content: '',
    aliases: '',
    headings1: '',
    headings2: '',
    headings3: '',

    doesNotExist: true,
    parent,
  } as IndexedNote

  minisearchInstance.add(placeholder)
  isIndexChanged = true
  addNoteToCache(path, placeholder)
}
/**
 * Removes a note from the search index and from the notes cache, by its path.
 *
 * Cached non-existing notes whose `parent` is this path are removed too.
 * A path that is not indexable, or that has no cached note, is only reported
 * on the console.
 *
 * @param path The vault path of the note to remove
 */
export function removeFromIndex(path: string): void {
  if (!isFileIndexable(path)) {
    console.info(`"${path}" is not an indexable file`)
    return
  }

  const note = getNoteFromCache(path)
  if (!note) {
    console.warn(`note not found under path ${path}`)
    return
  }

  minisearchInstance.remove(note)
  isIndexChanged = true
  removeNoteFromCache(path)

  // Also drop the placeholders that were indexed for this note's
  // links to non-existing notes
  getNonExistingNotesFromCache()
    .filter(n => n.parent === path)
    .forEach(n => {
      removeFromIndex(n.path)
    })
}
/** Files waiting to be reindexed by the next `reindexNotes()` call. */
const notesToReindex: Set<TAbstractFile> = new Set()

/**
 * Queues a file for reindexing. Since the queue is a `Set`, queuing the
 * same object several times keeps a single entry.
 *
 * @param note The vault entry to reindex later
 */
export function addNoteToReindex(note: TAbstractFile): void {
  notesToReindex.add(note)
}
/**
 * Reindexes every queued file: each one is removed from the index, added
 * again, and followed by a `wait(0)` before the next one. The queue is then
 * emptied and `saveIndexToFile()` is called.
 *
 * A notice with the number of queued notes is shown first when
 * `settings.showIndexingNotices` is enabled and the queue is not empty.
 */
export async function reindexNotes(): Promise<void> {
  const queued = notesToReindex.size
  if (settings.showIndexingNotices && queued > 0) {
    new Notice(`Omnisearch - Reindexing ${queued} notes`, 2000)
  }

  for (const entry of notesToReindex) {
    removeFromIndex(entry.path)
    await addToIndex(entry)
    await wait(0)
  }

  notesToReindex.clear()
  await saveIndexToFile()
}
/**
 * Writes the serialized search index to `searchIndexFilePath`, then saves
 * the notes cache and resets the "index changed" flag.
 *
 * Does nothing unless `settings.storeIndexInFile` is enabled, the MiniSearch
 * instance exists, and the index was changed since the flag was last reset.
 */
async function saveIndexToFile(): Promise<void> {
  const shouldSave =
    settings.storeIndexInFile && minisearchInstance && isIndexChanged
  if (!shouldSave) {
    return
  }

  const serialized = JSON.stringify(minisearchInstance)
  await app.vault.adapter.write(searchIndexFilePath, serialized)
  console.log('Omnisearch - Index saved on disk')

  await saveNotesCacheToFile()
  isIndexChanged = false
}