#58 - new behavior

Omnisearch no longer registers extensions, PDFs are indexed after plaintext files, updated settings descriptions
This commit is contained in:
Simon Cambier
2022-10-01 13:58:11 +02:00
parent 7dc01ecc8f
commit 124e5f5d61
6 changed files with 61 additions and 36 deletions

View File

@@ -6,7 +6,7 @@ import { eventBus } from './globals'
import { registerAPI } from '@vanakat/plugin-api' import { registerAPI } from '@vanakat/plugin-api'
import api from './api' import api from './api'
import { loadSearchHistory } from './search-history' import { loadSearchHistory } from './search-history'
import {isFileIndexable, showWelcomeNotice} from './utils' import {isFilePlaintext, showWelcomeNotice} from './utils'
import { addNoteToReindex, addToIndex, removeFromIndex } from './notes-index' import { addNoteToReindex, addToIndex, removeFromIndex } from './notes-index'
function _registerAPI(plugin: OmnisearchPlugin): void { function _registerAPI(plugin: OmnisearchPlugin): void {
@@ -22,10 +22,7 @@ export default class OmnisearchPlugin extends Plugin {
// additional files to index by Omnisearch // additional files to index by Omnisearch
await loadSettings(this) await loadSettings(this)
this.registerExtensions(settings.indexedFileTypes, 'markdown')
await loadSearchHistory() await loadSearchHistory()
_registerAPI(this) _registerAPI(this)
if (settings.ribbonIcon) { if (settings.ribbonIcon) {
@@ -75,7 +72,7 @@ export default class OmnisearchPlugin extends Plugin {
) )
this.registerEvent( this.registerEvent(
this.app.vault.on('rename', async (file, oldPath) => { this.app.vault.on('rename', async (file, oldPath) => {
if (file instanceof TFile && isFileIndexable(file.path)) { if (file instanceof TFile && isFilePlaintext(file.path)) {
removeFromIndex(oldPath) removeFromIndex(oldPath)
await addToIndex(file) await addToIndex(file)
} }

View File

@@ -2,8 +2,8 @@ import { Notice, TAbstractFile, TFile } from 'obsidian'
import { import {
extractHeadingsFromCache, extractHeadingsFromCache,
getAliasesFromMetadata, getAliasesFromMetadata,
getTagsFromMetadata, getTagsFromMetadata, isFileIndexable,
isFileIndexable, isFilePlaintext,
removeDiacritics, removeDiacritics,
wait, wait,
} from './utils' } from './utils'
@@ -129,7 +129,7 @@ export function addNonExistingToIndex(name: string, parent: string): void {
* @param path * @param path
*/ */
export function removeFromIndex(path: string): void { export function removeFromIndex(path: string): void {
if (!isFileIndexable(path)) { if (!isFilePlaintext(path)) {
console.info(`"${path}" is not an indexable file`) console.info(`"${path}" is not an indexable file`)
return return
} }

View File

@@ -1,11 +1,11 @@
import type { TFile } from 'obsidian' import type { TFile } from 'obsidian'
import {loadPdfJs} from "obsidian"; import { loadPdfJs } from 'obsidian'
let PDFJs: any = null let PDFJs: any = null
// https://stackoverflow.com/a/59929946 // https://stackoverflow.com/a/59929946
export async function getPdfText(file: TFile): Promise<string> { export async function getPdfText(file: TFile): Promise<string> {
PDFJs = PDFJs ?? await loadPdfJs() PDFJs = PDFJs ?? (await loadPdfJs())
const data = await app.vault.readBinary(file) const data = await app.vault.readBinary(file)
const doc = await PDFJs.getDocument(data).promise const doc = await PDFJs.getDocument(data).promise
const pageTexts = Array.from({ length: doc.numPages }, async (v, i) => { const pageTexts = Array.from({ length: doc.numPages }, async (v, i) => {

View File

@@ -9,7 +9,7 @@ import {
SPACE_OR_PUNCTUATION, SPACE_OR_PUNCTUATION,
} from './globals' } from './globals'
import { import {
isFileIndexable, isFilePlaintext,
removeDiacritics, removeDiacritics,
stringsToRegex, stringsToRegex,
stripMarkdownCharacters, stripMarkdownCharacters,
@@ -42,7 +42,7 @@ const tokenize = (text: string): string[] => {
* Initializes the MiniSearch instance, * Initializes the MiniSearch instance,
* and adds all the notes to the index * and adds all the notes to the index
*/ */
export async function initGlobalSearchIndex(force = false): Promise<void> { export async function initGlobalSearchIndex(): Promise<void> {
const options: Options<IndexedNote> = { const options: Options<IndexedNote> = {
tokenize, tokenize,
processTerm: (term: string) => processTerm: (term: string) =>
@@ -76,7 +76,7 @@ export async function initGlobalSearchIndex(force = false): Promise<void> {
} }
} }
if (!minisearchInstance || force) { if (!minisearchInstance) {
minisearchInstance = new MiniSearch(options) minisearchInstance = new MiniSearch(options)
resetNotesCache() resetNotesCache()
} }
@@ -84,7 +84,7 @@ export async function initGlobalSearchIndex(force = false): Promise<void> {
// Index files that are already present // Index files that are already present
const start = new Date().getTime() const start = new Date().getTime()
const allFiles = app.vault.getFiles().filter(f => isFileIndexable(f.path)) const allFiles = app.vault.getFiles().filter(f => isFilePlaintext(f.path))
let files let files
let notesSuffix let notesSuffix
@@ -99,16 +99,14 @@ export async function initGlobalSearchIndex(force = false): Promise<void> {
console.log(`Omnisearch - indexing ${files.length} ${notesSuffix}`) console.log(`Omnisearch - indexing ${files.length} ${notesSuffix}`)
// This is basically the same behavior as MiniSearch's `addAllAsync()`. // This is basically the same behavior as MiniSearch's `addAllAsync()`.
// We index files by batches of 10 // We index markdown and plaintext files by batches of 10
for (let i = 0; i < files.length; ++i) { for (let i = 0; i < files.length; ++i) {
if (i % 10 === 0) await wait(0) if (i % 10 === 0) await wait(0)
const file = files[i] const file = files[i]
if (file) { if (getNoteFromCache(file.path)) {
if (getNoteFromCache(file.path)) { removeFromIndex(file.path)
removeFromIndex(file.path)
}
await addToIndex(file)
} }
await addToIndex(file)
} }
if (files.length > 0) { if (files.length > 0) {
@@ -123,6 +121,27 @@ export async function initGlobalSearchIndex(force = false): Promise<void> {
} }
await saveIndexToFile() await saveIndexToFile()
// PDFs are indexed later, since they're heavier
await indexPDFs()
}
}
async function indexPDFs() {
if (settings.indexPDFs) {
console.warn("Omnisearch - Warnings on pdf.worker.min are due to some issues while reading PDFs file.")
const files = app.vault.getFiles().filter(f => f.path.endsWith('.pdf'))
for (const file of files) {
await wait(0)
if (getNoteFromCache(file.path)) {
removeFromIndex(file.path)
}
await addToIndex(file)
console.log(file.path)
}
if (settings.showIndexingNotices) {
new Notice(`Omnisearch - Indexed ${files.length} PDFs`)
}
} }
} }

View File

@@ -2,7 +2,6 @@ import { Plugin, PluginSettingTab, Setting, SliderComponent } from 'obsidian'
import { writable } from 'svelte/store' import { writable } from 'svelte/store'
import { notesCacheFilePath, searchIndexFilePath } from './globals' import { notesCacheFilePath, searchIndexFilePath } from './globals'
import type OmnisearchPlugin from './main' import type OmnisearchPlugin from './main'
import { initGlobalSearchIndex } from './search'
interface WeightingSettings { interface WeightingSettings {
weightBasename: number weightBasename: number
@@ -75,7 +74,7 @@ export class SettingsTab extends PluginSettingTab {
const diacriticsDesc = new DocumentFragment() const diacriticsDesc = new DocumentFragment()
diacriticsDesc.createSpan({}, span => { diacriticsDesc.createSpan({}, span => {
span.innerHTML = `Normalize diacritics in search terms. Words like "brûlée" or "žluťoučký" will be indexed as "brulee" and "zlutoucky".<br/> span.innerHTML = `Normalize diacritics in search terms. Words like "brûlée" or "žluťoučký" will be indexed as "brulee" and "zlutoucky".<br/>
<strong>Changing this will trigger a full reindex.</strong>` <strong>Needs a restart to fully take effect.</strong>`
}) })
new Setting(containerEl) new Setting(containerEl)
.setName('Ignore diacritics') .setName('Ignore diacritics')
@@ -84,7 +83,6 @@ export class SettingsTab extends PluginSettingTab {
toggle.setValue(settings.ignoreDiacritics).onChange(async v => { toggle.setValue(settings.ignoreDiacritics).onChange(async v => {
settings.ignoreDiacritics = v settings.ignoreDiacritics = v
await saveSettings(this.plugin) await saveSettings(this.plugin)
await initGlobalSearchIndex(true)
}) })
) )
@@ -93,7 +91,6 @@ export class SettingsTab extends PluginSettingTab {
indexedFileTypesDesc.createSpan({}, span => { indexedFileTypesDesc.createSpan({}, span => {
span.innerHTML = `In addition to standard <code>md</code> files, Omnisearch can also index other plain text files.<br/> span.innerHTML = `In addition to standard <code>md</code> files, Omnisearch can also index other plain text files.<br/>
Add extensions separated by a space. Example: <code>txt org</code>.<br /> Add extensions separated by a space. Example: <code>txt org</code>.<br />
This setting will also add these files in the navigation, and they will be treated as markdown.<br />
<strong>Needs a restart to fully take effect.</strong>` <strong>Needs a restart to fully take effect.</strong>`
}) })
new Setting(containerEl) new Setting(containerEl)
@@ -114,7 +111,8 @@ export class SettingsTab extends PluginSettingTab {
indexPDFsDesc.createSpan({}, span => { indexPDFsDesc.createSpan({}, span => {
span.innerHTML = `Omnisearch will index your PDFs, and return them in search results. span.innerHTML = `Omnisearch will index your PDFs, and return them in search results.
This feature is currently a work-in-progress, please report slowdowns or issues that you might experience.<br> This feature is currently a work-in-progress, please report slowdowns or issues that you might experience.<br>
<strong>Changing this will trigger a full reindex.</strong>` PDFs being quite slow to index, <strong style="color: var(--text-accent)">it is strongly recommended to also enable "Store index in file"</strong>.<br>
<strong>Needs a restart to fully take effect.</strong>`
}) })
new Setting(containerEl) new Setting(containerEl)
.setName('BETA - Index PDFs') .setName('BETA - Index PDFs')
@@ -123,9 +121,9 @@ export class SettingsTab extends PluginSettingTab {
toggle.setValue(settings.indexPDFs).onChange(async v => { toggle.setValue(settings.indexPDFs).onChange(async v => {
settings.indexPDFs = v settings.indexPDFs = v
await saveSettings(this.plugin) await saveSettings(this.plugin)
await initGlobalSearchIndex(true)
}) })
) )
// Store index // Store index
const serializedIndexDesc = new DocumentFragment() const serializedIndexDesc = new DocumentFragment()
serializedIndexDesc.createSpan({}, span => { serializedIndexDesc.createSpan({}, span => {
@@ -133,7 +131,7 @@ export class SettingsTab extends PluginSettingTab {
This results in faster loading times for bigger vaults and mobile devices.<br /> This results in faster loading times for bigger vaults and mobile devices.<br />
<em>⚠️ Note: the index can become corrupted - if you notice any issue, disable and re-enable this option to clear the cache.</em><br/> <em>⚠️ Note: the index can become corrupted - if you notice any issue, disable and re-enable this option to clear the cache.</em><br/>
<em>⚠️ Cache files in <code>.obsidian/plugins/omnisearch/</code> must not be synchronized.</em><br/> <em>⚠️ Cache files in <code>.obsidian/plugins/omnisearch/</code> must not be synchronized.</em><br/>
<strong>Changing this will trigger a full reindex.</strong> <strong>Needs a restart to fully take effect.</strong>
` `
}) })
new Setting(containerEl) new Setting(containerEl)
@@ -141,11 +139,18 @@ export class SettingsTab extends PluginSettingTab {
.setDesc(serializedIndexDesc) .setDesc(serializedIndexDesc)
.addToggle(toggle => .addToggle(toggle =>
toggle.setValue(settings.storeIndexInFile).onChange(async v => { toggle.setValue(settings.storeIndexInFile).onChange(async v => {
await app.vault.adapter.remove(notesCacheFilePath) try {
await app.vault.adapter.remove(searchIndexFilePath) await app.vault.adapter.remove(notesCacheFilePath)
} catch (e) {
console.warn(e)
}
try {
await app.vault.adapter.remove(searchIndexFilePath)
} catch (e) {
console.warn(e)
}
settings.storeIndexInFile = v settings.storeIndexInFile = v
await saveSettings(this.plugin) await saveSettings(this.plugin)
await initGlobalSearchIndex(true)
}) })
) )
@@ -311,7 +316,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
storeIndexInFile: false, storeIndexInFile: false,
welcomeMessage: '' welcomeMessage: '',
} as const } as const
export let settings = Object.assign({}, DEFAULT_SETTINGS) as OmnisearchSettings export let settings = Object.assign({}, DEFAULT_SETTINGS) as OmnisearchSettings

View File

@@ -173,11 +173,15 @@ export function getCtrlKeyLabel(): 'ctrl' | '⌘' {
} }
export function isFileIndexable(path: string): boolean { export function isFileIndexable(path: string): boolean {
return ( return (settings.indexPDFs && path.endsWith('.pdf')) || isFilePlaintext(path)
path.endsWith('.md') || }
(settings.indexPDFs && path.endsWith('.pdf')) ||
settings.indexedFileTypes.some(t => path.endsWith(`.${t}`)) export function isFilePlaintext(path: string): boolean {
) return getPlaintextExtensions().some(t => path.endsWith(`.${t}`))
}
export function getPlaintextExtensions(): string[] {
return [...settings.indexedFileTypes, 'md']
} }
export function getExtension(path: string): string { export function getExtension(path: string): string {