diff --git a/package.json b/package.json index e0a58e1..9428772 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "scambier.obsidian-search", - "version": "1.12.3", + "version": "1.13.0-beta.2", "description": "A search engine for Obsidian", "main": "dist/main.js", "scripts": { diff --git a/src/cache-manager.ts b/src/cache-manager.ts index 10132f1..e2a7ca5 100644 --- a/src/cache-manager.ts +++ b/src/cache-manager.ts @@ -11,6 +11,7 @@ import { getTagsFromMetadata, isFileCanvas, isFilePlaintext, + logDebug, makeMD5, removeDiacritics, } from './tools/utils' @@ -150,6 +151,7 @@ class CacheManager { if (this.documents.has(path)) { return this.documents.get(path)! } + logDebug('Generating IndexedDocument from', path) await this.addToLiveCache(path) return this.documents.get(path)! } diff --git a/src/components/ModalVault.svelte b/src/components/ModalVault.svelte index 3956e13..55fcd0f 100644 --- a/src/components/ModalVault.svelte +++ b/src/components/ModalVault.svelte @@ -15,6 +15,7 @@ getCtrlKeyLabel, getExtension, isFilePDF, + logDebug, loopIndex, } from 'src/tools/utils' import { @@ -27,7 +28,6 @@ import * as NotesIndex from '../notes-index' import { cacheManager } from '../cache-manager' import { searchEngine } from 'src/search/omnisearch' - import CancelablePromise, { cancelable } from 'cancelable-promise' export let modal: OmnisearchVaultModal export let previousQuery: string | undefined @@ -40,28 +40,13 @@ let searching = true let refInput: InputSearch | undefined - let pWaitingResults: CancelablePromise | null = null - $: selectedNote = resultNotes[selectedIndex] $: searchQuery = searchQuery ?? 
previousQuery $: if (searchQuery) { - if (pWaitingResults) { - pWaitingResults.cancel() - pWaitingResults = null - } searching = true - pWaitingResults = cancelable( - new Promise((resolve, reject) => { - updateResults() - .then(() => { - searching = false - resolve(null) - }) - .catch(e => { - reject(e) - }) - }) - ) + updateResults().then(() => { + searching = false + }) } else { searching = false resultNotes = [] @@ -130,9 +115,7 @@ async function updateResults() { query = new Query(searchQuery) - resultNotes = (await searchEngine.getSuggestions(query)).sort( - (a, b) => b.score - a.score - ) + resultNotes = await searchEngine.getSuggestions(query) selectedIndex = 0 await scrollIntoView() } diff --git a/src/globals.ts b/src/globals.ts index a9e77fc..49ce0be 100644 --- a/src/globals.ts +++ b/src/globals.ts @@ -14,7 +14,7 @@ export const excerptBefore = 100 export const excerptAfter = 300 export const highlightClass = `suggestion-highlight omnisearch-highlight ${ - settings.hightlight ? 'omnisearch-default-highlight' : '' + settings.highlight ? 
'omnisearch-default-highlight' : '' }` export const eventBus = new EventBus() diff --git a/src/main.ts b/src/main.ts index 039ea86..0f1e655 100644 --- a/src/main.ts +++ b/src/main.ts @@ -18,7 +18,7 @@ import { isCacheEnabled, } from './globals' import api, { notifyOnIndexed } from './tools/api' -import { isFileIndexable } from './tools/utils' +import { isFileIndexable, logDebug } from './tools/utils' import { database, OmnisearchCache } from './database' import * as NotesIndex from './notes-index' import { searchEngine } from './search/omnisearch' @@ -69,6 +69,7 @@ export default class OmnisearchPlugin extends Plugin { this.registerEvent( this.app.vault.on('create', file => { if (isFileIndexable(file.path)) { + logDebug('Indexing new file', file.path) // await cacheManager.addToLiveCache(file.path) searchEngine.addFromPaths([file.path]) } @@ -76,6 +77,7 @@ export default class OmnisearchPlugin extends Plugin { ) this.registerEvent( this.app.vault.on('delete', file => { + logDebug('Removing file', file.path) cacheManager.removeFromLiveCache(file.path) searchEngine.removeFromPaths([file.path]) }) @@ -83,6 +85,7 @@ export default class OmnisearchPlugin extends Plugin { this.registerEvent( this.app.vault.on('modify', async file => { if (isFileIndexable(file.path)) { + logDebug('Updating file', file.path) await cacheManager.addToLiveCache(file.path) NotesIndex.markNoteForReindex(file) } @@ -91,6 +94,7 @@ export default class OmnisearchPlugin extends Plugin { this.registerEvent( this.app.vault.on('rename', async (file, oldPath) => { if (isFileIndexable(file.path)) { + logDebug('Renaming file', file.path) cacheManager.removeFromLiveCache(oldPath) cacheManager.addToLiveCache(file.path) searchEngine.removeFromPaths([oldPath]) diff --git a/src/search/omnisearch.ts b/src/search/omnisearch.ts index 3c97c8e..0a06b8f 100644 --- a/src/search/omnisearch.ts +++ b/src/search/omnisearch.ts @@ -9,9 +9,12 @@ import { chsRegex, getChsSegmenter, SPACE_OR_PUNCTUATION } from '../globals' 
import { settings } from '../settings' import { chunkArray, + logDebug, removeDiacritics, + splitCamelCase, stringsToRegex, stripMarkdownCharacters, + warnDebug, } from '../tools/utils' import { Notice } from 'obsidian' import type { Query } from './query' @@ -25,7 +28,11 @@ const tokenize = (text: string): string[] => { return tokens.flatMap(word => chsRegex.test(word) ? chsSegmenter.cut(word) : [word] ) - } else return tokens + } else { + if (settings.splitCamelCase) + return [...tokens, ...tokens.flatMap(splitCamelCase)] + return tokens + } } export class Omnisearch { @@ -117,11 +124,13 @@ export class Omnisearch { * @param paths */ public async addFromPaths(paths: string[]): Promise { + logDebug('Adding files', paths) let documents = ( await Promise.all( paths.map(async path => await cacheManager.getDocument(path)) ) ).filter(d => !!d?.path) + logDebug('Sorting documents to first index markdown') // Index markdown files first documents = sortBy(documents, d => (d.path.endsWith('.md') ? 
0 : 1)) @@ -133,6 +142,7 @@ export class Omnisearch { // Split the documents in smaller chunks to add them to minisearch const chunkedDocs = chunkArray(documents, 500) for (const docs of chunkedDocs) { + logDebug('Indexing into search engine', docs) // Update the list of indexed docs docs.forEach(doc => this.indexedDocuments.set(doc.path, doc.mtime)) @@ -170,6 +180,8 @@ export class Omnisearch { return [] } + logDebug('Starting search for', query) + let results = this.minisearch.search(query.segmentsToStr(), { prefix: term => term.length >= options.prefixLength, // length <= 3: no fuzziness @@ -187,6 +199,8 @@ export class Omnisearch { }, }) + logDebug('Found', results.length, 'results') + // Filter query results to only keep files that match query.extensions (if any) if (query.extensions.length) { results = results.filter(r => { @@ -242,7 +256,10 @@ export class Omnisearch { } } - results = results.slice(0, 50) + logDebug('Sorting and limiting results') + + // Sort results and keep the 50 best + results = results.sort((a, b) => b.score - a.score).slice(0, 50) const documents = await Promise.all( results.map(async result => await cacheManager.getDocument(result.id)) @@ -251,6 +268,7 @@ export class Omnisearch { // If the search query contains quotes, filter out results that don't have the exact match const exactTerms = query.getExactTerms() if (exactTerms.length) { + logDebug('Filtering with quoted terms') results = results.filter(r => { const document = documents.find(d => d.path === r.id) const title = document?.path.toLowerCase() ?? '' @@ -264,6 +282,7 @@ export class Omnisearch { // If the search query contains exclude terms, filter out results that have them const exclusions = query.exclusions if (exclusions.length) { + logDebug('Filtering with exclusions') results = results.filter(r => { const content = stripMarkdownCharacters( documents.find(d => d.path === r.id)?.content ?? 
'' @@ -271,6 +290,8 @@ export class Omnisearch { return exclusions.every(q => !content.includes(q.value)) }) } + + logDebug('Deduping') // FIXME: // Dedupe results - clutch for https://github.com/scambier/obsidian-omnisearch/issues/129 results = results.filter( @@ -284,11 +305,16 @@ export class Omnisearch { } public getMatches(text: string, reg: RegExp, query: Query): SearchMatch[] { + const startTime = new Date().getTime() let match: RegExpExecArray | null = null const matches: SearchMatch[] = [] let count = 0 while ((match = reg.exec(text)) !== null) { - if (++count >= 100) break // Avoid infinite loops, stop looking after 100 matches + // Avoid infinite loops, stop looking after 100 matches or if we're taking too much time + if (++count >= 100 || new Date().getTime() - startTime > 50) { + warnDebug('Stopped getMatches at', count, 'results') + break + } const m = match[0] if (m) matches.push({ match: m, offset: match.index }) } @@ -331,17 +357,13 @@ export class Omnisearch { }) } - // Extract tags from the query - const tags = query.segments - .filter(s => s.value.startsWith('#')) - .map(s => s.value) - const documents = await Promise.all( results.map(async result => await cacheManager.getDocument(result.id)) ) // Map the raw results to get usable suggestions const resultNotes = results.map(result => { + logDebug('Locating matches for', result.id) let note = documents.find(d => d.path === result.id) if (!note) { // throw new Error(`Omnisearch - Note "${result.id}" not indexed`) @@ -357,6 +379,12 @@ export class Omnisearch { query.segments.forEach(s => { s.value = s.value.replace(/^#/, '') }) + + // Extract tags from the query + const tags = query.segments + .filter(s => s.value.startsWith('#')) + .map(s => s.value) + // Clean search matches that match quoted expressions, // and inject those expressions instead const foundWords = [ @@ -370,13 +398,15 @@ export class Omnisearch { // Tags, starting with # ...tags, ].filter(w => w.length > 1 || /\p{Emoji}/u.test(w)) 
+ logDebug('Matching tokens:', foundWords) - // console.log(foundWords) + logDebug('Getting matches locations...') const matches = this.getMatches( note.content, stringsToRegex(foundWords), query ) + logDebug('Matches:', matches) const resultNote: ResultNote = { score: result.score, foundWords, diff --git a/src/settings.ts b/src/settings.ts index f3778a0..9f8b536 100644 --- a/src/settings.ts +++ b/src/settings.ts @@ -45,7 +45,9 @@ export interface OmnisearchSettings extends WeightingSettings { welcomeMessage: string /** If a query returns 0 result, try again with more relax conditions */ simpleSearch: boolean - hightlight: boolean + highlight: boolean + splitCamelCase: boolean + verboseLogging: boolean } /** @@ -204,6 +206,25 @@ export class SettingsTab extends PluginSettingTab { }) ) + // Split CamelCaseWords + const camelCaseDesc = new DocumentFragment() + camelCaseDesc.createSpan({}, span => { + span.innerHTML = `Enable this if you want to be able to search for CamelCaseWords as separate words.
+ ⚠️ Changing this setting will clear the cache.
+ Needs a restart to fully take effect. + ` + }) + new Setting(containerEl) + .setName('Split CamelCaseWords') + .setDesc(camelCaseDesc) + .addToggle(toggle => + toggle.setValue(settings.splitCamelCase).onChange(async v => { + await database.clearCache() + settings.splitCamelCase = v + await saveSettings(this.plugin) + }) + ) + // Simpler search new Setting(containerEl) .setName('Simpler search') @@ -301,8 +322,8 @@ export class SettingsTab extends PluginSettingTab { 'Will highlight matching results when enabled. See README for more customization options.' ) .addToggle(toggle => - toggle.setValue(settings.hightlight).onChange(async v => { - settings.hightlight = v + toggle.setValue(settings.highlight).onChange(async v => { + settings.highlight = v await saveSettings(this.plugin) }) ) @@ -337,6 +358,22 @@ export class SettingsTab extends PluginSettingTab { //#endregion Results Weighting + //#region Debugging + + new Setting(containerEl).setName('Debugging').setHeading() + + new Setting(containerEl) + .setName('Enable verbose logging') + .setDesc('Adds a LOT of logs for debugging purposes. 
Don\'t forget to disable it.') + .addToggle(toggle => + toggle.setValue(settings.verboseLogging).onChange(async v => { + settings.verboseLogging = v + await saveSettings(this.plugin) + }) + ) + + //#endregion Debugging + //#region Danger Zone if (isCacheEnabled()) { new Setting(containerEl).setName('Danger Zone').setHeading() @@ -379,12 +416,13 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = { indexedFileTypes: [] as string[], PDFIndexing: false, imagesIndexing: false, + splitCamelCase: false, ribbonIcon: true, showExcerpt: true, renderLineReturnInExcerpts: true, showCreateButton: false, - hightlight: true, + highlight: true, showPreviousQueryResults: true, simpleSearch: false, @@ -395,6 +433,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = { weightH3: 1.1, welcomeMessage: '', + verboseLogging: false, } as const export let settings = Object.assign({}, DEFAULT_SETTINGS) as OmnisearchSettings diff --git a/src/tools/utils.ts b/src/tools/utils.ts index 90e7ca9..8ab8131 100644 --- a/src/tools/utils.ts +++ b/src/tools/utils.ts @@ -91,14 +91,17 @@ export function getAllIndices(text: string, regex: RegExp): SearchMatch[] { */ export function stringsToRegex(strings: string[]): RegExp { if (!strings.length) return /^$/g - // Default word split is not applied if the user uses the cm-chs-patch plugin const joined = '(' + - (getChsSegmenter() ? '' : `^|${SPACE_OR_PUNCTUATION.source}`) + + // Default word split is not applied if the user uses the cm-chs-patch plugin + (getChsSegmenter() + ? '' + : // Split on start of line, spaces, punctuation, or capital letters (for camelCase) + settings.splitCamelCase + ? 
`^|${SPACE_OR_PUNCTUATION.source}|[A-Z]` + : `^|${SPACE_OR_PUNCTUATION.source}`) + ')' + - '(' + - strings.map(s => escapeRegex(s)).join('|') + - ')' + `(${strings.map(s => escapeRegex(s)).join('|')})` const reg = new RegExp(`${joined}`, 'giu') return reg @@ -313,3 +316,27 @@ export function chunkArray(arr: T[], len: number): T[][] { return chunks } + +/** + * Converts 'fooBarBAZLorem' into ['foo', 'Bar', 'BAZLorem']: splits only after a lowercase letter that is followed by an uppercase one + * @param text + */ +export function splitCamelCase(text: string): string[] { + return text.replace(/([a-z](?=[A-Z]))/g, '$1 ').split(' ') +} + +export function logDebug(...args: any[]): void { + printDebug(console.log, ...args) +} + +export function warnDebug(...args: any[]): void { + printDebug(console.warn, ...args) +} + +function printDebug(fn: (...args: any[]) => any, ...args: any[]): void { + if (settings.verboseLogging) { + const t = new Date() + const ts = `${t.getMinutes()}:${t.getSeconds()}:${t.getMilliseconds()}` + fn(...['Omnisearch -', ts + ' -', ...args]) + } +} diff --git a/src/vendor/parse-query.ts b/src/vendor/parse-query.ts index 0679133..a0c846a 100644 --- a/src/vendor/parse-query.ts +++ b/src/vendor/parse-query.ts @@ -6,6 +6,8 @@ * MIT Licensed */ +import { warnDebug } from "../tools/utils"; + interface SearchParserOptions { offsets?: boolean tokenize: true @@ -30,7 +32,7 @@ type SearchParserTextOffset = { type SearchParserOffset = ( | SearchParserKeyWordOffset | SearchParserTextOffset -) & { + ) & { offsetStart: number offsetEnd: number } @@ -43,7 +45,7 @@ interface SearchParserResult extends ISearchParserDictionary { export function parseQuery( string: string, - options: SearchParserOptions + options: SearchParserOptions, ): SearchParserResult { // Set a default options object when none is provided if (!options) { @@ -74,9 +76,14 @@ export function parseQuery( const regex = /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|(-?"(?:[^"\\]|\\.)*")|(-?'(?:[^'\\]|\\.)*')|\S+|\S+:\S+/g let match - let count = 0 // TODO: FIXME: this 
is a hack to avoid infinite loops + let count = 0 + const startTime = new Date().getTime() + while ((match = regex.exec(string)) !== null) { - if (++count >= 100) break + if (++count >= 100 || new Date().getTime() - startTime > 50) { + warnDebug('Stopped SearchParserResult at', count, 'results') + break + } let term = match[0] const sepIndex = term.indexOf(':') @@ -291,11 +298,11 @@ export function parseQuery( query[key].from = rangeValues[0] query[key].to = rangeValues[1] } - // When pairs of ranges are specified - // keyword:XXXX-YYYY,AAAA-BBBB - // else if (!rangeValues.length % 2) { - // } - // When only getting a single value, + // When pairs of ranges are specified + // keyword:XXXX-YYYY,AAAA-BBBB + // else if (!rangeValues.length % 2) { + // } + // When only getting a single value, // or an odd number of values else { query[key].from = value