From 96b4ac631df8c1db33a71e5564f74c8505ccead8 Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Fri, 24 Feb 2023 12:16:40 +0100 Subject: [PATCH 1/3] #176 - WIP tokenization of CamelCase words Technically works, but highlighting needs a rework --- src/search/omnisearch.ts | 3 ++- src/tools/utils.ts | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/search/omnisearch.ts b/src/search/omnisearch.ts index c9a3421..a057604 100644 --- a/src/search/omnisearch.ts +++ b/src/search/omnisearch.ts @@ -10,6 +10,7 @@ import { settings } from '../settings' import { chunkArray, removeDiacritics, + splitCamelCase, stringsToRegex, stripMarkdownCharacters, } from '../tools/utils' @@ -25,7 +26,7 @@ const tokenize = (text: string): string[] => { return tokens.flatMap(word => chsRegex.test(word) ? chsSegmenter.cut(word) : [word] ) - } else return tokens + } else return tokens.flatMap(splitCamelCase) } export class Omnisearch { diff --git a/src/tools/utils.ts b/src/tools/utils.ts index 9254408..66ee8d2 100644 --- a/src/tools/utils.ts +++ b/src/tools/utils.ts @@ -307,3 +307,11 @@ export function chunkArray(arr: T[], len: number): T[][] { return chunks } + +/** + * Converts a 'fooBarBAZLorem' into ['foo', 'Bar', 'BAZLorem'] (splits only at lowercase-to-uppercase boundaries) + * @param text + */ +export function splitCamelCase(text: string): string[] { + return text.replace(/([a-z](?=[A-Z]))/g, '$1 ').split(' ') +} From 0529b978cb07d8f04bbfba534802ea76f645f1f7 Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Thu, 2 Mar 2023 20:06:40 +0100 Subject: [PATCH 2/3] Fixed typo --- src/globals.ts | 2 +- src/settings.ts | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/globals.ts b/src/globals.ts index a9e77fc..49ce0be 100644 --- a/src/globals.ts +++ b/src/globals.ts @@ -14,7 +14,7 @@ export const excerptBefore = 100 export const excerptAfter = 300 export const highlightClass = `suggestion-highlight omnisearch-highlight ${ - settings.hightlight ? 
'omnisearch-default-highlight' : '' + settings.highlight ? 'omnisearch-default-highlight' : '' }` export const eventBus = new EventBus() diff --git a/src/settings.ts b/src/settings.ts index f3778a0..b0a7e58 100644 --- a/src/settings.ts +++ b/src/settings.ts @@ -45,7 +45,7 @@ export interface OmnisearchSettings extends WeightingSettings { welcomeMessage: string /** If a query returns 0 result, try again with more relax conditions */ simpleSearch: boolean - hightlight: boolean + highlight: boolean } /** @@ -301,8 +301,8 @@ export class SettingsTab extends PluginSettingTab { 'Will highlight matching results when enabled. See README for more customization options.' ) .addToggle(toggle => - toggle.setValue(settings.hightlight).onChange(async v => { - settings.hightlight = v + toggle.setValue(settings.highlight).onChange(async v => { + settings.highlight = v await saveSettings(this.plugin) }) ) @@ -384,7 +384,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = { showExcerpt: true, renderLineReturnInExcerpts: true, showCreateButton: false, - hightlight: true, + highlight: true, showPreviousQueryResults: true, simpleSearch: false, From 36ccb52982fd55d74850a3f4851f7a94409fcf6b Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Thu, 2 Mar 2023 21:32:37 +0100 Subject: [PATCH 3/3] #176 - Option to index CamelCaseWords --- src/search/omnisearch.ts | 6 +++++- src/settings.ts | 21 +++++++++++++++++++++ src/tools/utils.ts | 13 ++++++++----- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/search/omnisearch.ts b/src/search/omnisearch.ts index 902607d..f621fbf 100644 --- a/src/search/omnisearch.ts +++ b/src/search/omnisearch.ts @@ -26,7 +26,11 @@ const tokenize = (text: string): string[] => { return tokens.flatMap(word => chsRegex.test(word) ? 
chsSegmenter.cut(word) : [word] ) - } else return tokens.flatMap(splitCamelCase) + } else { + if (settings.splitCamelCase) + return [...tokens, ...tokens.flatMap(splitCamelCase)] + return tokens + } } export class Omnisearch { diff --git a/src/settings.ts b/src/settings.ts index b0a7e58..5a6ca90 100644 --- a/src/settings.ts +++ b/src/settings.ts @@ -46,6 +46,7 @@ export interface OmnisearchSettings extends WeightingSettings { /** If a query returns 0 result, try again with more relax conditions */ simpleSearch: boolean highlight: boolean + splitCamelCase: boolean } /** @@ -204,6 +205,25 @@ export class SettingsTab extends PluginSettingTab { }) ) + // Split CamelCaseWords + const camelCaseDesc = new DocumentFragment() + camelCaseDesc.createSpan({}, span => { + span.innerHTML = `Enable this if you want to be able to search for CamelCaseWords as separate words.
+ ⚠️ Changing this setting will clear the cache.
+ Needs a restart to fully take effect. + ` + }) + new Setting(containerEl) + .setName('Split CamelCaseWords') + .setDesc(camelCaseDesc) + .addToggle(toggle => + toggle.setValue(settings.splitCamelCase).onChange(async v => { + await database.clearCache() + settings.splitCamelCase = v + await saveSettings(this.plugin) + }) + ) + // Simpler search new Setting(containerEl) .setName('Simpler search') @@ -379,6 +399,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = { indexedFileTypes: [] as string[], PDFIndexing: false, imagesIndexing: false, + splitCamelCase: false, ribbonIcon: true, showExcerpt: true, diff --git a/src/tools/utils.ts b/src/tools/utils.ts index 73eed56..035ae57 100644 --- a/src/tools/utils.ts +++ b/src/tools/utils.ts @@ -91,14 +91,17 @@ export function getAllIndices(text: string, regex: RegExp): SearchMatch[] { */ export function stringsToRegex(strings: string[]): RegExp { if (!strings.length) return /^$/g - // Default word split is not applied if the user uses the cm-chs-patch plugin const joined = '(' + - (getChsSegmenter() ? '' : `^|${SPACE_OR_PUNCTUATION.source}`) + + // Default word split is not applied if the user uses the cm-chs-patch plugin + (getChsSegmenter() + ? '' + : // Split on start of line, spaces, punctuation, or capital letters (for camelCase) + settings.splitCamelCase + ? `^|${SPACE_OR_PUNCTUATION.source}|[A-Z]` + : `^|${SPACE_OR_PUNCTUATION.source}`) + ')' + - '(' + - strings.map(s => escapeRegex(s)).join('|') + - ')' + `(${strings.map(s => escapeRegex(s)).join('|')})` const reg = new RegExp(`${joined}`, 'giu') return reg