Merge branch 'feature/176-CamelCaseTokens' into develop

This commit is contained in:
Simon Cambier
2023-03-02 21:33:31 +01:00
4 changed files with 48 additions and 11 deletions

View File

@@ -14,7 +14,7 @@ export const excerptBefore = 100
export const excerptAfter = 300 export const excerptAfter = 300
export const highlightClass = `suggestion-highlight omnisearch-highlight ${ export const highlightClass = `suggestion-highlight omnisearch-highlight ${
settings.hightlight ? 'omnisearch-default-highlight' : '' settings.highlight ? 'omnisearch-default-highlight' : ''
}` }`
export const eventBus = new EventBus() export const eventBus = new EventBus()

View File

@@ -10,6 +10,7 @@ import { settings } from '../settings'
import { import {
chunkArray, chunkArray,
removeDiacritics, removeDiacritics,
splitCamelCase,
stringsToRegex, stringsToRegex,
stripMarkdownCharacters, stripMarkdownCharacters,
} from '../tools/utils' } from '../tools/utils'
@@ -25,7 +26,11 @@ const tokenize = (text: string): string[] => {
return tokens.flatMap(word => return tokens.flatMap(word =>
chsRegex.test(word) ? chsSegmenter.cut(word) : [word] chsRegex.test(word) ? chsSegmenter.cut(word) : [word]
) )
} else return tokens } else {
if (settings.splitCamelCase)
return [...tokens, ...tokens.flatMap(splitCamelCase)]
return tokens
}
} }
export class Omnisearch { export class Omnisearch {

View File

@@ -45,7 +45,8 @@ export interface OmnisearchSettings extends WeightingSettings {
welcomeMessage: string welcomeMessage: string
/** If a query returns 0 result, try again with more relax conditions */ /** If a query returns 0 result, try again with more relax conditions */
simpleSearch: boolean simpleSearch: boolean
hightlight: boolean highlight: boolean
splitCamelCase: boolean
} }
/** /**
@@ -204,6 +205,25 @@ export class SettingsTab extends PluginSettingTab {
}) })
) )
// Split CamelCaseWords
const camelCaseDesc = new DocumentFragment()
camelCaseDesc.createSpan({}, span => {
span.innerHTML = `Enable this if you want to be able to search for CamelCaseWords as separate words.<br/>
⚠️ <span style="color: var(--text-accent)">Changing this setting will clear the cache.</span><br>
<strong style="color: var(--text-accent)">Needs a restart to fully take effect.</strong>
`
})
new Setting(containerEl)
.setName('Split CamelCaseWords')
.setDesc(camelCaseDesc)
.addToggle(toggle =>
toggle.setValue(settings.splitCamelCase).onChange(async v => {
await database.clearCache()
settings.splitCamelCase = v
await saveSettings(this.plugin)
})
)
// Simpler search // Simpler search
new Setting(containerEl) new Setting(containerEl)
.setName('Simpler search') .setName('Simpler search')
@@ -301,8 +321,8 @@ export class SettingsTab extends PluginSettingTab {
'Will highlight matching results when enabled. See README for more customization options.' 'Will highlight matching results when enabled. See README for more customization options.'
) )
.addToggle(toggle => .addToggle(toggle =>
toggle.setValue(settings.hightlight).onChange(async v => { toggle.setValue(settings.highlight).onChange(async v => {
settings.hightlight = v settings.highlight = v
await saveSettings(this.plugin) await saveSettings(this.plugin)
}) })
) )
@@ -379,12 +399,13 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
indexedFileTypes: [] as string[], indexedFileTypes: [] as string[],
PDFIndexing: false, PDFIndexing: false,
imagesIndexing: false, imagesIndexing: false,
splitCamelCase: false,
ribbonIcon: true, ribbonIcon: true,
showExcerpt: true, showExcerpt: true,
renderLineReturnInExcerpts: true, renderLineReturnInExcerpts: true,
showCreateButton: false, showCreateButton: false,
hightlight: true, highlight: true,
showPreviousQueryResults: true, showPreviousQueryResults: true,
simpleSearch: false, simpleSearch: false,

View File

@@ -91,14 +91,17 @@ export function getAllIndices(text: string, regex: RegExp): SearchMatch[] {
*/ */
export function stringsToRegex(strings: string[]): RegExp { export function stringsToRegex(strings: string[]): RegExp {
if (!strings.length) return /^$/g if (!strings.length) return /^$/g
// Default word split is not applied if the user uses the cm-chs-patch plugin
const joined = const joined =
'(' + '(' +
(getChsSegmenter() ? '' : `^|${SPACE_OR_PUNCTUATION.source}`) + // Default word split is not applied if the user uses the cm-chs-patch plugin
(getChsSegmenter()
? ''
: // Split on start of line, spaces, punctuation, or capital letters (for camelCase)
settings.splitCamelCase
? `^|${SPACE_OR_PUNCTUATION.source}|[A-Z]`
: `^|${SPACE_OR_PUNCTUATION.source}`) +
')' + ')' +
'(' + `(${strings.map(s => escapeRegex(s)).join('|')})`
strings.map(s => escapeRegex(s)).join('|') +
')'
const reg = new RegExp(`${joined}`, 'giu') const reg = new RegExp(`${joined}`, 'giu')
return reg return reg
@@ -313,3 +316,11 @@ export function chunkArray<T>(arr: T[], len: number): T[][] {
return chunks return chunks
} }
/**
 * Splits a camelCase / PascalCase word into its component words.
 *
 * A boundary is inserted:
 * - after a lowercase letter that is followed by an uppercase letter
 *   ('fooBar' -> 'foo', 'Bar'), and
 * - after the last letter of an uppercase run that is followed by a
 *   capitalised word ('BAZLorem' -> 'BAZ', 'Lorem').
 *
 * Example: 'fooBarBAZLorem' -> ['foo', 'Bar', 'BAZ', 'Lorem']
 *
 * Only ASCII letters are taken into account. The pieces are obtained by
 * splitting on spaces, so spaces already present in `text` also act as
 * separators. A word without any boundary is returned unchanged as a
 * single-element array.
 *
 * @param text the word to split
 * @returns the component words, in their original order
 */
export function splitCamelCase(text: string): string[] {
  return text
    // First alternative: lower -> upper boundary.
    // Second alternative: end of an acronym, i.e. an uppercase letter followed
    // by an uppercase letter that itself starts a lowercase word.
    .replace(/([a-z](?=[A-Z])|[A-Z](?=[A-Z][a-z]))/g, '$1 ')
    .split(' ')
}