#176 - Option to index CamelCaseWords

This commit is contained in:
Simon Cambier
2023-03-02 21:32:37 +01:00
parent 0529b978cb
commit 36ccb52982
3 changed files with 34 additions and 6 deletions

View File

@@ -26,7 +26,11 @@ const tokenize = (text: string): string[] => {
return tokens.flatMap(word =>
chsRegex.test(word) ? chsSegmenter.cut(word) : [word]
)
} else return tokens.flatMap(splitCamelCase)
} else {
if (settings.splitCamelCase)
return [...tokens, ...tokens.flatMap(splitCamelCase)]
return tokens
}
}
export class Omnisearch {

View File

@@ -46,6 +46,7 @@ export interface OmnisearchSettings extends WeightingSettings {
/** If a query returns 0 result, try again with more relax conditions */
simpleSearch: boolean
highlight: boolean
splitCamelCase: boolean
}
/**
@@ -204,6 +205,25 @@ export class SettingsTab extends PluginSettingTab {
})
)
// Split CamelCaseWords
// Toggle bound to `settings.splitCamelCase`. The description is built as a
// DocumentFragment so that it can carry markup (line breaks and accent-colored
// warnings) rather than plain text.
const camelCaseDesc = new DocumentFragment()
camelCaseDesc.createSpan({}, span => {
// The markup below is a static literal; nothing is interpolated into it.
span.innerHTML = `Enable this if you want to be able to search for CamelCaseWords as separate words.<br/>
⚠️ <span style="color: var(--text-accent)">Changing this setting will clear the cache.</span><br>
<strong style="color: var(--text-accent)">Needs a restart to fully take effect.</strong>
`
})
new Setting(containerEl)
.setName('Split CamelCaseWords')
.setDesc(camelCaseDesc)
.addToggle(toggle =>
// The toggle is initialized from the current value of the setting.
toggle.setValue(settings.splitCamelCase).onChange(async v => {
// Order on change: clear the cache, then store the new value, then save
// the settings. NOTE(review): the cache is presumably cleared because
// previously indexed content was tokenized with the old value — confirm.
await database.clearCache()
settings.splitCamelCase = v
await saveSettings(this.plugin)
})
)
// Simpler search
new Setting(containerEl)
.setName('Simpler search')
@@ -379,6 +399,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
indexedFileTypes: [] as string[],
PDFIndexing: false,
imagesIndexing: false,
splitCamelCase: false,
ribbonIcon: true,
showExcerpt: true,

View File

@@ -91,14 +91,17 @@ export function getAllIndices(text: string, regex: RegExp): SearchMatch[] {
*/
export function stringsToRegex(strings: string[]): RegExp {
if (!strings.length) return /^$/g
// Default word split is not applied if the user uses the cm-chs-patch plugin
const joined =
'(' +
(getChsSegmenter() ? '' : `^|${SPACE_OR_PUNCTUATION.source}`) +
// Default word split is not applied if the user uses the cm-chs-patch plugin
(getChsSegmenter()
? ''
: // Split on start of line, spaces, punctuation, or capital letters (for camelCase)
settings.splitCamelCase
? `^|${SPACE_OR_PUNCTUATION.source}|[A-Z]`
: `^|${SPACE_OR_PUNCTUATION.source}`) +
')' +
'(' +
strings.map(s => escapeRegex(s)).join('|') +
')'
`(${strings.map(s => escapeRegex(s)).join('|')})`
const reg = new RegExp(`${joined}`, 'giu')
return reg