Merge branch 'feature/176-CamelCaseTokens' into develop

This commit is contained in:
Simon Cambier
2023-03-02 21:33:31 +01:00
4 changed files with 48 additions and 11 deletions

View File

@@ -14,7 +14,7 @@ export const excerptBefore = 100
export const excerptAfter = 300
export const highlightClass = `suggestion-highlight omnisearch-highlight ${
settings.hightlight ? 'omnisearch-default-highlight' : ''
settings.highlight ? 'omnisearch-default-highlight' : ''
}`
export const eventBus = new EventBus()

View File

@@ -10,6 +10,7 @@ import { settings } from '../settings'
import {
chunkArray,
removeDiacritics,
splitCamelCase,
stringsToRegex,
stripMarkdownCharacters,
} from '../tools/utils'
@@ -25,7 +26,11 @@ const tokenize = (text: string): string[] => {
return tokens.flatMap(word =>
chsRegex.test(word) ? chsSegmenter.cut(word) : [word]
)
} else return tokens
} else {
if (settings.splitCamelCase)
return [...tokens, ...tokens.flatMap(splitCamelCase)]
return tokens
}
}
export class Omnisearch {

View File

@@ -45,7 +45,8 @@ export interface OmnisearchSettings extends WeightingSettings {
welcomeMessage: string
/** If a query returns 0 results, try again with more relaxed conditions */
simpleSearch: boolean
hightlight: boolean
highlight: boolean
splitCamelCase: boolean
}
/**
@@ -204,6 +205,25 @@ export class SettingsTab extends PluginSettingTab {
})
)
// Split CamelCaseWords
const camelCaseDesc = new DocumentFragment()
camelCaseDesc.createSpan({}, span => {
span.innerHTML = `Enable this if you want to be able to search for CamelCaseWords as separate words.<br/>
⚠️ <span style="color: var(--text-accent)">Changing this setting will clear the cache.</span><br>
<strong style="color: var(--text-accent)">Needs a restart to fully take effect.</strong>
`
})
new Setting(containerEl)
.setName('Split CamelCaseWords')
.setDesc(camelCaseDesc)
.addToggle(toggle =>
toggle.setValue(settings.splitCamelCase).onChange(async v => {
await database.clearCache()
settings.splitCamelCase = v
await saveSettings(this.plugin)
})
)
// Simpler search
new Setting(containerEl)
.setName('Simpler search')
@@ -301,8 +321,8 @@ export class SettingsTab extends PluginSettingTab {
'Will highlight matching results when enabled. See README for more customization options.'
)
.addToggle(toggle =>
toggle.setValue(settings.hightlight).onChange(async v => {
settings.hightlight = v
toggle.setValue(settings.highlight).onChange(async v => {
settings.highlight = v
await saveSettings(this.plugin)
})
)
@@ -379,12 +399,13 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
indexedFileTypes: [] as string[],
PDFIndexing: false,
imagesIndexing: false,
splitCamelCase: false,
ribbonIcon: true,
showExcerpt: true,
renderLineReturnInExcerpts: true,
showCreateButton: false,
hightlight: true,
highlight: true,
showPreviousQueryResults: true,
simpleSearch: false,

View File

@@ -91,14 +91,17 @@ export function getAllIndices(text: string, regex: RegExp): SearchMatch[] {
*/
export function stringsToRegex(strings: string[]): RegExp {
if (!strings.length) return /^$/g
// Default word split is not applied if the user uses the cm-chs-patch plugin
const joined =
'(' +
(getChsSegmenter() ? '' : `^|${SPACE_OR_PUNCTUATION.source}`) +
// Default word split is not applied if the user uses the cm-chs-patch plugin
(getChsSegmenter()
? ''
: // Split on start of line, spaces, punctuation, or capital letters (for camelCase)
settings.splitCamelCase
? `^|${SPACE_OR_PUNCTUATION.source}|[A-Z]`
: `^|${SPACE_OR_PUNCTUATION.source}`) +
')' +
'(' +
strings.map(s => escapeRegex(s)).join('|') +
')'
`(${strings.map(s => escapeRegex(s)).join('|')})`
const reg = new RegExp(`${joined}`, 'giu')
return reg
@@ -313,3 +316,11 @@ export function chunkArray<T>(arr: T[], len: number): T[][] {
return chunks
}
/**
 * Splits a camelCase / PascalCase word into its component words,
 * e.g. 'fooBarBAZLorem' becomes ['foo', 'Bar', 'BAZ', 'Lorem'].
 *
 * A boundary is inserted:
 *  - between a lowercase letter and the uppercase letter that follows it
 *    ('fooBar' -> 'foo', 'Bar');
 *  - between the last letter of an uppercase run and the capitalized word
 *    that follows it ('BAZLorem' -> 'BAZ', 'Lorem').
 *
 * Only ASCII letters are considered. Spaces already present in `text` also
 * act as separators, and a string without any boundary is returned as a
 * single-element array.
 *
 * @param text the word to split
 * @returns the parts of `text`, in their original order
 */
export function splitCamelCase(text: string): string[] {
  // `$&` re-inserts the matched letter; the lookaheads keep the following
  // letters out of the match so they start the next part.
  return text
    .replace(/[a-z](?=[A-Z])|[A-Z](?=[A-Z][a-z])/g, '$& ')
    .split(' ')
}