#176 - WIP tokenization of CamelCase words
Technically works, but highlighting needs a rework
This commit is contained in:
@@ -10,6 +10,7 @@ import { settings } from '../settings'
|
|||||||
import {
|
import {
|
||||||
chunkArray,
|
chunkArray,
|
||||||
removeDiacritics,
|
removeDiacritics,
|
||||||
|
splitCamelCase,
|
||||||
stringsToRegex,
|
stringsToRegex,
|
||||||
stripMarkdownCharacters,
|
stripMarkdownCharacters,
|
||||||
} from '../tools/utils'
|
} from '../tools/utils'
|
||||||
@@ -25,7 +26,7 @@ const tokenize = (text: string): string[] => {
|
|||||||
return tokens.flatMap(word =>
|
return tokens.flatMap(word =>
|
||||||
chsRegex.test(word) ? chsSegmenter.cut(word) : [word]
|
chsRegex.test(word) ? chsSegmenter.cut(word) : [word]
|
||||||
)
|
)
|
||||||
} else return tokens
|
} else return tokens.flatMap(splitCamelCase)
|
||||||
}
|
}
|
||||||
|
|
||||||
export class Omnisearch {
|
export class Omnisearch {
|
||||||
|
|||||||
@@ -307,3 +307,11 @@ export function chunkArray<T>(arr: T[], len: number): T[][] {
|
|||||||
|
|
||||||
return chunks
|
return chunks
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Splits a camelCase / PascalCase word into its component words.
 *
 * Converts 'fooBarBAZLorem' into ['foo', 'Bar', 'BAZ', 'Lorem'].
 *
 * A boundary is inserted:
 *  - after a lowercase letter that is followed by an uppercase letter
 *    ('fooBar' -> 'foo', 'Bar');
 *  - after the last uppercase letter of an acronym, i.e. an uppercase letter
 *    followed by an uppercase letter and then a lowercase letter
 *    ('BAZLorem' -> 'BAZ', 'Lorem').
 *
 * Only ASCII letters (a-z, A-Z) are considered. Boundaries are marked with a
 * space before splitting, so any space already present in `text` also acts
 * as a separator.
 *
 * @param text The word to split.
 * @returns The component words, in order. A word without any case boundary
 * (including the empty string) is returned as a single-element array.
 */
export function splitCamelCase(text: string): string[] {
  return text
    // Mark each boundary with a space; the lookaheads keep the following
    // characters out of the match so consecutive boundaries are all found.
    .replace(/([a-z](?=[A-Z])|[A-Z](?=[A-Z][a-z]))/g, '$1 ')
    .split(' ')
}
|
||||||
|
|||||||
Reference in New Issue
Block a user