Fix: issue#190 (#317)
* Fixed search results with diacritics
  - Caches are now stored with diacritics regardless of settings.ignoreDiacritics
  - Modified getMatches() behavior to return results in the correct form
  - Modified ResultItemVault.svelte
* Fixed highlighting of words with commas and periods
  - Removed commas and periods from matches
* Fixed highlighting of Cyrillic characters
  - Changed highlight regexp determination to be based on character type
* Fixed highlighting problem with Japanese and Korean
  - Marked some Japanese diacritics to be exempt from removal
  - Added NFC normalization to keep the correct form of Korean characters
* Fixed highlighting of words with punctuation
  - Deleted space/punctuation list from stringsToRegex()
  - It seems to work correctly with words with punctuation and hyphenated words, AFAIK
* Deleted some unused imports
* Modified the comment
* Added comment
* Fixed highlighting issue with comma and period
* Fixed highlighting issue with caret and other symbols
  - Added `^` to separators
  - Changed regex to use separators
  - Added escape of `^` from diacritics removal
This commit is contained in:
@@ -115,14 +115,23 @@ export function getTagsFromMetadata(metadata: CachedMetadata | null): string[] {
|
||||
* https://stackoverflow.com/a/37511463
|
||||
*/
|
||||
export function removeDiacritics(str: string): string {
  // Strips diacritical marks from `str` while preserving:
  //   - backticks and carets (both carry the Unicode `Diacritic` property and
  //     would otherwise be removed along with real accents),
  //   - the Japanese prolonged sound mark and (semi-)voiced sound marks,
  //   - the composed form of Korean syllables.
  // Returns '' when `str` is null or undefined.
  if (str === null || str === undefined) {
    return ''
  }

  // Any code point with the `Diacritic` property, except the Japanese marks
  // that must stay distinguishable:
  //   U+30FC prolonged sound mark, U+309A semi-voiced mark, U+3099 voiced mark.
  const strippable = /(?![\u30FC\u309A\u3099])\p{Diacritic}/gu

  const backtickToken = '[__omnisearch__backtick__]'
  const caretToken = '[__omnisearch__caret__]'

  // Mask backticks (needed for code blocks) and carets BEFORE stripping,
  // because both characters match \p{Diacritic}.
  // https://stackoverflow.com/a/36100275
  let masked = str.split('`').join(backtickToken)
  masked = masked.split('^').join(caretToken)

  // Single stripping pass: decompose (NFD) so accents become separate code
  // points, remove the strippable ones, then recompose (NFC). The final NFC
  // step is what restores Korean syllables (and kept Japanese voiced marks)
  // to their composed form.
  const stripped = masked.normalize('NFD').replace(strippable, '').normalize('NFC')

  // Restore the masked characters.
  return stripped
    .split(backtickToken)
    .join('`')
    .split(caretToken)
    .join('^')
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user