From c4fa419aa02d704faaac6a46ac785c5bbc868f99 Mon Sep 17 00:00:00 2001
From: YuNing Chen
Date: Fri, 3 Jan 2025 19:25:49 +0800
Subject: [PATCH] perf: move regexp outside func to avoid repeated construction (#430)

---
 src/tools/utils.ts | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/tools/utils.ts b/src/tools/utils.ts
index 1fb49d2..2b4386b 100644
--- a/src/tools/utils.ts
+++ b/src/tools/utils.ts
@@ -106,6 +106,11 @@ export function getTagsFromMetadata(metadata: CachedMetadata | null): string[] {
   return tags
 }
 
+// Define cached diacritics regex once outside the function
+const japaneseDiacritics = ['\\u30FC', '\\u309A', '\\u3099']
+const regexpExclude = japaneseDiacritics.join('|')
+const diacriticsRegex = new RegExp(`(?!${regexpExclude})\\p{Diacritic}`, 'gu')
+
 /**
  * https://stackoverflow.com/a/37511463
  */
@@ -114,11 +119,6 @@ export function removeDiacritics(str: string, arabic = false): string {
     return ''
   }
 
-  // Japanese diacritics that should be distinguished
-  const japaneseDiacritics: string[] = ['\\u30FC', '\\u309A', '\\u3099']
-  const regexpExclude: string = japaneseDiacritics.join('|')
-  const regexp: RegExp = new RegExp(`(?!${regexpExclude})\\p{Diacritic}`, 'gu')
-
   if (arabic) {
     // Arabic diacritics
     // https://stackoverflow.com/a/40959537
@@ -139,7 +139,7 @@ export function removeDiacritics(str: string, arabic = false): string {
   // Keep caret same as above
   str = str.replaceAll('^', '[__omnisearch__caret__]')
   // To keep right form of Korean character, NFC normalization is necessary
-  str = str.normalize('NFD').replace(regexp, '').normalize('NFC')
+  str = str.normalize('NFD').replace(diacriticsRegex, '').normalize('NFC')
   str = str.replaceAll('[__omnisearch__backtick__]', '`')
   str = str.replaceAll('[__omnisearch__caret__]', '^')
   return str