From 0ebe903bb9a6d8e2f92ab517e867ddc805600695 Mon Sep 17 00:00:00 2001 From: YuNing Chen Date: Fri, 3 Jan 2025 19:27:19 +0800 Subject: [PATCH] perf: combine token split into 1 pass (#427) --- src/search/tokenizer.ts | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/search/tokenizer.ts b/src/search/tokenizer.ts index 6149070..84f0b73 100644 --- a/src/search/tokenizer.ts +++ b/src/search/tokenizer.ts @@ -27,15 +27,11 @@ export class Tokenizer { } let tokens = this.tokenizeTokens(text, { skipChs: true }) - - // Split hyphenated tokens - tokens = [...tokens, ...tokens.flatMap(splitHyphens)] - - // Split camelCase tokens into "camel" and "case - tokens = [...tokens, ...tokens.flatMap(splitCamelCase)] - - // Add whole words (aka "not tokens") - tokens = [...tokens, ...words] + tokens = [...tokens.flatMap(token => [ + token, + ...splitHyphens(token), + ...splitCamelCase(token), + ]), ...words] // Add urls if (urls.length) {