From 095e5f841d2b5831cfdf7ab0e5154d5339ba6985 Mon Sep 17 00:00:00 2001
From: YuNing Chen <v009008777@hotmail.com>
Date: Wed, 2 Aug 2023 15:59:30 +0800
Subject: [PATCH] Change the order of tokenizing text. (#267)

A Chinese user may also have English notes, but the previous implementation skipped hyphen and camelCase splitting whenever the Chinese segmenter was enabled, so those tokens were not handled.

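This commit fixes that by changing the order in which tokens are generated: hyphenated and camelCase tokens are now split first, regardless of the segmenter, and the Chinese segmenter runs afterwards.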
---
 src/search/omnisearch.ts | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/search/omnisearch.ts b/src/search/omnisearch.ts
index a5de141..a91a684 100644
--- a/src/search/omnisearch.ts
+++ b/src/search/omnisearch.ts
@@ -25,6 +25,12 @@ import { sortBy } from 'lodash-es'
 const tokenize = (text: string): string[] => {
   let tokens = text.split(SPACE_OR_PUNCTUATION)
 
+  // Split hyphenated tokens
+  tokens = [...tokens, ...tokens.flatMap(splitHyphens)]
+
+  // Split camelCase tokens into "camel" and "case"
+  tokens = [...tokens, ...tokens.flatMap(splitCamelCase)]
+
   // When enabled, we only use the chsSegmenter,
   // and not the other custom tokenizers
   const chsSegmenter = getChsSegmenter()
@@ -32,12 +38,8 @@ const tokenize = (text: string): string[] => {
     tokens = tokens.flatMap(word =>
       chsRegex.test(word) ? chsSegmenter.cut(word) : [word]
     )
-  } else {
-    // Split camelCase tokens into "camel" and "case
-    tokens = [...tokens, ...tokens.flatMap(splitCamelCase)]
-    // Split hyphenated tokens
-    tokens = [...tokens, ...tokens.flatMap(splitHyphens)]
   }
+
   return tokens
 }