Improve Simplified Chinese text search results

2022-04-30 10:33:12 +08:00
parent 8caeaaf66e
commit 4407e62dcb
1 changed files with 15 additions and 1 deletions
@@ -12,6 +12,20 @@ let minisearchInstance: MiniSearch<IndexedNote>
 // Notes tracked by this module as a string-keyed record; initGlobalSearchIndex
 // resets it to an empty object before building the index.
 // NOTE(review): presumably keyed by note path (the index uses idField: 'path') — confirm.
 let indexedNotes: Record<string, IndexedNote> = {}
 // Matches any single character in the range U+4E00–U+9FA5 (inside the CJK Unified
 // Ideographs block). tokenize uses it to pick the tokens handed to the Chinese segmenter.
 const chsPattern = /[\u4e00-\u9fa5]/
 /**
  * Splits `text` into search tokens.
  *
  * The text is first split on SPACE_OR_PUNCTUATION. When a plugin is registered
  * under the id 'cm-chs-patch', every piece that matches chsPattern is replaced
  * by whatever that plugin's `cut` returns for it; all other pieces are kept
  * unchanged. Without the plugin the plain split result is returned.
  *
  * @param text - The raw text to tokenize.
  * @returns The list of tokens.
  */
 const tokenize = (text: string): string[] => {
  const pieces = text.split(SPACE_OR_PUNCTUATION)
  // The plugin is looked up on every call, so enabling it later takes effect.
  const segmenter = (app as any).plugins.plugins['cm-chs-patch']
  // Guard clause: no segmenter available, the basic split is all we can do.
  if (!segmenter) {
    return pieces
  }
  return pieces.flatMap(piece => {
    if (chsPattern.test(piece)) {
      return segmenter.cut(piece)
    }
    return [piece]
  })
 }
 /**
 * Initializes the MiniSearch instance,
 * and adds all the notes to the index
@@ -19,7 +33,7 @@ let indexedNotes: Record<string, IndexedNote> = {}
 export async function initGlobalSearchIndex(): Promise<void> {
  indexedNotes = {}
  minisearchInstance = new MiniSearch({
-    tokenize: text => text.split(SPACE_OR_PUNCTUATION),
+    tokenize,
    idField: 'path',
    fields: ['basename', 'content', 'headings1', 'headings2', 'headings3'],
  })