Improve Simplified Chinese text search results

This commit is contained in:
aidenlx
2022-04-30 10:33:12 +08:00
parent 8caeaaf66e
commit 4407e62dcb

View File

@@ -12,6 +12,20 @@ let minisearchInstance: MiniSearch<IndexedNote>
// Module-level record of IndexedNote values keyed by string
// (presumably the note path, given `idField: 'path'` — confirm against callers).
// Reset to an empty object at the start of initGlobalSearchIndex.
let indexedNotes: Record<string, IndexedNote> = {}
// Matches any string that contains at least one character in the range
// U+4E00–U+9FA5 (inside the CJK Unified Ideographs block). tokenize uses it
// to decide which words are handed to the Chinese segmenter.
// No `g` flag, so `.test()` is stateless across calls.
const chsPattern = /[\u4e00-\u9fa5]/
/**
 * Splits `text` into search tokens.
 *
 * The text is first split on SPACE_OR_PUNCTUATION. If a plugin is registered
 * under the id 'cm-chs-patch', every resulting word that matches chsPattern
 * is replaced by the output of that plugin's `cut(word)`; all other words are
 * kept as they are. Without the plugin, the plain split result is returned.
 *
 * @param text - The raw text to tokenize.
 * @returns The list of tokens.
 */
const tokenize = (text: string): string[] => {
  const words = text.split(SPACE_OR_PUNCTUATION)
  // The plugin registry is not part of the typed API, hence the cast.
  const segmenter = (app as any).plugins.plugins['cm-chs-patch']

  // No segmenter available: the plain split is the final result.
  if (!segmenter) return words

  return words.flatMap(piece => {
    if (!chsPattern.test(piece)) return [piece]
    return segmenter.cut(piece)
  })
}
/**
* Initializes the MiniSearch instance,
* and adds all the notes to the index
@@ -19,7 +33,7 @@ let indexedNotes: Record<string, IndexedNote> = {}
export async function initGlobalSearchIndex(): Promise<void> {
indexedNotes = {}
minisearchInstance = new MiniSearch({
tokenize: text => text.split(SPACE_OR_PUNCTUATION),
tokenize,
idField: 'path',
fields: ['basename', 'content', 'headings1', 'headings2', 'headings3'],
})