Improve Simplified Chinese text search results

This commit is contained in:
aidenlx
2022-04-30 10:33:12 +08:00
parent 8caeaaf66e
commit 4407e62dcb

View File

@@ -12,6 +12,20 @@ let minisearchInstance: MiniSearch<IndexedNote>
let indexedNotes: Record<string, IndexedNote> = {} let indexedNotes: Record<string, IndexedNote> = {}
// Matches any single character in the range U+4E00–U+9FA5; used below to
// decide which tokens are handed to the Chinese segmenter.
const chsPattern = /[\u4e00-\u9fa5]/

/**
 * Tokenizer handed to MiniSearch.
 *
 * The text is first split on `SPACE_OR_PUNCTUATION`. If a plugin is registered
 * under the id `cm-chs-patch`, every resulting token that contains a character
 * matched by `chsPattern` is replaced by the output of that plugin's `cut()`;
 * all other tokens pass through unchanged. Without the plugin, the plain split
 * result is returned.
 *
 * NOTE(review): `cut()` is presumed to return an array of word segments —
 * confirm against the cm-chs-patch plugin.
 *
 * @param text - Raw text to tokenize.
 * @returns The list of tokens.
 */
const tokenize = (text: string): string[] => {
  const words = text.split(SPACE_OR_PUNCTUATION)

  // The plugin registry is not part of the typed API, hence the `any` cast.
  const segmenter = (app as any).plugins.plugins['cm-chs-patch']
  if (!segmenter) {
    return words
  }

  return words.flatMap(piece => {
    if (chsPattern.test(piece)) {
      return segmenter.cut(piece)
    }
    return [piece]
  })
}
/** /**
* Initializes the MiniSearch instance, * Initializes the MiniSearch instance,
* and adds all the notes to the index * and adds all the notes to the index
@@ -19,7 +33,7 @@ let indexedNotes: Record<string, IndexedNote> = {}
export async function initGlobalSearchIndex(): Promise<void> { export async function initGlobalSearchIndex(): Promise<void> {
indexedNotes = {} indexedNotes = {}
minisearchInstance = new MiniSearch({ minisearchInstance = new MiniSearch({
tokenize: text => text.split(SPACE_OR_PUNCTUATION), tokenize,
idField: 'path', idField: 'path',
fields: ['basename', 'content', 'headings1', 'headings2', 'headings3'], fields: ['basename', 'content', 'headings1', 'headings2', 'headings3'],
}) })