feat: Filter stop words and short tokens from search index

Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
This commit is contained in:
2026-02-05 15:21:06 -07:00
parent 637c20905e
commit df73ab0f1c

View File

@@ -22,6 +22,8 @@ import { sortBy } from 'lodash-es'
import type OmnisearchPlugin from '../main'
import { Tokenizer } from './tokenizer'
/**
 * Common English function words, stored lower-cased.
 * Term processing looks tokens up here (after lower-casing them) and
 * discards any token that matches.
 */
const STOP_WORDS = new Set<string>([
  'a',
  'an',
  'the',
  'and',
  'or',
  'but',
  'if',
  'in',
  'on',
  'at',
  'by',
  'for',
  'with',
  'to',
  'from',
  'of',
  'is',
  'it',
  'that',
  'this',
])
export class SearchEngine {
private tokenizer: Tokenizer
private minisearch: MiniSearch
@@ -573,11 +575,20 @@ export class SearchEngine {
}
return (doc as any)[fieldName]
},
// Normalizes a single token and decides whether it is kept.
//
// Steps:
//   1. If the `ignoreDiacritics` setting is on, strip diacritics via
//      `removeDiacritics` (the `ignoreArabicDiacritics` setting is forwarded
//      as its second argument); otherwise use the term untouched.
//   2. Lower-case the result.
//   3. Return `null` when the normalized term is shorter than 3 characters or
//      is listed in STOP_WORDS; otherwise return the normalized term.
//
// Presumably this options object is handed to MiniSearch, whose documented
// `processTerm` contract drops a term when the callback returns a falsy
// value — confirm against the constructor call outside this hunk.
//
// NOTE(review): `length` counts UTF-16 code units, so one- and two-character
// tokens (e.g. short CJK words, "c", "go", "42") are discarded as well —
// confirm this is intended.
processTerm: (term: string) => {
  const processedTerm = (
    this.plugin.settings.ignoreDiacritics
      ? removeDiacritics(term, this.plugin.settings.ignoreArabicDiacritics)
      : term
  ).toLowerCase()

  // Every STOP_WORDS entry of one or two letters is already caught by the
  // length test; the set lookup matters for the longer entries.
  if (processedTerm.length < 3 || STOP_WORDS.has(processedTerm)) {
    return null
  }
  return processedTerm
},
idField: 'path',
fields: [
'basename',