From df73ab0f1ca730bb8d97a7b650d6796f7dc26d6c Mon Sep 17 00:00:00 2001
From: Tanner Collin
Date: Thu, 5 Feb 2026 15:21:06 -0700
Subject: [PATCH] feat: Filter stop words and short tokens from search index

Short-token filtering is limited to Latin-script tokens so that one- and
two-character terms in other scripts (and short numbers) stay searchable.

Co-authored-by: aider (gemini/gemini-2.5-pro)
---
 src/search/search-engine.ts | 58 +++++++++++++++++++++++++++++++++----
 1 file changed, 53 insertions(+), 5 deletions(-)

diff --git a/src/search/search-engine.ts b/src/search/search-engine.ts
index 513b024..51a9d54 100644
--- a/src/search/search-engine.ts
+++ b/src/search/search-engine.ts
@@ -22,6 +22,39 @@ import { sortBy } from 'lodash-es'
 import type OmnisearchPlugin from '../main'
 import { Tokenizer } from './tokenizer'
 
+/** Shortest Latin-script token kept in the index. */
+const MIN_LATIN_TERM_LENGTH = 3
+
+/**
+ * Matches lowercased tokens made only of ASCII lowercase letters and
+ * characters in the U+00C0-U+024F range (it also matches the empty string).
+ */
+const LATIN_ONLY = /^[a-z\u00c0-\u024f]*$/
+
+/** Common English function words that are dropped instead of indexed. */
+const STOP_WORDS = new Set([
+  'a',
+  'an',
+  'the',
+  'and',
+  'or',
+  'but',
+  'if',
+  'in',
+  'on',
+  'at',
+  'by',
+  'for',
+  'with',
+  'to',
+  'from',
+  'of',
+  'is',
+  'it',
+  'that',
+  'this',
+])
+
 export class SearchEngine {
   private tokenizer: Tokenizer
   private minisearch: MiniSearch
@@ -573,11 +606,26 @@ export class SearchEngine {
         }
         return (doc as any)[fieldName]
       },
-      processTerm: (term: string) =>
-        (this.plugin.settings.ignoreDiacritics
-          ? removeDiacritics(term, this.plugin.settings.ignoreArabicDiacritics)
-          : term
-        ).toLowerCase(),
+      // MiniSearch discards a token when processTerm returns a falsy value.
+      processTerm: (term: string) => {
+        const processedTerm = (
+          this.plugin.settings.ignoreDiacritics
+            ? removeDiacritics(
+                term,
+                this.plugin.settings.ignoreArabicDiacritics
+              )
+            : term
+        ).toLowerCase()
+        // Length-filter Latin-script tokens only: in other scripts (CJK,
+        // Arabic, ...) and for numbers, one or two characters can be a term.
+        const isShortLatin =
+          processedTerm.length < MIN_LATIN_TERM_LENGTH &&
+          LATIN_ONLY.test(processedTerm)
+        if (isShortLatin || STOP_WORDS.has(processedTerm)) {
+          return null
+        }
+        return processedTerm
+      },
       idField: 'path',
       fields: [
         'basename',