Compare commits

..

10 Commits

Author SHA1 Message Date
1297a1034a Ignore aider 2026-02-05 16:00:22 -07:00
b195bf65ee fix: Resolve TypeScript build errors with type imports and assertion
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 15:56:32 -07:00
2ef3a1392f feat: Treat contextual colon-suffixed lines as headings for indexing
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 15:36:39 -07:00
c75d5d89f7 fix: Filter stop words and short tokens from search queries
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 15:24:19 -07:00
df73ab0f1c feat: Filter stop words and short tokens from search index
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 15:21:06 -07:00
637c20905e fix: Improve search tokenizer by adding exact phrase and filtering queries
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 13:59:00 -07:00
c4c4e782fb fix: Correct single-word query ranking to prioritize headings
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 13:21:14 -07:00
2b00a7af2d fix: Prioritize exact phrase matches and fix case-sensitive search
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 13:09:36 -07:00
3c84980903 fix: Prevent premature HTML escaping of search terms
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 13:03:34 -07:00
f17f9756a3 fix: Prevent search tokenizer from splitting on apostrophes
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 12:57:44 -07:00
6 changed files with 104 additions and 29 deletions

.gitignore (vendored): 1 line added
View File

@@ -22,3 +22,4 @@ dist
coverage
package-lock.json
Doc Omnisearch/.obsidian
.aider*

View File — globals module (filename not captured in export; defines SEPARATORS and SPACE_OR_PUNCTUATION, imported elsewhere as '../globals')

@@ -112,7 +112,7 @@ export type AIImageAnalyzerAPI = {
}
export const SEPARATORS =
/[|\t\n\r\^"= -#%-*,.`\/<>:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]/
/[|\t\n\r\^"= -#-%&(*,.`\/<>:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2017\u201A-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]/
.toString()
.slice(1, -1)
export const SPACE_OR_PUNCTUATION = new RegExp(`${SEPARATORS}+`, 'u')

View File — DocumentsRepository class (filename not captured in export)

@@ -229,6 +229,28 @@ export class DocumentsRepository {
metadata?.frontmatter?.[this.plugin.settings.displayTitle] ?? ''
}
const tags = getTagsFromMetadata(metadata)
const headings1 = metadata ? extractHeadingsFromCache(metadata, 1) : []
const headings2 = metadata ? extractHeadingsFromCache(metadata, 2) : []
const headings3 = metadata ? extractHeadingsFromCache(metadata, 3) : []
const lines = content.split('\n')
const colonHeadings: string[] = []
for (let i = 0; i < lines.length; i++) {
const line = lines[i].trim()
if (line.endsWith(':')) {
const prevLine = i > 0 ? lines[i - 1].trim() : null
const nextLine = i < lines.length - 1 ? lines[i + 1].trim() : null
if (
prevLine === '' &&
nextLine !== null &&
nextLine !== ''
) {
colonHeadings.push(line.slice(0, -1).trim())
}
}
}
return {
basename: file.basename,
displayTitle,
@@ -241,15 +263,9 @@ export class DocumentsRepository {
tags: tags,
unmarkedTags: tags.map(t => t.replace('#', '')),
aliases: getAliasesFromMetadata(metadata).join(''),
headings1: metadata
? extractHeadingsFromCache(metadata, 1).join(' ')
: '',
headings2: metadata
? extractHeadingsFromCache(metadata, 2).join(' ')
: '',
headings3: metadata
? extractHeadingsFromCache(metadata, 3).join(' ')
: '',
headings1: headings1.join(' '),
headings2: headings2.join(' '),
headings3: [...headings3, ...colonHeadings].join(' '),
}
}
}

View File — SearchEngine class (filename not captured in export)

@@ -8,6 +8,7 @@ import {
type DocumentRef,
type IndexedDocument,
type ResultNote,
type SearchMatch,
} from '../globals'
import {
@@ -22,6 +23,8 @@ import { sortBy } from 'lodash-es'
import type OmnisearchPlugin from '../main'
import { Tokenizer } from './tokenizer'
const STOP_WORDS = new Set(["a", "an", "the", "and", "or", "but", "if", "in", "on", "at", "by", "for", "with", "to", "from", "of", "is", "it", "that", "this"])
export class SearchEngine {
private tokenizer: Tokenizer
private minisearch: MiniSearch
@@ -481,6 +484,16 @@ export class SearchEngine {
query
)
let bestMatch: SearchMatch | undefined
if (
matches.length > 0 &&
(query.query.text.length > 1 || query.getExactTerms().length > 0) &&
query.getBestStringForExcerpt() &&
matches[0].match.toLowerCase() === query.getBestStringForExcerpt()
) {
bestMatch = matches.shift()
}
const lowerCaseBasename = note.basename.toLowerCase()
const titleMatchWord = foundWords.find(word =>
lowerCaseBasename.includes(word.toLowerCase())
@@ -514,6 +527,10 @@ export class SearchEngine {
}
}
if (bestMatch) {
matches.unshift(bestMatch)
}
logVerbose(`Matches for note "${note.path}"`, matches)
const resultNote: ResultNote = {
score: result.score,
@@ -559,11 +576,20 @@ export class SearchEngine {
}
return (doc as any)[fieldName]
},
processTerm: (term: string) =>
(this.plugin.settings.ignoreDiacritics
? removeDiacritics(term, this.plugin.settings.ignoreArabicDiacritics)
: term
).toLowerCase(),
processTerm: (term: string) => {
const processedTerm = (
this.plugin.settings.ignoreDiacritics
? removeDiacritics(
term,
this.plugin.settings.ignoreArabicDiacritics
)
: term
).toLowerCase()
if (processedTerm.length < 3 || STOP_WORDS.has(processedTerm)) {
return null
}
return processedTerm
},
idField: 'path',
fields: [
'basename',

View File — Tokenizer class (filename not captured in export)

@@ -1,10 +1,12 @@
import type { QueryCombination } from 'minisearch'
import type { Query, QueryCombination } from 'minisearch'
import { BRACKETS_AND_SPACE, chsRegex, SPACE_OR_PUNCTUATION } from '../globals'
import { logVerbose, splitCamelCase, splitHyphens } from '../tools/utils'
import type OmnisearchPlugin from '../main'
const markdownLinkExtractor = require('markdown-link-extractor')
const STOP_WORDS = new Set(["a", "an", "the", "and", "or", "but", "if", "in", "on", "at", "by", "for", "with", "to", "from", "of", "is", "it", "that", "this"])
export class Tokenizer {
constructor(private plugin: OmnisearchPlugin) {}
@@ -60,21 +62,47 @@ export class Tokenizer {
public tokenizeForSearch(text: string): QueryCombination {
// Extract urls and remove them from the query
const urls: string[] = markdownLinkExtractor(text)
const originalText = text
text = urls.reduce((acc, url) => acc.replace(url, ''), text)
const tokens = [...this.tokenizeTokens(text), ...urls].filter(Boolean)
const isStopWord = (term: string): boolean => {
const lower = term.toLowerCase()
return lower.length < 3 || STOP_WORDS.has(lower)
}
const queries = [
{ combineWith: 'AND', queries: [originalText] },
{ combineWith: 'AND', queries: tokens },
{
combineWith: 'AND',
queries: this.tokenizeWords(text).filter(Boolean),
},
{ combineWith: 'AND', queries: tokens.flatMap(splitHyphens) },
{ combineWith: 'AND', queries: tokens.flatMap(splitCamelCase) },
].map(q => ({
...q,
queries: q.queries.filter(t => !isStopWord(t)),
}))
const nonEmptyQueries = queries.filter(q => q.queries.length > 0)
// Deduplicate
const uniqueQueries = []
const seen = new Set()
for (const q of nonEmptyQueries) {
// sort to make order irrelevant for duplication check
const key = JSON.stringify(q.queries.sort())
if (!seen.has(key)) {
uniqueQueries.push(q)
seen.add(key)
}
}
return {
combineWith: 'OR',
queries: [
{ combineWith: 'AND', queries: tokens },
{
combineWith: 'AND',
queries: this.tokenizeWords(text).filter(Boolean),
},
{ combineWith: 'AND', queries: tokens.flatMap(splitHyphens) },
{ combineWith: 'AND', queries: tokens.flatMap(splitCamelCase) },
],
queries: uniqueQueries as Query[],
}
}

View File — TextProcessor class (filename not captured in export)

@@ -64,7 +64,6 @@ export class TextProcessor {
words: string[],
query?: Query
): SearchMatch[] {
words = words.map(escapeHTML)
const reg = this.stringsToRegex(words)
const originalText = text
// text = text.toLowerCase().replace(new RegExp(SEPARATORS, 'gu'), ' ')
@@ -96,11 +95,16 @@ export class TextProcessor {
query &&
(query.query.text.length > 1 || query.getExactTerms().length > 0)
) {
const best = text.indexOf(query.getBestStringForExcerpt())
if (best > -1 && matches.find(m => m.offset === best)) {
const bestMatchStr = query.getBestStringForExcerpt()
const best = text.toLowerCase().indexOf(bestMatchStr)
if (best > -1) {
// We found the full query. We make it the first result, and remove any other match that it contains.
matches = matches.filter(
m => m.offset < best || m.offset >= best + bestMatchStr.length
)
matches.unshift({
offset: best,
match: query.getBestStringForExcerpt(),
match: originalText.substring(best, best + bestMatchStr.length),
})
}
}