Compare commits

...

16 Commits

Author SHA1 Message Date
5e5708de4e Add installation instructions to README 2026-02-06 09:43:57 -07:00
70deab0b77 Add main.js to repo for easy distribution 2026-02-06 09:28:19 -07:00
269a1e6ea4 Organize README, add aka info 2026-02-06 09:26:39 -07:00
9e68a725d0 Make aka header matching stricter 2026-02-06 09:12:28 -07:00
44da87a29d feat: Extract 'aka' lines from first paragraph for H1 indexing
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-06 09:06:34 -07:00
61a3089c09 Customize README 2026-02-05 16:18:13 -07:00
1297a1034a Ignore aider 2026-02-05 16:00:22 -07:00
b195bf65ee fix: Resolve TypeScript build errors with type imports and assertion
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 15:56:32 -07:00
2ef3a1392f feat: Treat contextual colon-suffixed lines as headings for indexing
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 15:36:39 -07:00
c75d5d89f7 fix: Filter stop words and short tokens from search queries
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 15:24:19 -07:00
df73ab0f1c feat: Filter stop words and short tokens from search index
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 15:21:06 -07:00
637c20905e fix: Improve search tokenizer by adding exact phrase and filtering queries
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 13:59:00 -07:00
c4c4e782fb fix: Correct single-word query ranking to prioritize headings
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 13:21:14 -07:00
2b00a7af2d fix: Prioritize exact phrase matches and fix case-sensitive search
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 13:09:36 -07:00
3c84980903 fix: Prevent premature HTML escaping of search terms
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 13:03:34 -07:00
f17f9756a3 fix: Prevent search tokenizer from splitting on apostrophes
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 12:57:44 -07:00
9 changed files with 367 additions and 41 deletions

1
.gitignore vendored
View File

@@ -22,3 +22,4 @@ dist
coverage coverage
package-lock.json package-lock.json
Doc Omnisearch/.obsidian Doc Omnisearch/.obsidian
.aider*

View File

@@ -1,23 +1,83 @@
# Omnisearch for Obsidian # Tannersearch for Obsidian
[![Sponsor me](https://img.shields.io/badge/%E2%9D%A4%20Like%20this%20plugin%3F-Sponsor%20me!-ff69b4)](https://github.com/sponsors/scambier) This is a fork of [obsidian-omnisearch](https://github.com/scambier/obsidian-omnisearch) (by [@scambier](https://github.com/scambier)) with the following changes:
![Obsidian plugin](https://img.shields.io/endpoint?url=https%3A%2F%2Fscambier.xyz%2Fobsidian-endpoints%2Fomnisearch.json)
![GitHub release (latest by date and asset)](https://img.shields.io/github/downloads/scambier/obsidian-omnisearch/latest/main.js)
![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/scambier/obsidian-omnisearch)
![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/scambier/obsidian-omnisearch?include_prereleases&label=BRAT%20beta)
> 🏆 Winner of the _[2023 Gems of the Year](https://obsidian.md/blog/2023-goty-winners/)_ in the "Existing plugin" category 🏆 When opening a result, the cursor placement prioritizes note titles over headings over content
- this means if the note's name matches your search, it opens at the top instead of on a random match in the middle of the note
Search terms aren't split on apostrophes
- searching for "Sun's BBQ" searches for ["Sun's", "BBQ"] instead of ["Sun", "s", "BBQ"]
Search terms less than 3 characters long or common words are ignored
- ignored words: "a", "an", "the", "and", "or", "but", "if", "in", "on", "at", "by", "for", "with", "to", "from", "of", "is", "it", "that", "this"
The first line of a paragraph is ranked like Heading 3 if it ends in a colon
- for example,
```
Japan trip:
- passport
- cash
- umbrella
```
... "Japan trip:" is indexed and ranked the same as "### Japan trip"
If the first paragraph of a note contains a line like "aka other name", then "other name" is ranked like H1
- for example,
```
see also: [[Travel General]]
Aka: packing list
content
```
... "packing list" is indexed and ranked the same as "# packing list". Note that the "Aka" prefix is matched case-insensitively and the trailing colon is optional.
--- ### Fork Installation
Ensure the original Omnisearch plugin is installed; see the instructions below.
Download main.js into your `.obsidian/plugins/omnisearch` directory, example:
```
$ cd ~/notes/.obsidian/plugins/omnisearch
$ mv main.js main.js.bak
$ wget https://raw.githubusercontent.com/tannercollin/obsidian-tannersearch/refs/heads/master/dist/main.js
```
In Obsidian, open Settings > Community Plugins. Disable and re-enable Omnisearch.
Open Settings > Omnisearch. Scroll to the bottom and click "Clear cache data".
Restart Obsidian.
Note: on mobile you'll have to use some sort of sync or downloader and move the main.js over to your vault.
### Building the Fork
If you'd rather build the fork yourself:
```
$ git clone https://github.com/tannercollin/obsidian-tannersearch.git
$ cd obsidian-tannersearch/
$ npm install --legacy-peer-deps
$ npm run build
```
Then copy `dist/main.js` as above.
# Original README
**Omnisearch** is a search engine that "_just works_". **Omnisearch** is a search engine that "_just works_".
It always instantly shows you the most relevant results, thanks to its smart weighting algorithm. It always instantly shows you the most relevant results, thanks to its smart weighting algorithm.
Under the hood, it uses the excellent [MiniSearch](https://github.com/lucaong/minisearch) library. This free plugin is totally unrelated to the omnisearch.ai paid product. Under the hood, it uses the excellent [MiniSearch](https://github.com/lucaong/minisearch) library. This free plugin is totally unrelated to the omnisearch.ai paid product.
![](https://raw.githubusercontent.com/scambier/obsidian-omnisearch/master/images/omnisearch.gif)
## Documentation ## Documentation
https://publish.obsidian.md/omnisearch/Index https://publish.obsidian.md/omnisearch/Index
@@ -72,5 +132,3 @@ Omnisearch is licensed under [GPL-3](https://tldrlegal.com/license/gnu-general-p
To all people who donate through [Ko-Fi](https://ko-fi.com/scambier) To all people who donate through [Ko-Fi](https://ko-fi.com/scambier)
or [Github Sponsors](https://github.com/sponsors/scambier) ❤ or [Github Sponsors](https://github.com/sponsors/scambier) ❤
![JetBrains logo](https://resources.jetbrains.com/storage/products/company/brand/logos/jetbrains.svg)

187
dist/main.js vendored Normal file

File diff suppressed because one or more lines are too long

Binary file not shown.

Before

Width:  |  Height:  |  Size: 475 KiB

View File

@@ -112,7 +112,7 @@ export type AIImageAnalyzerAPI = {
} }
export const SEPARATORS = export const SEPARATORS =
/[|\t\n\r\^"= -#%-*,.`\/<>:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]/ /[|\t\n\r\^"= 
-#-%&(*,.`\/<>:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2017\u201A-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]/
.toString() .toString()
.slice(1, -1) .slice(1, -1)
export const SPACE_OR_PUNCTUATION = new RegExp(`${SEPARATORS}+`, 'u') export const SPACE_OR_PUNCTUATION = new RegExp(`${SEPARATORS}+`, 'u')

View File

@@ -229,6 +229,34 @@ export class DocumentsRepository {
metadata?.frontmatter?.[this.plugin.settings.displayTitle] ?? '' metadata?.frontmatter?.[this.plugin.settings.displayTitle] ?? ''
} }
const tags = getTagsFromMetadata(metadata) const tags = getTagsFromMetadata(metadata)
const headings1 = metadata ? extractHeadingsFromCache(metadata, 1) : []
const headings2 = metadata ? extractHeadingsFromCache(metadata, 2) : []
const headings3 = metadata ? extractHeadingsFromCache(metadata, 3) : []
const akaHeadings: string[] = content
.split(/\n\s*\n/)[0]
.split('\n')
.map(line => line.match(/^aka:?\s*(.+)$/i)?.[1]?.trim())
.filter((heading): heading is string => !!heading)
const lines = content.split('\n')
const colonHeadings: string[] = []
for (let i = 0; i < lines.length; i++) {
const line = lines[i].trim()
if (line.endsWith(':')) {
const prevLine = i > 0 ? lines[i - 1].trim() : null
const nextLine = i < lines.length - 1 ? lines[i + 1].trim() : null
if (
prevLine === '' &&
nextLine !== null &&
nextLine !== ''
) {
colonHeadings.push(line.slice(0, -1).trim())
}
}
}
return { return {
basename: file.basename, basename: file.basename,
displayTitle, displayTitle,
@@ -241,15 +269,9 @@ export class DocumentsRepository {
tags: tags, tags: tags,
unmarkedTags: tags.map(t => t.replace('#', '')), unmarkedTags: tags.map(t => t.replace('#', '')),
aliases: getAliasesFromMetadata(metadata).join(''), aliases: getAliasesFromMetadata(metadata).join(''),
headings1: metadata headings1: [...headings1, ...akaHeadings].join(' '),
? extractHeadingsFromCache(metadata, 1).join(' ') headings2: headings2.join(' '),
: '', headings3: [...headings3, ...colonHeadings].join(' '),
headings2: metadata
? extractHeadingsFromCache(metadata, 2).join(' ')
: '',
headings3: metadata
? extractHeadingsFromCache(metadata, 3).join(' ')
: '',
} }
} }
} }

View File

@@ -8,6 +8,7 @@ import {
type DocumentRef, type DocumentRef,
type IndexedDocument, type IndexedDocument,
type ResultNote, type ResultNote,
type SearchMatch,
} from '../globals' } from '../globals'
import { import {
@@ -22,6 +23,8 @@ import { sortBy } from 'lodash-es'
import type OmnisearchPlugin from '../main' import type OmnisearchPlugin from '../main'
import { Tokenizer } from './tokenizer' import { Tokenizer } from './tokenizer'
const STOP_WORDS = new Set(["a", "an", "the", "and", "or", "but", "if", "in", "on", "at", "by", "for", "with", "to", "from", "of", "is", "it", "that", "this"])
export class SearchEngine { export class SearchEngine {
private tokenizer: Tokenizer private tokenizer: Tokenizer
private minisearch: MiniSearch private minisearch: MiniSearch
@@ -481,6 +484,16 @@ export class SearchEngine {
query query
) )
let bestMatch: SearchMatch | undefined
if (
matches.length > 0 &&
(query.query.text.length > 1 || query.getExactTerms().length > 0) &&
query.getBestStringForExcerpt() &&
matches[0].match.toLowerCase() === query.getBestStringForExcerpt()
) {
bestMatch = matches.shift()
}
const lowerCaseBasename = note.basename.toLowerCase() const lowerCaseBasename = note.basename.toLowerCase()
const titleMatchWord = foundWords.find(word => const titleMatchWord = foundWords.find(word =>
lowerCaseBasename.includes(word.toLowerCase()) lowerCaseBasename.includes(word.toLowerCase())
@@ -514,6 +527,10 @@ export class SearchEngine {
} }
} }
if (bestMatch) {
matches.unshift(bestMatch)
}
logVerbose(`Matches for note "${note.path}"`, matches) logVerbose(`Matches for note "${note.path}"`, matches)
const resultNote: ResultNote = { const resultNote: ResultNote = {
score: result.score, score: result.score,
@@ -559,11 +576,20 @@ export class SearchEngine {
} }
return (doc as any)[fieldName] return (doc as any)[fieldName]
}, },
processTerm: (term: string) => processTerm: (term: string) => {
(this.plugin.settings.ignoreDiacritics const processedTerm = (
? removeDiacritics(term, this.plugin.settings.ignoreArabicDiacritics) this.plugin.settings.ignoreDiacritics
: term ? removeDiacritics(
).toLowerCase(), term,
this.plugin.settings.ignoreArabicDiacritics
)
: term
).toLowerCase()
if (processedTerm.length < 3 || STOP_WORDS.has(processedTerm)) {
return null
}
return processedTerm
},
idField: 'path', idField: 'path',
fields: [ fields: [
'basename', 'basename',

View File

@@ -1,10 +1,12 @@
import type { QueryCombination } from 'minisearch' import type { Query, QueryCombination } from 'minisearch'
import { BRACKETS_AND_SPACE, chsRegex, SPACE_OR_PUNCTUATION } from '../globals' import { BRACKETS_AND_SPACE, chsRegex, SPACE_OR_PUNCTUATION } from '../globals'
import { logVerbose, splitCamelCase, splitHyphens } from '../tools/utils' import { logVerbose, splitCamelCase, splitHyphens } from '../tools/utils'
import type OmnisearchPlugin from '../main' import type OmnisearchPlugin from '../main'
const markdownLinkExtractor = require('markdown-link-extractor') const markdownLinkExtractor = require('markdown-link-extractor')
const STOP_WORDS = new Set(["a", "an", "the", "and", "or", "but", "if", "in", "on", "at", "by", "for", "with", "to", "from", "of", "is", "it", "that", "this"])
export class Tokenizer { export class Tokenizer {
constructor(private plugin: OmnisearchPlugin) {} constructor(private plugin: OmnisearchPlugin) {}
@@ -60,21 +62,47 @@ export class Tokenizer {
public tokenizeForSearch(text: string): QueryCombination { public tokenizeForSearch(text: string): QueryCombination {
// Extract urls and remove them from the query // Extract urls and remove them from the query
const urls: string[] = markdownLinkExtractor(text) const urls: string[] = markdownLinkExtractor(text)
const originalText = text
text = urls.reduce((acc, url) => acc.replace(url, ''), text) text = urls.reduce((acc, url) => acc.replace(url, ''), text)
const tokens = [...this.tokenizeTokens(text), ...urls].filter(Boolean) const tokens = [...this.tokenizeTokens(text), ...urls].filter(Boolean)
const isStopWord = (term: string): boolean => {
const lower = term.toLowerCase()
return lower.length < 3 || STOP_WORDS.has(lower)
}
const queries = [
{ combineWith: 'AND', queries: [originalText] },
{ combineWith: 'AND', queries: tokens },
{
combineWith: 'AND',
queries: this.tokenizeWords(text).filter(Boolean),
},
{ combineWith: 'AND', queries: tokens.flatMap(splitHyphens) },
{ combineWith: 'AND', queries: tokens.flatMap(splitCamelCase) },
].map(q => ({
...q,
queries: q.queries.filter(t => !isStopWord(t)),
}))
const nonEmptyQueries = queries.filter(q => q.queries.length > 0)
// Deduplicate
const uniqueQueries = []
const seen = new Set()
for (const q of nonEmptyQueries) {
// sort to make order irrelevant for duplication check
const key = JSON.stringify(q.queries.sort())
if (!seen.has(key)) {
uniqueQueries.push(q)
seen.add(key)
}
}
return { return {
combineWith: 'OR', combineWith: 'OR',
queries: [ queries: uniqueQueries as Query[],
{ combineWith: 'AND', queries: tokens },
{
combineWith: 'AND',
queries: this.tokenizeWords(text).filter(Boolean),
},
{ combineWith: 'AND', queries: tokens.flatMap(splitHyphens) },
{ combineWith: 'AND', queries: tokens.flatMap(splitCamelCase) },
],
} }
} }

View File

@@ -64,7 +64,6 @@ export class TextProcessor {
words: string[], words: string[],
query?: Query query?: Query
): SearchMatch[] { ): SearchMatch[] {
words = words.map(escapeHTML)
const reg = this.stringsToRegex(words) const reg = this.stringsToRegex(words)
const originalText = text const originalText = text
// text = text.toLowerCase().replace(new RegExp(SEPARATORS, 'gu'), ' ') // text = text.toLowerCase().replace(new RegExp(SEPARATORS, 'gu'), ' ')
@@ -96,11 +95,16 @@ export class TextProcessor {
query && query &&
(query.query.text.length > 1 || query.getExactTerms().length > 0) (query.query.text.length > 1 || query.getExactTerms().length > 0)
) { ) {
const best = text.indexOf(query.getBestStringForExcerpt()) const bestMatchStr = query.getBestStringForExcerpt()
if (best > -1 && matches.find(m => m.offset === best)) { const best = text.toLowerCase().indexOf(bestMatchStr)
if (best > -1) {
// We found the full query. We make it the first result, and remove any other match that it contains.
matches = matches.filter(
m => m.offset < best || m.offset >= best + bestMatchStr.length
)
matches.unshift({ matches.unshift({
offset: best, offset: best,
match: query.getBestStringForExcerpt(), match: originalText.substring(best, best + bestMatchStr.length),
}) })
} }
} }