Compare commits

..

16 Commits

Author SHA1 Message Date
5e5708de4e Add installation instructions to README 2026-02-06 09:43:57 -07:00
70deab0b77 Add main.js to repo for easy distribution 2026-02-06 09:28:19 -07:00
269a1e6ea4 Organize README, add aka info 2026-02-06 09:26:39 -07:00
9e68a725d0 Make aka header matching stricter 2026-02-06 09:12:28 -07:00
44da87a29d feat: Extract 'aka' lines from first paragraph for H1 indexing
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-06 09:06:34 -07:00
61a3089c09 Customize README 2026-02-05 16:18:13 -07:00
1297a1034a Ignore aider 2026-02-05 16:00:22 -07:00
b195bf65ee fix: Resolve TypeScript build errors with type imports and assertion
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 15:56:32 -07:00
2ef3a1392f feat: Treat contextual colon-suffixed lines as headings for indexing
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 15:36:39 -07:00
c75d5d89f7 fix: Filter stop words and short tokens from search queries
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 15:24:19 -07:00
df73ab0f1c feat: Filter stop words and short tokens from search index
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 15:21:06 -07:00
637c20905e fix: Improve search tokenizer by adding exact phrase and filtering queries
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 13:59:00 -07:00
c4c4e782fb fix: Correct single-word query ranking to prioritize headings
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 13:21:14 -07:00
2b00a7af2d fix: Prioritize exact phrase matches and fix case-sensitive search
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 13:09:36 -07:00
3c84980903 fix: Prevent premature HTML escaping of search terms
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 13:03:34 -07:00
f17f9756a3 fix: Prevent search tokenizer from splitting on apostrophes
Co-authored-by: aider (gemini/gemini-2.5-pro) <aider@aider.chat>
2026-02-05 12:57:44 -07:00
9 changed files with 367 additions and 41 deletions

1
.gitignore vendored
View File

@@ -22,3 +22,4 @@ dist
coverage
package-lock.json
Doc Omnisearch/.obsidian
.aider*

View File

@@ -1,23 +1,83 @@
# Omnisearch for Obsidian
# Tannersearch for Obsidian
[![Sponsor me](https://img.shields.io/badge/%E2%9D%A4%20Like%20this%20plugin%3F-Sponsor%20me!-ff69b4)](https://github.com/sponsors/scambier)
![Obsidian plugin](https://img.shields.io/endpoint?url=https%3A%2F%2Fscambier.xyz%2Fobsidian-endpoints%2Fomnisearch.json)
![GitHub release (latest by date and asset)](https://img.shields.io/github/downloads/scambier/obsidian-omnisearch/latest/main.js)
![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/scambier/obsidian-omnisearch)
![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/scambier/obsidian-omnisearch?include_prereleases&label=BRAT%20beta)
This is a fork of [obsidian-omnisearch](https://github.com/scambier/obsidian-omnisearch) (by [@scambier](https://github.com/scambier)) with the following changes:
> 🏆 Winner of the _[2023 Gems of the Year](https://obsidian.md/blog/2023-goty-winners/)_ in the "Existing plugin" category 🏆
When opening a result, the cursor placement prioritizes note titles over headings over content
- this means if the note's name matches your search, it opens at the top instead of on a random match in the middle of the note
Search terms aren't split on apostrophes
- searching for "Sun's BBQ" searches for ["Sun's", "BBQ"] instead of ["Sun", "s", "BBQ"]
Search terms less than 3 characters long or common words are ignored
- ignored words: "a", "an", "the", "and", "or", "but", "if", "in", "on", "at", "by", "for", "with", "to", "from", "of", "is", "it", "that", "this"
The first line of a paragraph is ranked like Heading 3 if it ends in a colon
- for example,
```
Japan trip:
- passport
- cash
- umbrella
```
... "Japan trip:" is indexed and ranked the same as "### Japan trip"
If the first paragraph of a note contains a line like "aka other name", then "other name" is ranked like H1
- for example,
```
see also: [[Travel General]]
Aka: packing list
content
```
... "packing list" is indexed and ranked the same as "# packing list". Note that the "Aka" prefix is matched case-insensitively and the colon after it is optional.
---
### Fork Installation
Ensure the original Omnisearch plugin is installed (see instructions below).
Download main.js into your `.obsidian/plugins/omnisearch` directory, example:
```
$ cd ~/notes/.obsidian/plugins/omnisearch
$ mv main.js main.js.bak
$ wget https://raw.githubusercontent.com/tannercollin/obsidian-tannersearch/refs/heads/master/dist/main.js
```
In Obsidian, open Settings > Community Plugins. Disable and enable Omnisearch.
Open Settings > Omnisearch. Scroll to the bottom and click "Clear cache" to clear the cached index data.
Restart Obsidian.
Note: on mobile you'll have to use some sort of sync or downloader and move the main.js over to your vault.
### Building the Fork
If you'd rather build the fork yourself:
```
$ git clone https://github.com/tannercollin/obsidian-tannersearch.git
$ cd obsidian-tannersearch/
$ npm install --legacy-peer-deps
$ npm run build
```
Then copy `dist/main.js` as above.
# Original README
**Omnisearch** is a search engine that "_just works_".
It always instantly shows you the most relevant results, thanks to its smart weighting algorithm.
Under the hood, it uses the excellent [MiniSearch](https://github.com/lucaong/minisearch) library. This free plugin is totally unrelated to the omnisearch.ai paid product.
![](https://raw.githubusercontent.com/scambier/obsidian-omnisearch/master/images/omnisearch.gif)
## Documentation
https://publish.obsidian.md/omnisearch/Index
@@ -72,5 +132,3 @@ Omnisearch is licensed under [GPL-3](https://tldrlegal.com/license/gnu-general-p
To all people who donate through [Ko-Fi](https://ko-fi.com/scambier)
or [Github Sponsors](https://github.com/sponsors/scambier) ❤
![JetBrains logo](https://resources.jetbrains.com/storage/products/company/brand/logos/jetbrains.svg)

187
dist/main.js vendored Normal file

File diff suppressed because one or more lines are too long

Binary file not shown.

Before

Width:  |  Height:  |  Size: 475 KiB

View File

@@ -112,7 +112,7 @@ export type AIImageAnalyzerAPI = {
}
export const SEPARATORS =
/[|\t\n\r\^"= -#%-*,.`\/<>:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]/
/[|\t\n\r\^"= -#-%&(*,.`\/<>:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2017\u201A-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]/
.toString()
.slice(1, -1)
export const SPACE_OR_PUNCTUATION = new RegExp(`${SEPARATORS}+`, 'u')

View File

@@ -229,6 +229,34 @@ export class DocumentsRepository {
metadata?.frontmatter?.[this.plugin.settings.displayTitle] ?? ''
}
const tags = getTagsFromMetadata(metadata)
const headings1 = metadata ? extractHeadingsFromCache(metadata, 1) : []
const headings2 = metadata ? extractHeadingsFromCache(metadata, 2) : []
const headings3 = metadata ? extractHeadingsFromCache(metadata, 3) : []
const akaHeadings: string[] = content
.split(/\n\s*\n/)[0]
.split('\n')
.map(line => line.match(/^aka:?\s*(.+)$/i)?.[1]?.trim())
.filter((heading): heading is string => !!heading)
const lines = content.split('\n')
const colonHeadings: string[] = []
for (let i = 0; i < lines.length; i++) {
const line = lines[i].trim()
if (line.endsWith(':')) {
const prevLine = i > 0 ? lines[i - 1].trim() : null
const nextLine = i < lines.length - 1 ? lines[i + 1].trim() : null
if (
prevLine === '' &&
nextLine !== null &&
nextLine !== ''
) {
colonHeadings.push(line.slice(0, -1).trim())
}
}
}
return {
basename: file.basename,
displayTitle,
@@ -241,15 +269,9 @@ export class DocumentsRepository {
tags: tags,
unmarkedTags: tags.map(t => t.replace('#', '')),
aliases: getAliasesFromMetadata(metadata).join(''),
headings1: metadata
? extractHeadingsFromCache(metadata, 1).join(' ')
: '',
headings2: metadata
? extractHeadingsFromCache(metadata, 2).join(' ')
: '',
headings3: metadata
? extractHeadingsFromCache(metadata, 3).join(' ')
: '',
headings1: [...headings1, ...akaHeadings].join(' '),
headings2: headings2.join(' '),
headings3: [...headings3, ...colonHeadings].join(' '),
}
}
}

View File

@@ -8,6 +8,7 @@ import {
type DocumentRef,
type IndexedDocument,
type ResultNote,
type SearchMatch,
} from '../globals'
import {
@@ -22,6 +23,8 @@ import { sortBy } from 'lodash-es'
import type OmnisearchPlugin from '../main'
import { Tokenizer } from './tokenizer'
const STOP_WORDS = new Set(["a", "an", "the", "and", "or", "but", "if", "in", "on", "at", "by", "for", "with", "to", "from", "of", "is", "it", "that", "this"])
export class SearchEngine {
private tokenizer: Tokenizer
private minisearch: MiniSearch
@@ -481,6 +484,16 @@ export class SearchEngine {
query
)
let bestMatch: SearchMatch | undefined
if (
matches.length > 0 &&
(query.query.text.length > 1 || query.getExactTerms().length > 0) &&
query.getBestStringForExcerpt() &&
matches[0].match.toLowerCase() === query.getBestStringForExcerpt()
) {
bestMatch = matches.shift()
}
const lowerCaseBasename = note.basename.toLowerCase()
const titleMatchWord = foundWords.find(word =>
lowerCaseBasename.includes(word.toLowerCase())
@@ -514,6 +527,10 @@ export class SearchEngine {
}
}
if (bestMatch) {
matches.unshift(bestMatch)
}
logVerbose(`Matches for note "${note.path}"`, matches)
const resultNote: ResultNote = {
score: result.score,
@@ -559,11 +576,20 @@ export class SearchEngine {
}
return (doc as any)[fieldName]
},
processTerm: (term: string) =>
(this.plugin.settings.ignoreDiacritics
? removeDiacritics(term, this.plugin.settings.ignoreArabicDiacritics)
: term
).toLowerCase(),
processTerm: (term: string) => {
const processedTerm = (
this.plugin.settings.ignoreDiacritics
? removeDiacritics(
term,
this.plugin.settings.ignoreArabicDiacritics
)
: term
).toLowerCase()
if (processedTerm.length < 3 || STOP_WORDS.has(processedTerm)) {
return null
}
return processedTerm
},
idField: 'path',
fields: [
'basename',

View File

@@ -1,10 +1,12 @@
import type { QueryCombination } from 'minisearch'
import type { Query, QueryCombination } from 'minisearch'
import { BRACKETS_AND_SPACE, chsRegex, SPACE_OR_PUNCTUATION } from '../globals'
import { logVerbose, splitCamelCase, splitHyphens } from '../tools/utils'
import type OmnisearchPlugin from '../main'
const markdownLinkExtractor = require('markdown-link-extractor')
const STOP_WORDS = new Set(["a", "an", "the", "and", "or", "but", "if", "in", "on", "at", "by", "for", "with", "to", "from", "of", "is", "it", "that", "this"])
export class Tokenizer {
constructor(private plugin: OmnisearchPlugin) {}
@@ -60,21 +62,47 @@ export class Tokenizer {
public tokenizeForSearch(text: string): QueryCombination {
// Extract urls and remove them from the query
const urls: string[] = markdownLinkExtractor(text)
const originalText = text
text = urls.reduce((acc, url) => acc.replace(url, ''), text)
const tokens = [...this.tokenizeTokens(text), ...urls].filter(Boolean)
const isStopWord = (term: string): boolean => {
const lower = term.toLowerCase()
return lower.length < 3 || STOP_WORDS.has(lower)
}
const queries = [
{ combineWith: 'AND', queries: [originalText] },
{ combineWith: 'AND', queries: tokens },
{
combineWith: 'AND',
queries: this.tokenizeWords(text).filter(Boolean),
},
{ combineWith: 'AND', queries: tokens.flatMap(splitHyphens) },
{ combineWith: 'AND', queries: tokens.flatMap(splitCamelCase) },
].map(q => ({
...q,
queries: q.queries.filter(t => !isStopWord(t)),
}))
const nonEmptyQueries = queries.filter(q => q.queries.length > 0)
// Deduplicate
const uniqueQueries = []
const seen = new Set()
for (const q of nonEmptyQueries) {
// sort to make order irrelevant for duplication check
const key = JSON.stringify(q.queries.sort())
if (!seen.has(key)) {
uniqueQueries.push(q)
seen.add(key)
}
}
return {
combineWith: 'OR',
queries: [
{ combineWith: 'AND', queries: tokens },
{
combineWith: 'AND',
queries: this.tokenizeWords(text).filter(Boolean),
},
{ combineWith: 'AND', queries: tokens.flatMap(splitHyphens) },
{ combineWith: 'AND', queries: tokens.flatMap(splitCamelCase) },
],
queries: uniqueQueries as Query[],
}
}

View File

@@ -64,7 +64,6 @@ export class TextProcessor {
words: string[],
query?: Query
): SearchMatch[] {
words = words.map(escapeHTML)
const reg = this.stringsToRegex(words)
const originalText = text
// text = text.toLowerCase().replace(new RegExp(SEPARATORS, 'gu'), ' ')
@@ -96,11 +95,16 @@ export class TextProcessor {
query &&
(query.query.text.length > 1 || query.getExactTerms().length > 0)
) {
const best = text.indexOf(query.getBestStringForExcerpt())
if (best > -1 && matches.find(m => m.offset === best)) {
const bestMatchStr = query.getBestStringForExcerpt()
const best = text.toLowerCase().indexOf(bestMatchStr)
if (best > -1) {
// We found the full query. We make it the first result, and remove any other match that it contains.
matches = matches.filter(
m => m.offset < best || m.offset >= best + bestMatchStr.length
)
matches.unshift({
offset: best,
match: query.getBestStringForExcerpt(),
match: originalText.substring(best, best + bestMatchStr.length),
})
}
}