Compare commits
16 Commits
50db35b667
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 5e5708de4e | |||
| 70deab0b77 | |||
| 269a1e6ea4 | |||
| 9e68a725d0 | |||
| 44da87a29d | |||
| 61a3089c09 | |||
| 1297a1034a | |||
| b195bf65ee | |||
| 2ef3a1392f | |||
| c75d5d89f7 | |||
| df73ab0f1c | |||
| 637c20905e | |||
| c4c4e782fb | |||
| 2b00a7af2d | |||
| 3c84980903 | |||
| f17f9756a3 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -22,3 +22,4 @@ dist
|
||||
coverage
|
||||
package-lock.json
|
||||
Doc Omnisearch/.obsidian
|
||||
.aider*
|
||||
|
||||
82
README.md
82
README.md
@@ -1,23 +1,83 @@
|
||||
# Omnisearch for Obsidian
|
||||
# Tannersearch for Obsidian
|
||||
|
||||
[](https://github.com/sponsors/scambier)
|
||||

|
||||

|
||||

|
||||

|
||||
This is a fork of [obsidian-omnisearch](https://github.com/scambier/obsidian-omnisearch) (by [@scambier](https://github.com/scambier)) with the following changes:
|
||||
|
||||
> 🏆 Winner of the _[2023 Gems of the Year](https://obsidian.md/blog/2023-goty-winners/)_ in the "Existing plugin" category 🏆
|
||||
When opening a result, the cursor placement prioritizes note titles over headings over content
|
||||
- this means if the note's name matches your search, it opens at the top instead of on a random match in the middle of the note
|
||||
|
||||
Search terms aren't split on apostrophes
|
||||
- searching for "Sun's BBQ" searches for ["Sun's", "BBQ"] instead of ["Sun", "s", "BBQ"]
|
||||
|
||||
Search terms less than 3 characters long or common words are ignored
|
||||
- ignored words: "a", "an", "the", "and", "or", "but", "if", "in", "on", "at", "by", "for", "with", "to", "from", "of", "is", "it", "that", "this"
|
||||
|
||||
The first line of a paragraph is ranked like Heading 3 if it ends in a colon
|
||||
- for example,
|
||||
|
||||
```
|
||||
Japan trip:
|
||||
- passport
|
||||
- cash
|
||||
- umbrella
|
||||
```
|
||||
|
||||
... "Japan trip:" is indexed and ranked the same as "### Japan trip"
|
||||
|
||||
If the first paragraph of a note contains a line like "aka other name", then "other name" is ranked like H1
|
||||
- for example,
|
||||
|
||||
```
|
||||
see also: [[Travel General]]
|
||||
Aka: packing list
|
||||
|
||||
content
|
||||
```
|
||||
|
||||
... "packing list" is indexed and ranked the same as "# packing list". Note that the "Aka" prefix is case-insensitive and the colon is optional.
|
||||
|
||||
|
||||
---
|
||||
### Fork Installation
|
||||
|
||||
Ensure the original Omnisearch plugin is installed, see instructions below.
|
||||
|
||||
Download main.js into your `.obsidian/plugins/omnisearch` directory, example:
|
||||
|
||||
```
|
||||
$ cd ~/notes/.obsidian/plugins/omnisearch
|
||||
$ mv main.js main.js.bak
|
||||
$ wget https://raw.githubusercontent.com/tannercollin/obsidian-tannersearch/refs/heads/master/dist/main.js
|
||||
```
|
||||
|
||||
In Obsidian, open Settings > Community Plugins. Disable and enable Omnisearch.
|
||||
|
||||
Open Settings > Omnisearch. Scroll to the bottom and click "Clear cache data".
|
||||
|
||||
Restart Obsidian.
|
||||
|
||||
Note: on mobile you'll have to use some sort of sync or downloader and move the main.js over to your vault.
|
||||
|
||||
### Building the Fork
|
||||
|
||||
If you'd rather build the fork yourself:
|
||||
|
||||
```
|
||||
$ git clone https://github.com/tannercollin/obsidian-tannersearch.git
|
||||
$ cd obsidian-tannersearch/
|
||||
$ npm install --legacy-peer-deps
|
||||
$ npm run build
|
||||
```
|
||||
|
||||
Then copy `dist/main.js` as above.
|
||||
|
||||
|
||||
|
||||
# Original README
|
||||
|
||||
**Omnisearch** is a search engine that "_just works_".
|
||||
It always instantly shows you the most relevant results, thanks to its smart weighting algorithm.
|
||||
|
||||
Under the hood, it uses the excellent [MiniSearch](https://github.com/lucaong/minisearch) library. This free plugin is totally unrelated to the omnisearch.ai paid product.
|
||||
|
||||

|
||||
|
||||
## Documentation
|
||||
|
||||
https://publish.obsidian.md/omnisearch/Index
|
||||
@@ -72,5 +132,3 @@ Omnisearch is licensed under [GPL-3](https://tldrlegal.com/license/gnu-general-p
|
||||
|
||||
To all people who donate through [Ko-Fi](https://ko-fi.com/scambier)
|
||||
or [Github Sponsors](https://github.com/sponsors/scambier) ❤
|
||||
|
||||

|
||||
|
||||
187
dist/main.js
vendored
Normal file
187
dist/main.js
vendored
Normal file
File diff suppressed because one or more lines are too long
Binary file not shown.
|
Before Width: | Height: | Size: 475 KiB |
@@ -112,7 +112,7 @@ export type AIImageAnalyzerAPI = {
|
||||
}
|
||||
|
||||
export const SEPARATORS =
|
||||
/[|\t\n\r\^"= -#%-*,.`\/<>:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]/
|
||||
/[|\t\n\r\^"= -#-%&(*,.`\/<>:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2017\u201A-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]/
|
||||
.toString()
|
||||
.slice(1, -1)
|
||||
export const SPACE_OR_PUNCTUATION = new RegExp(`${SEPARATORS}+`, 'u')
|
||||
|
||||
@@ -229,6 +229,34 @@ export class DocumentsRepository {
|
||||
metadata?.frontmatter?.[this.plugin.settings.displayTitle] ?? ''
|
||||
}
|
||||
const tags = getTagsFromMetadata(metadata)
|
||||
const headings1 = metadata ? extractHeadingsFromCache(metadata, 1) : []
|
||||
const headings2 = metadata ? extractHeadingsFromCache(metadata, 2) : []
|
||||
const headings3 = metadata ? extractHeadingsFromCache(metadata, 3) : []
|
||||
|
||||
const akaHeadings: string[] = content
|
||||
.split(/\n\s*\n/)[0]
|
||||
.split('\n')
|
||||
.map(line => line.match(/^aka:?\s*(.+)$/i)?.[1]?.trim())
|
||||
.filter((heading): heading is string => !!heading)
|
||||
|
||||
const lines = content.split('\n')
|
||||
const colonHeadings: string[] = []
|
||||
for (let i = 0; i < lines.length; i++) {
|
||||
const line = lines[i].trim()
|
||||
if (line.endsWith(':')) {
|
||||
const prevLine = i > 0 ? lines[i - 1].trim() : null
|
||||
const nextLine = i < lines.length - 1 ? lines[i + 1].trim() : null
|
||||
|
||||
if (
|
||||
prevLine === '' &&
|
||||
nextLine !== null &&
|
||||
nextLine !== ''
|
||||
) {
|
||||
colonHeadings.push(line.slice(0, -1).trim())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
basename: file.basename,
|
||||
displayTitle,
|
||||
@@ -241,15 +269,9 @@ export class DocumentsRepository {
|
||||
tags: tags,
|
||||
unmarkedTags: tags.map(t => t.replace('#', '')),
|
||||
aliases: getAliasesFromMetadata(metadata).join(''),
|
||||
headings1: metadata
|
||||
? extractHeadingsFromCache(metadata, 1).join(' ')
|
||||
: '',
|
||||
headings2: metadata
|
||||
? extractHeadingsFromCache(metadata, 2).join(' ')
|
||||
: '',
|
||||
headings3: metadata
|
||||
? extractHeadingsFromCache(metadata, 3).join(' ')
|
||||
: '',
|
||||
headings1: [...headings1, ...akaHeadings].join(' '),
|
||||
headings2: headings2.join(' '),
|
||||
headings3: [...headings3, ...colonHeadings].join(' '),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import {
|
||||
type DocumentRef,
|
||||
type IndexedDocument,
|
||||
type ResultNote,
|
||||
type SearchMatch,
|
||||
} from '../globals'
|
||||
|
||||
import {
|
||||
@@ -22,6 +23,8 @@ import { sortBy } from 'lodash-es'
|
||||
import type OmnisearchPlugin from '../main'
|
||||
import { Tokenizer } from './tokenizer'
|
||||
|
||||
const STOP_WORDS = new Set(["a", "an", "the", "and", "or", "but", "if", "in", "on", "at", "by", "for", "with", "to", "from", "of", "is", "it", "that", "this"])
|
||||
|
||||
export class SearchEngine {
|
||||
private tokenizer: Tokenizer
|
||||
private minisearch: MiniSearch
|
||||
@@ -481,6 +484,16 @@ export class SearchEngine {
|
||||
query
|
||||
)
|
||||
|
||||
let bestMatch: SearchMatch | undefined
|
||||
if (
|
||||
matches.length > 0 &&
|
||||
(query.query.text.length > 1 || query.getExactTerms().length > 0) &&
|
||||
query.getBestStringForExcerpt() &&
|
||||
matches[0].match.toLowerCase() === query.getBestStringForExcerpt()
|
||||
) {
|
||||
bestMatch = matches.shift()
|
||||
}
|
||||
|
||||
const lowerCaseBasename = note.basename.toLowerCase()
|
||||
const titleMatchWord = foundWords.find(word =>
|
||||
lowerCaseBasename.includes(word.toLowerCase())
|
||||
@@ -514,6 +527,10 @@ export class SearchEngine {
|
||||
}
|
||||
}
|
||||
|
||||
if (bestMatch) {
|
||||
matches.unshift(bestMatch)
|
||||
}
|
||||
|
||||
logVerbose(`Matches for note "${note.path}"`, matches)
|
||||
const resultNote: ResultNote = {
|
||||
score: result.score,
|
||||
@@ -559,11 +576,20 @@ export class SearchEngine {
|
||||
}
|
||||
return (doc as any)[fieldName]
|
||||
},
|
||||
processTerm: (term: string) =>
|
||||
(this.plugin.settings.ignoreDiacritics
|
||||
? removeDiacritics(term, this.plugin.settings.ignoreArabicDiacritics)
|
||||
processTerm: (term: string) => {
|
||||
const processedTerm = (
|
||||
this.plugin.settings.ignoreDiacritics
|
||||
? removeDiacritics(
|
||||
term,
|
||||
this.plugin.settings.ignoreArabicDiacritics
|
||||
)
|
||||
: term
|
||||
).toLowerCase(),
|
||||
).toLowerCase()
|
||||
if (processedTerm.length < 3 || STOP_WORDS.has(processedTerm)) {
|
||||
return null
|
||||
}
|
||||
return processedTerm
|
||||
},
|
||||
idField: 'path',
|
||||
fields: [
|
||||
'basename',
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
import type { QueryCombination } from 'minisearch'
|
||||
import type { Query, QueryCombination } from 'minisearch'
|
||||
import { BRACKETS_AND_SPACE, chsRegex, SPACE_OR_PUNCTUATION } from '../globals'
|
||||
import { logVerbose, splitCamelCase, splitHyphens } from '../tools/utils'
|
||||
import type OmnisearchPlugin from '../main'
|
||||
|
||||
const markdownLinkExtractor = require('markdown-link-extractor')
|
||||
|
||||
const STOP_WORDS = new Set(["a", "an", "the", "and", "or", "but", "if", "in", "on", "at", "by", "for", "with", "to", "from", "of", "is", "it", "that", "this"])
|
||||
|
||||
export class Tokenizer {
|
||||
constructor(private plugin: OmnisearchPlugin) {}
|
||||
|
||||
@@ -60,13 +62,18 @@ export class Tokenizer {
|
||||
public tokenizeForSearch(text: string): QueryCombination {
|
||||
// Extract urls and remove them from the query
|
||||
const urls: string[] = markdownLinkExtractor(text)
|
||||
const originalText = text
|
||||
text = urls.reduce((acc, url) => acc.replace(url, ''), text)
|
||||
|
||||
const tokens = [...this.tokenizeTokens(text), ...urls].filter(Boolean)
|
||||
|
||||
return {
|
||||
combineWith: 'OR',
|
||||
queries: [
|
||||
const isStopWord = (term: string): boolean => {
|
||||
const lower = term.toLowerCase()
|
||||
return lower.length < 3 || STOP_WORDS.has(lower)
|
||||
}
|
||||
|
||||
const queries = [
|
||||
{ combineWith: 'AND', queries: [originalText] },
|
||||
{ combineWith: 'AND', queries: tokens },
|
||||
{
|
||||
combineWith: 'AND',
|
||||
@@ -74,7 +81,28 @@ export class Tokenizer {
|
||||
},
|
||||
{ combineWith: 'AND', queries: tokens.flatMap(splitHyphens) },
|
||||
{ combineWith: 'AND', queries: tokens.flatMap(splitCamelCase) },
|
||||
],
|
||||
].map(q => ({
|
||||
...q,
|
||||
queries: q.queries.filter(t => !isStopWord(t)),
|
||||
}))
|
||||
|
||||
const nonEmptyQueries = queries.filter(q => q.queries.length > 0)
|
||||
|
||||
// Deduplicate
|
||||
const uniqueQueries = []
|
||||
const seen = new Set()
|
||||
for (const q of nonEmptyQueries) {
|
||||
// sort to make order irrelevant for duplication check
|
||||
const key = JSON.stringify(q.queries.sort())
|
||||
if (!seen.has(key)) {
|
||||
uniqueQueries.push(q)
|
||||
seen.add(key)
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
combineWith: 'OR',
|
||||
queries: uniqueQueries as Query[],
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -64,7 +64,6 @@ export class TextProcessor {
|
||||
words: string[],
|
||||
query?: Query
|
||||
): SearchMatch[] {
|
||||
words = words.map(escapeHTML)
|
||||
const reg = this.stringsToRegex(words)
|
||||
const originalText = text
|
||||
// text = text.toLowerCase().replace(new RegExp(SEPARATORS, 'gu'), ' ')
|
||||
@@ -96,11 +95,16 @@ export class TextProcessor {
|
||||
query &&
|
||||
(query.query.text.length > 1 || query.getExactTerms().length > 0)
|
||||
) {
|
||||
const best = text.indexOf(query.getBestStringForExcerpt())
|
||||
if (best > -1 && matches.find(m => m.offset === best)) {
|
||||
const bestMatchStr = query.getBestStringForExcerpt()
|
||||
const best = text.toLowerCase().indexOf(bestMatchStr)
|
||||
if (best > -1) {
|
||||
// We found the full query. We make it the first result, and remove any other match that it contains.
|
||||
matches = matches.filter(
|
||||
m => m.offset < best || m.offset >= best + bestMatchStr.length
|
||||
)
|
||||
matches.unshift({
|
||||
offset: best,
|
||||
match: query.getBestStringForExcerpt(),
|
||||
match: originalText.substring(best, best + bestMatchStr.length),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user