Merge branch 'master' into develop

This commit is contained in:
Simon Cambier
2024-05-18 22:13:51 +02:00
17 changed files with 3391 additions and 2740 deletions

View File

@@ -6,8 +6,13 @@
![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/scambier/obsidian-omnisearch)
![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/scambier/obsidian-omnisearch?include_prereleases&label=BRAT%20beta)
> **Omnisearch** is a search engine that "_just works_". It always instantly shows you the most relevant results, thanks
> to its smart weighting algorithm.
> 🏆 Winner of the _[2023 Gems of the Year](https://obsidian.md/blog/2023-goty-winners/)_ in the "Existing plugin" category 🏆
---
**Omnisearch** is a search engine that "_just works_".
It always instantly shows you the most relevant results, thanks to its smart weighting algorithm.
Under the hood, it uses the excellent [MiniSearch](https://github.com/lucaong/minisearch) library.
@@ -47,7 +52,7 @@ You can check the [CHANGELOG](./CHANGELOG.md) for more information on the differ
- Directly Insert a `[[link]]` from the search results
- Supports Vim navigation keys
**Note:** support of Chinese, Japanese, Korean, etc. depends
**Note:** support of Chinese depends
on [this additional plugin](https://github.com/aidenlx/cm-chs-patch). Please read its documentation for more
information.

View File

@@ -1,7 +1,7 @@
{
"id": "omnisearch",
"name": "Omnisearch",
"version": "1.21.1",
"version": "1.22.2",
"minAppVersion": "1.3.0",
"description": "A search engine that just works",
"author": "Simon Cambier",

View File

@@ -1,7 +1,7 @@
{
"id": "omnisearch",
"name": "Omnisearch",
"version": "1.21.1",
"version": "1.22.2",
"minAppVersion": "1.3.0",
"description": "A search engine that just works",
"author": "Simon Cambier",

View File

@@ -1,6 +1,6 @@
{
"name": "scambier.obsidian-search",
"version": "1.22.0-beta.3",
"version": "1.22.2",
"description": "A search engine for Obsidian",
"main": "dist/main.js",
"scripts": {
@@ -52,5 +52,6 @@
"overrides": {
"moment@>=2.18.0 <2.29.4": ">=2.29.4"
}
}
},
"packageManager": "pnpm@9.1.0+sha512.67f5879916a9293e5cf059c23853d571beaf4f753c707f40cb22bed5fb1578c6aad3b6c4107ccb3ba0b35be003eb621a16471ac836c87beb53f9d54bb4612724"
}

5635
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -25,6 +25,9 @@ import type { CanvasData } from 'obsidian/canvas'
import type { AsPlainObject } from 'minisearch'
import type MiniSearch from 'minisearch'
import { settings } from './settings'
import { getObsidianApp } from './stores/obsidian-app'
const app = getObsidianApp()
/**
* This function is responsible for extracting the text from a file and

View File

@@ -2,10 +2,11 @@ import Dexie from 'dexie'
import type { AsPlainObject } from 'minisearch'
import type { DocumentRef } from './globals'
import { Notice } from 'obsidian'
import { getObsidianApp } from './stores/obsidian-app'
export class OmnisearchCache extends Dexie {
public static readonly dbVersion = 8
public static readonly dbName = 'omnisearch/cache/' + app.appId
public static readonly dbName = 'omnisearch/cache/' + getObsidianApp().appId
private static instance: OmnisearchCache

View File

@@ -3,6 +3,7 @@ import { writable } from 'svelte/store'
import { settings } from './settings'
import type { TFile } from 'obsidian'
import { Platform } from 'obsidian'
import { getObsidianApp } from './stores/obsidian-app'
export const regexLineSplit = /\r?\n|\r|((\.|\?|!)( |\r?\n|\r))/g
export const regexYaml = /^---\s*\n(.*?)\n?^---\s?/ms
@@ -101,7 +102,7 @@ export function isInputComposition(): boolean {
* @returns
*/
export function getChsSegmenter(): any | undefined {
return (app as any).plugins.plugins['cm-chs-patch']
return (getObsidianApp() as any).plugins.plugins['cm-chs-patch']
}
export type TextExtractorApi = {
@@ -114,7 +115,7 @@ export type TextExtractorApi = {
* @returns
*/
export function getTextExtractor(): TextExtractorApi | undefined {
return (app as any).plugins?.plugins?.['text-extractor']?.api
return (getObsidianApp() as any).plugins?.plugins?.['text-extractor']?.api
}
export function isCacheEnabled(): boolean {

View File

@@ -25,14 +25,15 @@ import { database, OmnisearchCache } from './database'
import * as NotesIndex from './notes-index'
import { searchEngine } from './search/omnisearch'
import { cacheManager } from './cache-manager'
import { setObsidianApp } from './stores/obsidian-app'
export default class OmnisearchPlugin extends Plugin {
private ribbonButton?: HTMLElement
// FIXME: fix the type
public apiHttpServer: null | any = null
private ribbonButton?: HTMLElement
async onload(): Promise<void> {
setObsidianApp(this.app)
await loadSettings(this)
this.addSettingTab(new SettingsTab(this))
@@ -131,13 +132,13 @@ export default class OmnisearchPlugin extends Plugin {
async executeFirstLaunchTasks(): Promise<void> {
const code = '1.21.0'
if (settings.welcomeMessage !== code && getTextExtractor()) {
const welcome = new DocumentFragment()
welcome.createSpan({}, span => {
span.innerHTML = `🔎 Omnisearch can now index .docx and .xlsx documents. Don't forget to update Text Extractor and enable the toggle in Omnisearch settings.`
})
new Notice(welcome, 20_000)
}
// if (settings.welcomeMessage !== code && getTextExtractor()) {
// const welcome = new DocumentFragment()
// welcome.createSpan({}, span => {
// span.innerHTML = `🔎 Omnisearch can now index .docx and .xlsx documents. Don't forget to update Text Extractor and enable the toggle in Omnisearch settings.`
// })
// new Notice(welcome, 20_000)
// }
settings.welcomeMessage = code
await this.saveData(settings)
}

View File

@@ -9,8 +9,12 @@ import { cacheManager } from '../cache-manager'
import { sortBy } from 'lodash-es'
import { getMatches, stringsToRegex } from 'src/tools/text-processing'
import { tokenizeForIndexing, tokenizeForSearch } from './tokenizer'
import { getObsidianApp } from '../stores/obsidian-app'
export class Omnisearch {
app = getObsidianApp()
public static readonly options: Options<IndexedDocument> = {
tokenize: tokenizeForIndexing,
extractField: (doc, fieldName) => {
@@ -188,6 +192,7 @@ export class Omnisearch {
headings1: settings.weightH1,
headings2: settings.weightH2,
headings3: settings.weightH3,
tags: settings.weightUnmarkedTags,
unmarkedTags: settings.weightUnmarkedTags,
},
// The query is already tokenized, don't tokenize again
@@ -232,36 +237,40 @@ export class Omnisearch {
return results.filter(r => r.id === options.singleFilePath)
}
logDebug(
'searching with downranked folders',
settings.downrankedFoldersFilters
)
// Hide or downrank files that are in Obsidian's excluded list
if (settings.hideExcluded) {
// Filter the files out
results = results.filter(
result =>
!(
app.metadataCache.isUserIgnored &&
app.metadataCache.isUserIgnored(result.id)
this.app.metadataCache.isUserIgnored &&
this.app.metadataCache.isUserIgnored(result.id)
)
)
} else {
// Just downrank them
results.forEach(result => {
if (
app.metadataCache.isUserIgnored &&
app.metadataCache.isUserIgnored(result.id)
this.app.metadataCache.isUserIgnored &&
this.app.metadataCache.isUserIgnored(result.id)
) {
result.score /= 10
}
})
}
logDebug(
'searching with downranked folders',
settings.downrankedFoldersFilters
)
// downrank files that are in folders listed in the downrankedFoldersFilters
if (settings.downrankedFoldersFilters.length > 0) {
results.forEach(result => {
// Extract tags from the query
const tags = query.getTags()
for (const result of results) {
const path = result.id
if (settings.downrankedFoldersFilters.length > 0) {
// downrank files that are in folders listed in the downrankedFoldersFilters
let downrankingFolder = false
settings.downrankedFoldersFilters.forEach(filter => {
if (path.startsWith(filter)) {
@@ -285,21 +294,27 @@ export class Omnisearch {
break
}
}
})
}
// Extract tags from the query
const tags = query.getTags()
// Boost custom properties
const metadata = this.app.metadataCache.getCache(path)
if (metadata) {
for (const { name, weight } of settings.weightCustomProperties) {
const values = metadata?.frontmatter?.[name]
if (values && result.terms.some(t => values.includes(t))) {
logDebug(`Boosting field "${name}" x${weight} for ${path}`)
result.score *= weight
}
}
}
// Put the results with tags on top
for (const tag of tags) {
for (const result of results) {
if ((result.tags ?? []).includes(tag)) {
result.score *= 100
}
}
}
logDebug('Sorting and limiting results')
// Sort results and keep the 50 best

View File

@@ -5,15 +5,28 @@ import {
chsRegex,
getChsSegmenter,
} from 'src/globals'
import { settings } from 'src/settings'
import { logDebug, splitCamelCase, splitHyphens } from 'src/tools/utils'
const markdownLinkExtractor = require('markdown-link-extractor')
function tokenizeWords(text: string): string[] {
return text.split(BRACKETS_AND_SPACE)
/**
 * Cuts `text` into pieces using the BRACKETS_AND_SPACE pattern.
 *
 * @param text - the raw text to split
 * @param options - `skipChs`: when true, the pieces are returned as-is;
 *   otherwise they are additionally passed through tokenizeChsWord()
 * @returns the resulting list of tokens
 */
function tokenizeWords(text: string, { skipChs = false } = {}): string[] {
  const pieces = text.split(BRACKETS_AND_SPACE)
  return skipChs ? pieces : tokenizeChsWord(pieces)
}
function tokenizeTokens(text: string): string[] {
return text.split(SPACE_OR_PUNCTUATION)
/**
 * Cuts `text` into pieces using the SPACE_OR_PUNCTUATION pattern.
 *
 * @param text - the raw text to split
 * @param options - `skipChs`: when true, the pieces are returned as-is;
 *   otherwise they are additionally passed through tokenizeChsWord()
 * @returns the resulting list of tokens
 */
function tokenizeTokens(text: string, { skipChs = false } = {}): string[] {
  const pieces = text.split(SPACE_OR_PUNCTUATION)
  return skipChs ? pieces : tokenizeChsWord(pieces)
}
/**
 * Expands the tokens that match `chsRegex` using the segmenter returned by
 * getChsSegmenter().
 *
 * When no segmenter is available, the input array is returned untouched.
 * Otherwise each matching token is replaced by the output of
 * `cut(token, { search: true })`, and non-matching tokens are kept as they are.
 *
 * NOTE(review): `chsRegex` is defined outside this view; if it carries the
 * `g` or `y` flag, `.test()` is stateful across calls - confirm.
 *
 * @param tokens - tokens produced by a previous split
 * @returns the (possibly expanded) list of tokens
 */
function tokenizeChsWord(tokens: string[]): string[] {
  const chsSegmenter = getChsSegmenter()
  if (chsSegmenter) {
    return tokens.flatMap(token => {
      if (chsRegex.test(token)) {
        return chsSegmenter.cut(token, { search: true })
      }
      return [token]
    })
  }
  return tokens
}
/**
@@ -24,9 +37,16 @@ function tokenizeTokens(text: string): string[] {
*/
export function tokenizeForIndexing(text: string): string[] {
const words = tokenizeWords(text)
const urls: string[] = markdownLinkExtractor(text)
let urls: string[] = []
if (settings.tokenizeUrls) {
try {
urls = markdownLinkExtractor(text)
} catch (e) {
logDebug('Error extracting urls', e)
}
}
let tokens = tokenizeTokens(text)
let tokens = tokenizeTokens(text, { skipChs: true })
// Split hyphenated tokens
tokens = [...tokens, ...tokens.flatMap(splitHyphens)]
@@ -42,14 +62,6 @@ export function tokenizeForIndexing(text: string): string[] {
tokens = [...tokens, ...urls]
}
const chsSegmenter = getChsSegmenter()
if (chsSegmenter) {
const chs = tokens.flatMap(word =>
chsRegex.test(word) ? chsSegmenter.cut(word) : [word]
)
tokens = [...tokens, ...chs]
}
// Remove duplicates
tokens = [...new Set(tokens)]
@@ -63,21 +75,12 @@ export function tokenizeForIndexing(text: string): string[] {
* @returns
*/
export function tokenizeForSearch(text: string): QueryCombination {
// Extract urls and remove them from the query
const urls: string[] = markdownLinkExtractor(text)
text = urls.reduce((acc, url) => acc.replace(url, ''), text)
const tokens = [...tokenizeTokens(text), ...urls].filter(Boolean)
let chs: string[] = []
const chsSegmenter = getChsSegmenter()
if (chsSegmenter) {
chs = tokens.flatMap(word =>
chsRegex.test(word) ? chsSegmenter.cut(word) : [word]
)
}
return {
combineWith: 'OR',
queries: [
@@ -85,7 +88,6 @@ export function tokenizeForSearch(text: string): QueryCombination {
{ combineWith: 'AND', queries: tokenizeWords(text).filter(Boolean) },
{ combineWith: 'AND', queries: tokens.flatMap(splitHyphens) },
{ combineWith: 'AND', queries: tokens.flatMap(splitCamelCase) },
{ combineWith: 'AND', queries: chs },
],
}
}

View File

@@ -1,3 +1,4 @@
// noinspection CssUnresolvedCustomProperty
import {
Notice,
Platform,
@@ -14,6 +15,7 @@ import {
isCacheEnabled,
} from './globals'
import type OmnisearchPlugin from './main'
import { getObsidianApp } from './stores/obsidian-app'
interface WeightingSettings {
weightBasename: number
@@ -25,6 +27,7 @@ interface WeightingSettings {
}
export interface OmnisearchSettings extends WeightingSettings {
weightCustomProperties: { name: string; weight: number }[]
/** Enables caching to speed up indexing */
useCache: boolean
/** Respect the "excluded files" Obsidian setting by downranking results from ignored files */
@@ -58,6 +61,7 @@ export interface OmnisearchSettings extends WeightingSettings {
welcomeMessage: string
/** If a query returns 0 results, try again with more relaxed conditions */
simpleSearch: boolean
tokenizeUrls: boolean
highlight: boolean
splitCamelCase: boolean
openInNewPane: boolean
@@ -99,7 +103,7 @@ export class SettingsTab extends PluginSettingTab {
}
// Settings main title
containerEl.createEl('h2', { text: 'Omnisearch' })
containerEl.createEl('h1', { text: 'Omnisearch' })
// Sponsor link - Thank you!
const divSponsor = containerEl.createDiv()
@@ -130,7 +134,7 @@ export class SettingsTab extends PluginSettingTab {
// PDF Indexing
const indexPDFsDesc = new DocumentFragment()
indexPDFsDesc.createSpan({}, span => {
span.innerHTML = `Omnisearch will use Text Extractor to index the content of your PDFs`
span.innerHTML = `Omnisearch will use Text Extractor to index the content of your PDFs.`
})
new Setting(containerEl)
.setName(
@@ -149,7 +153,7 @@ export class SettingsTab extends PluginSettingTab {
// Images Indexing
const indexImagesDesc = new DocumentFragment()
indexImagesDesc.createSpan({}, span => {
span.innerHTML = `Omnisearch will use Text Extractor to OCR your images and index their content`
span.innerHTML = `Omnisearch will use Text Extractor to OCR your images and index their content.`
})
new Setting(containerEl)
.setName(`Images OCR indexing ${getTextExtractor() ? '' : '⚠️ Disabled'}`)
@@ -166,7 +170,7 @@ export class SettingsTab extends PluginSettingTab {
// Office Documents Indexing
const indexOfficesDesc = new DocumentFragment()
indexOfficesDesc.createSpan({}, span => {
span.innerHTML = `Omnisearch will use Text Extractor to index the content of your office documents (currently <pre style="display:inline">.docx</pre> and <pre style="display:inline">.xlsx</pre>)`
span.innerHTML = `Omnisearch will use Text Extractor to index the content of your office documents (currently <pre style="display:inline">.docx</pre> and <pre style="display:inline">.xlsx</pre>).`
})
new Setting(containerEl)
.setName(
@@ -188,7 +192,7 @@ export class SettingsTab extends PluginSettingTab {
span.innerHTML = `
Omnisearch can index file<strong>names</strong> of "unsupported" files, such as e.g. <pre style="display:inline">.mp4</pre>
or non-extracted PDFs & images.<br/>
"Obsidian setting" will respect the value of "Files & Links > Detect all file extensions"`
"Obsidian setting" will respect the value of "Files & Links > Detect all file extensions".`
})
new Setting(containerEl)
.setName('Index paths of unsupported files')
@@ -261,7 +265,7 @@ export class SettingsTab extends PluginSettingTab {
.setName('Respect Obsidian\'s "Excluded Files"')
.setDesc(
`By default, files that are in Obsidian\'s "Options > Files & Links > Excluded Files" list are downranked in results.
Enable this option to completely hide them`
Enable this option to completely hide them.`
)
.addToggle(toggle =>
toggle.setValue(settings.hideExcluded).onChange(async v => {
@@ -321,6 +325,23 @@ export class SettingsTab extends PluginSettingTab {
})
)
// Extract URLs
// Crashes on iOS
if (!Platform.isIosApp) {
new Setting(containerEl)
.setName('Tokenize URLs')
.setDesc(
`Enable this if you want to be able to search for URLs as separate words.
This setting has a strong impact on indexing performance, and can crash Obsidian under certain conditions.`
)
.addToggle(toggle =>
toggle.setValue(settings.tokenizeUrls).onChange(async v => {
settings.tokenizeUrls = v
await saveSettings(this.plugin)
})
)
}
// Open in new pane
new Setting(containerEl)
.setName('Open in new pane')
@@ -480,10 +501,63 @@ export class SettingsTab extends PluginSettingTab {
new Setting(containerEl)
.setName(
`Tags without the # (default: ${DEFAULT_SETTINGS.weightUnmarkedTags})`
`Tags (default: ${DEFAULT_SETTINGS.weightUnmarkedTags})`
)
.addSlider(cb => this.weightSlider(cb, 'weightUnmarkedTags'))
//#region Specific tags
new Setting(containerEl)
.setName('Header properties fields')
.setDesc('You can set custom weights for values of header properties (e.g. "keywords").')
for (let i = 0; i < settings.weightCustomProperties.length; i++) {
const item = settings.weightCustomProperties[i]
new Setting(containerEl)
.setName((i + 1).toString() + '.')
// TODO: add autocompletion from app.metadataCache.getAllPropertyInfos()
.addText(text => {
text
.setPlaceholder('Property name')
.setValue(item.name)
.onChange(async v => {
item.name = v
await saveSettings(this.plugin)
})
})
.addSlider(cb => {
cb.setLimits(1, 5, 0.1)
.setValue(item.weight)
.setDynamicTooltip()
.onChange(async v => {
item.weight = v
await saveSettings(this.plugin)
})
})
// Remove the custom property
.addButton(btn => {
btn.setButtonText('Remove')
btn.onClick(async () => {
settings.weightCustomProperties.splice(i, 1)
await saveSettings(this.plugin)
this.display()
})
})
}
// Add a new custom property
new Setting(containerEl)
.addButton(btn => {
btn.setButtonText('Add a new property')
btn.onClick(cb => {
settings.weightCustomProperties.push({ name: '', weight: 1 })
this.display()
})
})
//#endregion Specific tags
//#endregion Results Weighting
//#region HTTP Server
@@ -617,9 +691,9 @@ export class SettingsTab extends PluginSettingTab {
new Setting(containerEl)
.setName('Clear cache data')
.setDesc(resetCacheDesc)
.addButton(cb => {
cb.setButtonText('Clear cache')
cb.onClick(async () => {
.addButton(btn => {
btn.setButtonText('Clear cache')
btn.onClick(async () => {
await database.clearCache()
})
})
@@ -638,6 +712,8 @@ export class SettingsTab extends PluginSettingTab {
}
}
const app = getObsidianApp()
export const DEFAULT_SETTINGS: OmnisearchSettings = {
useCache: true,
hideExcluded: false,
@@ -647,7 +723,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
PDFIndexing: false,
officeIndexing: false,
imagesIndexing: false,
unsupportedFilesIndexing: 'no',
unsupportedFilesIndexing: 'default',
splitCamelCase: false,
openInNewPane: false,
vimLikeNavigationShortcut: app.vault.getConfig('vimMode') as boolean,
@@ -659,6 +735,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
highlight: true,
showPreviousQueryResults: true,
simpleSearch: false,
tokenizeUrls: false,
fuzziness: '1',
weightBasename: 3,
@@ -667,6 +744,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
weightH2: 1.3,
weightH3: 1.1,
weightUnmarkedTags: 1.1,
weightCustomProperties: [] as { name: string; weight: number }[],
httpApiEnabled: false,
httpApiPort: '51361',

View File

@@ -0,0 +1,17 @@
import type { App } from 'obsidian'
/**
 * Module-level holder for the Obsidian app instance.
 * Stays `null` until setObsidianApp() has been called.
 */
let obsidianApp: App | null = null

/**
 * Stores the Obsidian app instance so that it can later be retrieved
 * through getObsidianApp().
 *
 * @param app - the app instance to remember
 */
export function setObsidianApp(app: App): void {
  obsidianApp = app
}

/**
 * Returns the app instance previously stored with setObsidianApp().
 *
 * @returns the stored Obsidian app instance
 * @throws Error with the message "Obsidian app not set" when nothing has
 *   been stored yet
 */
export function getObsidianApp(): App {
  if (obsidianApp) {
    return obsidianApp
  }
  throw new Error('Obsidian app not set')
}

View File

@@ -1,19 +0,0 @@
import type { ResultNote } from 'src/globals'
import { writable } from 'svelte/store'
/**
 * Builds a store wrapping a list of ResultNote items, initially empty.
 *
 * The returned object exposes:
 * - `subscribe`: the subscribe function of the underlying writable store
 * - `add`: pushes one item onto the current array (in place) via `update`
 * - `set`: replaces the stored array
 * - `reset`: replaces the stored array with a new empty one
 */
function createSearchResultsStore() {
  const store = writable<ResultNote[]>([])

  const add = (item: ResultNote) =>
    store.update(notes => {
      // The existing array is mutated and handed back, not copied
      notes.push(item)
      return notes
    })
  const replaceAll = (items: ResultNote[]) => store.set(items)
  const reset = () => store.set([])

  return {
    subscribe: store.subscribe,
    add,
    set: replaceAll,
    reset,
  }
}

/** Shared instance of the search results store. */
export const searchResultsStore = createSearchResultsStore()

View File

@@ -3,6 +3,7 @@ import { Query } from '../search/query'
import { searchEngine } from '../search/omnisearch'
import { makeExcerpt } from './text-processing'
import { refreshIndex } from '../notes-index'
import { getObsidianApp } from '../stores/obsidian-app'
type ResultNoteApi = {
score: number
@@ -19,6 +20,8 @@ export type SearchMatchApi = {
offset: number
}
const app = getObsidianApp()
let notified = false
/**

View File

@@ -1,6 +1,8 @@
import { type CachedMetadata, MarkdownView, TFile } from 'obsidian'
import type { ResultNote } from '../globals'
import { stringsToRegex } from './text-processing'
import { getObsidianApp } from '../stores/obsidian-app'
const app = getObsidianApp()
export async function openNote(
item: ResultNote,

View File

@@ -129,5 +129,11 @@
"1.20.3": "1.3.0",
"1.20.4": "1.3.0",
"1.21.0": "1.3.0",
"1.21.1": "1.3.0"
"1.21.1": "1.3.0",
"1.22.0-beta.1": "1.3.0",
"1.22.0-beta.2": "1.3.0",
"1.22.0-beta.3": "1.3.0",
"1.22.0": "1.3.0",
"1.22.1": "1.3.0",
"1.22.2": "1.3.0"
}