Merge branch 'master' into develop
This commit is contained in:
67
CHANGELOG.md
67
CHANGELOG.md
@@ -1,5 +1,72 @@
|
||||
# Omnisearch Changelog
|
||||
|
||||
This changelog is not exhaustive.
|
||||
|
||||
## 1.21.x
|
||||
|
||||
- Added support for .docx and .xlsx
|
||||
|
||||
## 1.20.x
|
||||
|
||||
- Refactored indexing tokenization process to correctly take diacritics into account
|
||||
- Added highlighting in the note's path
|
||||
- Improved the selection of the chosen excerpt in the results list
|
||||
|
||||
## 1.19.x
|
||||
|
||||
- Various bugfixes and improvements
|
||||
|
||||
## 1.18.x
|
||||
|
||||
- Added a localhost server to use Omnisearch's API from outside Obsidian
|
||||
|
||||
## 1.17.x
|
||||
|
||||
- Added a shortcut to open files without closing Omnisearch
|
||||
- Prefill the search field with selected text
|
||||
- Improved highlighting
|
||||
|
||||
## 1.16.x
|
||||
|
||||
- Various indexing/tokenization improvements
|
||||
|
||||
## 1.15.x
|
||||
|
||||
- Added support for WebP images
|
||||
- Configurable fuzziness
|
||||
- Added support for DataLoom plugin files
|
||||
- Unsupported files are now indexed by their path
|
||||
- Unmarked tags are now slightly boosted
|
||||
|
||||
## 1.14.x
|
||||
|
||||
- Added a `path:` option
|
||||
- Bugfixes
|
||||
|
||||
## 1.13.x
|
||||
|
||||
- CamelCaseWords are now indexed as 3 words
|
||||
- Reduced search freezes in some cases
|
||||
|
||||
## 1.12.x
|
||||
|
||||
- You can filter files by their extension
|
||||
- Refreshed UI
|
||||
- New API functions
|
||||
- Fixed some tokenization issues
|
||||
|
||||
## 1.10.x - 1.11.x
|
||||
|
||||
- Added support for Text Extractor; Omnisearch no longer extracts text itself
|
||||
- Added canvas indexing
|
||||
- Improved tags indexing
|
||||
|
||||
## 1.9.x
|
||||
|
||||
- PDFs are no longer indexed on mobile
|
||||
- Performance improvements
|
||||
- Various bugfixes
|
||||
|
||||
## 1.8.x
|
||||
|
||||
- Added OCR for images
|
||||
|
||||
@@ -39,6 +39,7 @@ You can check the [CHANGELOG](./CHANGELOG.md) for more information on the differ
|
||||
its filename, and its headings
|
||||
- Keyboard first: you never have to use your mouse
|
||||
- Workflow similar to the "Quick Switcher" core plugin
|
||||
- Opt-in local HTTP server to query Omnisearch from outside of Obsidian
|
||||
- Resistance to typos
|
||||
- Switch between Vault and In-file search to quickly skim multiple results in a single note
|
||||
- Supports `"expressions in quotes"` and `-exclusions`
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"id": "omnisearch",
|
||||
"name": "Omnisearch",
|
||||
"version": "1.20.0-beta.1",
|
||||
"version": "1.21.1",
|
||||
"minAppVersion": "1.3.0",
|
||||
"description": "A search engine that just works",
|
||||
"author": "Simon Cambier",
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"id": "omnisearch",
|
||||
"name": "Omnisearch",
|
||||
"version": "1.19.0",
|
||||
"version": "1.21.1",
|
||||
"minAppVersion": "1.3.0",
|
||||
"description": "A search engine that just works",
|
||||
"author": "Simon Cambier",
|
||||
|
||||
32
package.json
32
package.json
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "scambier.obsidian-search",
|
||||
"version": "1.20.0-beta.1",
|
||||
"version": "1.21.1",
|
||||
"description": "A search engine for Obsidian",
|
||||
"main": "dist/main.js",
|
||||
"scripts": {
|
||||
@@ -14,36 +14,36 @@
|
||||
"author": "Simon Cambier",
|
||||
"license": "GPL-3",
|
||||
"devDependencies": {
|
||||
"@babel/preset-env": "^7.20.2",
|
||||
"@babel/preset-typescript": "^7.18.6",
|
||||
"@testing-library/jest-dom": "^5.16.5",
|
||||
"@babel/preset-env": "^7.23.8",
|
||||
"@babel/preset-typescript": "^7.23.3",
|
||||
"@testing-library/jest-dom": "^5.17.0",
|
||||
"@tsconfig/svelte": "^3.0.0",
|
||||
"@types/jest": "^27.5.2",
|
||||
"@types/lodash-es": "^4.17.6",
|
||||
"@types/node": "^16.18.7",
|
||||
"@types/pako": "^2.0.0",
|
||||
"@types/lodash-es": "^4.17.12",
|
||||
"@types/node": "^16.18.74",
|
||||
"@types/pako": "^2.0.3",
|
||||
"babel-jest": "^27.5.1",
|
||||
"builtin-modules": "^3.3.0",
|
||||
"esbuild": "0.14.0",
|
||||
"esbuild-plugin-copy": "1.3.0",
|
||||
"esbuild-svelte": "0.7.1",
|
||||
"jest": "^27.5.1",
|
||||
"obsidian": "^1.4.11",
|
||||
"prettier": "^2.8.1",
|
||||
"prettier-plugin-svelte": "^2.8.1",
|
||||
"svelte": "^3.54.0",
|
||||
"svelte-check": "^2.10.2",
|
||||
"obsidian": "1.3.5",
|
||||
"prettier": "^2.8.8",
|
||||
"prettier-plugin-svelte": "^2.10.1",
|
||||
"svelte": "^3.59.2",
|
||||
"svelte-check": "^2.10.3",
|
||||
"svelte-jester": "^2.3.2",
|
||||
"svelte-preprocess": "^4.10.7",
|
||||
"tslib": "2.3.1",
|
||||
"typescript": "^4.9.4",
|
||||
"vite": "^3.2.5"
|
||||
"typescript": "^4.9.5",
|
||||
"vite": "^3.2.8"
|
||||
},
|
||||
"dependencies": {
|
||||
"cancelable-promise": "^4.3.1",
|
||||
"dexie": "^3.2.2",
|
||||
"dexie": "^3.2.4",
|
||||
"lodash-es": "4.17.21",
|
||||
"minisearch": "6.0.0-beta.1",
|
||||
"minisearch": "^6.3.0",
|
||||
"pure-md5": "^0.1.14",
|
||||
"search-query-parser": "^1.6.0"
|
||||
},
|
||||
|
||||
3281
pnpm-lock.yaml
generated
3281
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
@@ -13,10 +13,13 @@ import {
|
||||
isFileFromDataloomPlugin,
|
||||
isFileImage,
|
||||
isFilePDF,
|
||||
isFileOffice,
|
||||
isFilePlaintext,
|
||||
isFilenameIndexable,
|
||||
logDebug,
|
||||
makeMD5,
|
||||
removeDiacritics,
|
||||
stripMarkdownCharacters,
|
||||
} from './tools/utils'
|
||||
import type { CanvasData } from 'obsidian/canvas'
|
||||
import type { AsPlainObject } from 'minisearch'
|
||||
@@ -104,6 +107,15 @@ async function getAndMapIndexedDocument(
|
||||
content = await extractor.extractText(file)
|
||||
}
|
||||
|
||||
// ** Office document **
|
||||
else if (
|
||||
isFileOffice(path) &&
|
||||
settings.officeIndexing &&
|
||||
extractor?.canFileBeExtracted(path)
|
||||
) {
|
||||
content = await extractor.extractText(file)
|
||||
}
|
||||
|
||||
// ** Unsupported files **
|
||||
else if (isFilenameIndexable(path)) {
|
||||
content = file.path
|
||||
@@ -143,6 +155,8 @@ async function getAndMapIndexedDocument(
|
||||
return {
|
||||
basename: file.basename,
|
||||
content,
|
||||
/** Content without diacritics and markdown chars */
|
||||
cleanedContent: stripMarkdownCharacters(removeDiacritics(content)),
|
||||
path: file.path,
|
||||
mtime: file.stat.mtime,
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
import { Query } from 'src/search/query'
|
||||
import { openNote } from 'src/tools/notes'
|
||||
import { searchEngine } from 'src/search/omnisearch'
|
||||
import { stringsToRegex } from 'src/tools/text-processing'
|
||||
|
||||
export let modal: OmnisearchInFileModal
|
||||
export let parent: OmnisearchVaultModal | null = null
|
||||
@@ -64,10 +65,20 @@
|
||||
|
||||
$: {
|
||||
if (note) {
|
||||
const groups = getGroups(note.matches)
|
||||
groupedOffsets = groups.map(group =>
|
||||
Math.round((group.first()!.offset + group.last()!.offset) / 2)
|
||||
let groups = getGroups(note.matches)
|
||||
|
||||
// If there are quotes in the search,
|
||||
// only show results that match at least one of the quotes
|
||||
const exactTerms = query.getExactTerms()
|
||||
if (exactTerms.length) {
|
||||
groups = groups.filter(group =>
|
||||
exactTerms.every(exact =>
|
||||
group.some(match => match.match.includes(exact))
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
groupedOffsets = groups.map(group => Math.round(group.first()!.offset))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -77,13 +88,12 @@
|
||||
function getGroups(matches: SearchMatch[]): SearchMatch[][] {
|
||||
const groups: SearchMatch[][] = []
|
||||
let lastOffset = -1
|
||||
let count = 0 // TODO: FIXME: this is a hack to avoid infinite loops
|
||||
while (true) {
|
||||
let count = 0 // Avoid infinite loops
|
||||
while (++count < 100) {
|
||||
const group = getGroupedMatches(matches, lastOffset, excerptAfter)
|
||||
if (!group.length) break
|
||||
lastOffset = group.last()!.offset
|
||||
groups.push(group)
|
||||
if (++count > 100) break
|
||||
}
|
||||
return groups
|
||||
}
|
||||
@@ -121,7 +131,9 @@
|
||||
if (parent) parent.close()
|
||||
|
||||
// Open (or switch focus to) the note
|
||||
await openNote(note, newTab)
|
||||
const reg = stringsToRegex(note.foundWords)
|
||||
reg.exec(note.content)
|
||||
await openNote(note, reg.lastIndex, newTab)
|
||||
|
||||
// Move cursor to the match
|
||||
const view = app.workspace.getActiveViewOfType(MarkdownView)
|
||||
|
||||
@@ -116,7 +116,7 @@
|
||||
historySearchIndex = 0
|
||||
}
|
||||
searchQuery = history[historySearchIndex]
|
||||
refInput?.setInputValue(searchQuery)
|
||||
refInput?.setInputValue(searchQuery ?? '')
|
||||
}
|
||||
|
||||
async function nextSearchHistory() {
|
||||
@@ -125,7 +125,7 @@
|
||||
historySearchIndex = history.length ? history.length - 1 : 0
|
||||
}
|
||||
searchQuery = history[historySearchIndex]
|
||||
refInput?.setInputValue(searchQuery)
|
||||
refInput?.setInputValue(searchQuery ?? '')
|
||||
}
|
||||
|
||||
let cancelableQuery: CancelablePromise<ResultNote[]> | null = null
|
||||
@@ -186,7 +186,8 @@
|
||||
|
||||
function openSearchResult(note: ResultNote, newPane = false) {
|
||||
saveCurrentQuery()
|
||||
openNote(note, newPane)
|
||||
const offset = note.matches?.[0]?.offset ?? 0
|
||||
openNote(note, offset, newPane)
|
||||
}
|
||||
|
||||
async function onClickCreateNote(_e: MouseEvent) {
|
||||
|
||||
@@ -53,6 +53,7 @@ export type IndexedDocument = {
|
||||
mtime: number
|
||||
|
||||
content: string
|
||||
cleanedContent: string
|
||||
aliases: string
|
||||
tags: string[]
|
||||
unmarkedTags: string[]
|
||||
@@ -120,7 +121,7 @@ export function isCacheEnabled(): boolean {
|
||||
}
|
||||
|
||||
export const SEPARATORS =
|
||||
/[|\t\n\r\^= -#%-*,.`\/<>:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]/
|
||||
/[|\t\n\r\^"= -#%-*,.`\/<>:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]/
|
||||
.toString()
|
||||
.slice(1, -1)
|
||||
export const SPACE_OR_PUNCTUATION = new RegExp(`${SEPARATORS}+`, 'u')
|
||||
|
||||
26
src/main.ts
26
src/main.ts
@@ -14,6 +14,7 @@ import {
|
||||
import {
|
||||
eventBus,
|
||||
EventNames,
|
||||
getTextExtractor,
|
||||
indexingStep,
|
||||
IndexingStepType,
|
||||
isCacheEnabled,
|
||||
@@ -36,8 +37,8 @@ export default class OmnisearchPlugin extends Plugin {
|
||||
this.addSettingTab(new SettingsTab(this))
|
||||
|
||||
if (!Platform.isMobile) {
|
||||
import('./tools/api-server').then(m =>
|
||||
this.apiHttpServer = m.getServer()
|
||||
import('./tools/api-server').then(
|
||||
m => (this.apiHttpServer = m.getServer())
|
||||
)
|
||||
}
|
||||
|
||||
@@ -119,7 +120,7 @@ export default class OmnisearchPlugin extends Plugin {
|
||||
})
|
||||
)
|
||||
|
||||
this.executeFirstLaunchTasks()
|
||||
await this.executeFirstLaunchTasks()
|
||||
await this.populateIndex()
|
||||
|
||||
if (this.apiHttpServer && settings.httpApiEnabled) {
|
||||
@@ -128,18 +129,17 @@ export default class OmnisearchPlugin extends Plugin {
|
||||
})
|
||||
}
|
||||
|
||||
executeFirstLaunchTasks(): void {
|
||||
const code = '1.10.1'
|
||||
if (settings.welcomeMessage !== code) {
|
||||
// const welcome = new DocumentFragment()
|
||||
// welcome.createSpan({}, span => {
|
||||
// span.innerHTML = `🔎 Omnisearch now requires the <strong>Text Extractor</strong> plugin to index PDF and images. See Omnisearch settings for more information.`
|
||||
// })
|
||||
// new Notice(welcome, 20_000)
|
||||
async executeFirstLaunchTasks(): Promise<void> {
|
||||
const code = '1.21.0'
|
||||
if (settings.welcomeMessage !== code && getTextExtractor()) {
|
||||
const welcome = new DocumentFragment()
|
||||
welcome.createSpan({}, span => {
|
||||
span.innerHTML = `🔎 Omnisearch can now index .docx and .xlsx documents. Don't forget to update Text Extractor and enable the toggle in Omnisearch settings.`
|
||||
})
|
||||
new Notice(welcome, 20_000)
|
||||
}
|
||||
settings.welcomeMessage = code
|
||||
|
||||
this.saveData(settings)
|
||||
await this.saveData(settings)
|
||||
}
|
||||
|
||||
async onunload(): Promise<void> {
|
||||
|
||||
@@ -1,59 +1,18 @@
|
||||
import MiniSearch, { type Options, type SearchResult } from 'minisearch'
|
||||
import type { DocumentRef, IndexedDocument, ResultNote } from '../globals'
|
||||
import {
|
||||
BRACKETS_AND_SPACE,
|
||||
chsRegex,
|
||||
getChsSegmenter,
|
||||
SPACE_OR_PUNCTUATION,
|
||||
} from '../globals'
|
||||
|
||||
import { settings } from '../settings'
|
||||
import {
|
||||
chunkArray,
|
||||
logDebug,
|
||||
removeDiacritics,
|
||||
splitCamelCase,
|
||||
splitHyphens,
|
||||
stripMarkdownCharacters,
|
||||
} from '../tools/utils'
|
||||
import { chunkArray, logDebug, removeDiacritics } from '../tools/utils'
|
||||
import { Notice } from 'obsidian'
|
||||
import type { Query } from './query'
|
||||
import { cacheManager } from '../cache-manager'
|
||||
import { sortBy } from 'lodash-es'
|
||||
import { getMatches, stringsToRegex } from 'src/tools/text-processing'
|
||||
|
||||
const tokenize = (text: string): string[] => {
|
||||
const words = text.split(BRACKETS_AND_SPACE)
|
||||
|
||||
let tokens = text.split(SPACE_OR_PUNCTUATION)
|
||||
|
||||
// Split hyphenated tokens
|
||||
tokens = [...tokens, ...tokens.flatMap(splitHyphens)]
|
||||
|
||||
// Split camelCase tokens into "camel" and "case
|
||||
tokens = [...tokens, ...tokens.flatMap(splitCamelCase)]
|
||||
|
||||
// Add whole words (aka "not tokens")
|
||||
tokens = [...tokens, ...words]
|
||||
|
||||
// When enabled, we only use the chsSegmenter,
|
||||
// and not the other custom tokenizers
|
||||
const chsSegmenter = getChsSegmenter()
|
||||
if (chsSegmenter) {
|
||||
const chs = tokens.flatMap(word =>
|
||||
chsRegex.test(word) ? chsSegmenter.cut(word) : [word]
|
||||
)
|
||||
tokens = [...tokens, ...chs]
|
||||
}
|
||||
|
||||
// Remove duplicates
|
||||
tokens = [...new Set(tokens)]
|
||||
|
||||
return tokens
|
||||
}
|
||||
import { tokenizeForIndexing, tokenizeForSearch } from './tokenizer'
|
||||
|
||||
export class Omnisearch {
|
||||
public static readonly options: Options<IndexedDocument> = {
|
||||
tokenize,
|
||||
tokenize: tokenizeForIndexing,
|
||||
extractField: (doc, fieldName) => {
|
||||
if (fieldName === 'directory') {
|
||||
// return path without the filename
|
||||
@@ -87,6 +46,7 @@ export class Omnisearch {
|
||||
},
|
||||
}
|
||||
private minisearch: MiniSearch
|
||||
/** Map<path, mtime> */
|
||||
private indexedDocuments: Map<string, number> = new Map()
|
||||
// private previousResults: SearchResult[] = []
|
||||
// private previousQuery: Query | null = null
|
||||
@@ -212,14 +172,15 @@ export class Omnisearch {
|
||||
break
|
||||
}
|
||||
|
||||
let results = this.minisearch.search(query.segmentsToStr(), {
|
||||
const searchTokens = tokenizeForSearch(query.segmentsToStr())
|
||||
logDebug(JSON.stringify(searchTokens, null, 1))
|
||||
let results = this.minisearch.search(searchTokens, {
|
||||
prefix: term => term.length >= options.prefixLength,
|
||||
// length <= 3: no fuzziness
|
||||
// length <= 5: fuzziness of 10%
|
||||
// length > 5: fuzziness of 20%
|
||||
fuzzy: term =>
|
||||
term.length <= 3 ? 0 : term.length <= 5 ? fuzziness / 2 : fuzziness,
|
||||
combineWith: 'AND',
|
||||
boost: {
|
||||
basename: settings.weightBasename,
|
||||
directory: settings.weightDirectory,
|
||||
@@ -321,10 +282,10 @@ export class Omnisearch {
|
||||
results = results.filter(r => {
|
||||
const document = documents.find(d => d.path === r.id)
|
||||
const title = document?.path.toLowerCase() ?? ''
|
||||
const content = stripMarkdownCharacters(
|
||||
document?.content ?? ''
|
||||
).toLowerCase()
|
||||
return exactTerms.every(q => content.includes(q) || title.includes(q))
|
||||
const content = (document?.cleanedContent ?? '').toLowerCase()
|
||||
return exactTerms.every(
|
||||
q => content.includes(q) || removeDiacritics(title).includes(q)
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -333,7 +294,7 @@ export class Omnisearch {
|
||||
if (exclusions.length) {
|
||||
logDebug('Filtering with exclusions')
|
||||
results = results.filter(r => {
|
||||
const content = stripMarkdownCharacters(
|
||||
const content = (
|
||||
documents.find(d => d.path === r.id)?.content ?? ''
|
||||
).toLowerCase()
|
||||
return exclusions.every(q => !content.includes(q))
|
||||
@@ -402,7 +363,7 @@ export class Omnisearch {
|
||||
const foundWords = [
|
||||
// Matching terms from the result,
|
||||
// do not necessarily match the query
|
||||
...Object.keys(result.match),
|
||||
...result.terms,
|
||||
|
||||
// Quoted expressions
|
||||
...query.getExactTerms(),
|
||||
|
||||
@@ -46,6 +46,15 @@ export class Query {
|
||||
}
|
||||
this.query = parsed
|
||||
|
||||
// Extract keywords starting with a dot...
|
||||
const ext = this.query.text
|
||||
.filter(o => o.startsWith('.'))
|
||||
.map(o => o.slice(1))
|
||||
// add them to the ext field...
|
||||
this.query.ext = [...new Set([...ext, ...(this.query.ext ?? [])])]
|
||||
// and remove them from the text field
|
||||
this.query.text = this.query.text.filter(o => !o.startsWith('.'))
|
||||
|
||||
// Get strings in quotes, and remove the quotes
|
||||
this.#inQuotes =
|
||||
text.match(/"([^"]+)"/g)?.map(o => o.replace(/"/g, '')) ?? []
|
||||
@@ -89,4 +98,13 @@ export class Query {
|
||||
),
|
||||
]
|
||||
}
|
||||
|
||||
/**
 * Picks the string that should be looked up in a note's text to build
 * the excerpt.
 *
 * @returns the longest quoted expression of the query when there is at
 * least one, otherwise the query segments joined by `segmentsToStr()`.
 */
public getBestStringForExcerpt(): string {
  // If we have quoted expressions, return the longest one
  if (this.#inQuotes.length) {
    // Sort a copy: Array.prototype.sort() works in place, and this getter
    // must not reorder the quoted expressions stored on the instance.
    const byLengthDesc = [...this.#inQuotes].sort(
      (a, b) => b.length - a.length
    )
    return byLengthDesc[0] ?? ''
  }
  // Otherwise, just return the query as is
  return this.segmentsToStr()
}
|
||||
}
|
||||
|
||||
79
src/search/tokenizer.ts
Normal file
79
src/search/tokenizer.ts
Normal file
@@ -0,0 +1,79 @@
|
||||
import type { QueryCombination } from 'minisearch'
|
||||
import {
|
||||
BRACKETS_AND_SPACE,
|
||||
SPACE_OR_PUNCTUATION,
|
||||
chsRegex,
|
||||
getChsSegmenter,
|
||||
} from 'src/globals'
|
||||
import { logDebug, splitCamelCase, splitHyphens } from 'src/tools/utils'
|
||||
|
||||
/**
 * Cuts the text into "whole words" by splitting it on the
 * `BRACKETS_AND_SPACE` pattern.
 */
function tokenizeWords(text: string): string[] {
  const words = text.split(BRACKETS_AND_SPACE)
  return words
}
|
||||
|
||||
/**
 * Cuts the text into tokens by splitting it on the
 * `SPACE_OR_PUNCTUATION` pattern.
 */
function tokenizeTokens(text: string): string[] {
  const tokens = text.split(SPACE_OR_PUNCTUATION)
  return tokens
}
|
||||
|
||||
/**
 * Tokenizes a text for indexing.
 *
 * Several tokenization strategies are applied and their outputs are merged,
 * so the result may contain more entries than the text has words.
 * Duplicates are removed while keeping the first occurrence of each token.
 *
 * @param text the text to tokenize
 * @returns the de-duplicated list of tokens
 */
export function tokenizeForIndexing(text: string): string[] {
  // Whole words (aka "not tokens"), appended after the derived tokens below
  const wholeWords = tokenizeWords(text)

  const baseTokens = tokenizeTokens(text)

  // Add the parts of hyphenated tokens
  const withHyphenParts = baseTokens.concat(baseTokens.flatMap(splitHyphens))

  // Add the parts of camelCase tokens ("camel" and "case")
  const withCamelParts = withHyphenParts.concat(
    withHyphenParts.flatMap(splitCamelCase)
  )

  let collected = withCamelParts.concat(wholeWords)

  // When a segmenter is available, also add the segments of every token
  // that matches chsRegex (other tokens are passed through unchanged)
  const segmenter = getChsSegmenter()
  if (segmenter) {
    const segmented = collected.flatMap(word =>
      chsRegex.test(word) ? segmenter.cut(word) : [word]
    )
    collected = collected.concat(segmented)
  }

  // Remove duplicates
  return Array.from(new Set(collected))
}
|
||||
|
||||
/**
 * Builds the MiniSearch query for a search string.
 *
 * The text goes through the same tokenization methods as indexing
 * (punctuation-split tokens, whole words, hyphen parts, camelCase parts and,
 * when a segmenter is available, segmented tokens). Each method produces its
 * own "AND" group, and the groups are combined with "OR".
 *
 * Empty strings produced by the splits (e.g. a leading or trailing
 * separator in the query) are discarded: an empty term cannot match any
 * document, so keeping it inside an "AND" group would void the whole group.
 *
 * @param text the search string
 * @returns the combination of sub-queries to pass to MiniSearch
 */
export function tokenizeForSearch(text: string): QueryCombination {
  const isNonEmpty = (token: string): boolean => token.length > 0

  const tokens = tokenizeTokens(text).filter(isNonEmpty)
  const words = tokenizeWords(text).filter(isNonEmpty)

  let chs: string[] = []
  const chsSegmenter = getChsSegmenter()
  if (chsSegmenter) {
    chs = tokens
      .flatMap(word => (chsRegex.test(word) ? chsSegmenter.cut(word) : [word]))
      .filter(isNonEmpty)
  }

  return {
    combineWith: 'OR',
    queries: [
      { combineWith: 'AND', queries: tokens },
      { combineWith: 'AND', queries: words },
      {
        combineWith: 'AND',
        queries: tokens.flatMap(splitHyphens).filter(isNonEmpty),
      },
      {
        combineWith: 'AND',
        queries: tokens.flatMap(splitCamelCase).filter(isNonEmpty),
      },
      { combineWith: 'AND', queries: chs },
    ],
  }
}
|
||||
@@ -37,6 +37,9 @@ export interface OmnisearchSettings extends WeightingSettings {
|
||||
PDFIndexing: boolean
|
||||
/** Enable Images indexing */
|
||||
imagesIndexing: boolean
|
||||
/** Enable Office documents indexing */
|
||||
officeIndexing: boolean
|
||||
|
||||
/** Enable indexing of unknown files */
|
||||
unsupportedFilesIndexing: 'yes' | 'no' | 'default'
|
||||
/** Activate the small 🔍 button on Obsidian's ribbon */
|
||||
@@ -99,7 +102,7 @@ export class SettingsTab extends PluginSettingTab {
|
||||
// Sponsor link - Thank you!
|
||||
const divSponsor = containerEl.createDiv()
|
||||
divSponsor.innerHTML = `
|
||||
<iframe src="https://github.com/sponsors/scambier/button" title="Sponsor scambier" height="35" width="116" style="border: 0;"></iframe>
|
||||
<iframe sandbox="allow-top-navigation-by-user-activation" src="https://github.com/sponsors/scambier/button" title="Sponsor scambier" height="35" width="116" style="border: 0;"></iframe>
|
||||
<a href='https://ko-fi.com/B0B6LQ2C' target='_blank'><img height='36' style='border:0px;height:36px;' src='https://cdn.ko-fi.com/cdn/kofi2.png?v=3' border='0' alt='Buy Me a Coffee at ko-fi.com' /></a>
|
||||
`
|
||||
|
||||
@@ -158,11 +161,30 @@ export class SettingsTab extends PluginSettingTab {
|
||||
)
|
||||
.setDisabled(!getTextExtractor())
|
||||
|
||||
// Office Documents Indexing
|
||||
const indexOfficesDesc = new DocumentFragment()
|
||||
indexOfficesDesc.createSpan({}, span => {
|
||||
span.innerHTML = `Omnisearch will use Text Extractor to index the content of your office documents (currently <pre style="display:inline">.docx</pre> and <pre style="display:inline">.xlsx</pre>)`
|
||||
})
|
||||
new Setting(containerEl)
|
||||
.setName(
|
||||
`Documents content indexing ${getTextExtractor() ? '' : '⚠️ Disabled'}`
|
||||
)
|
||||
.setDesc(indexOfficesDesc)
|
||||
.addToggle(toggle =>
|
||||
toggle.setValue(settings.officeIndexing).onChange(async v => {
|
||||
await database.clearCache()
|
||||
settings.officeIndexing = v
|
||||
await saveSettings(this.plugin)
|
||||
})
|
||||
)
|
||||
.setDisabled(!getTextExtractor())
|
||||
|
||||
// Index filenames of unsupported files
|
||||
const indexUnsupportedDesc = new DocumentFragment()
|
||||
indexUnsupportedDesc.createSpan({}, span => {
|
||||
span.innerHTML = `
|
||||
Omnisearch can index file<strong>names</strong> of "unsupported" files, such as e.g. <pre style="display:inline">.mp4</pre>, <pre style="display:inline">.xlsx</pre>,
|
||||
Omnisearch can index file<strong>names</strong> of "unsupported" files, such as e.g. <pre style="display:inline">.mp4</pre>
|
||||
or non-extracted PDFs & images.<br/>
|
||||
"Obsidian setting" will respect the value of "Files & Links > Detect all file extensions"`
|
||||
})
|
||||
@@ -185,7 +207,7 @@ export class SettingsTab extends PluginSettingTab {
|
||||
indexedFileTypesDesc.createSpan({}, span => {
|
||||
span.innerHTML = `In addition to standard <code>md</code> files, Omnisearch can also index other <strong style="color: var(--text-accent)">PLAINTEXT</strong> files.<br/>
|
||||
Add extensions separated by a space, without the dot. Example: "<code>txt org csv</code>".<br />
|
||||
⚠️ <span style="color: var(--text-accent)">Using extensions of non-plaintext files (like .docx or .pptx) WILL cause crashes,
|
||||
⚠️ <span style="color: var(--text-accent)">Using extensions of non-plaintext files (like .pptx) WILL cause crashes,
|
||||
because Omnisearch will try to index their content.</span>`
|
||||
})
|
||||
new Setting(containerEl)
|
||||
@@ -444,24 +466,6 @@ export class SettingsTab extends PluginSettingTab {
|
||||
|
||||
//#endregion Results Weighting
|
||||
|
||||
//#region Debugging
|
||||
|
||||
new Setting(containerEl).setName('Debugging').setHeading()
|
||||
|
||||
new Setting(containerEl)
|
||||
.setName('Enable verbose logging')
|
||||
.setDesc(
|
||||
"Adds a LOT of logs for debugging purposes. Don't forget to disable it."
|
||||
)
|
||||
.addToggle(toggle =>
|
||||
toggle.setValue(settings.verboseLogging).onChange(async v => {
|
||||
settings.verboseLogging = v
|
||||
await saveSettings(this.plugin)
|
||||
})
|
||||
)
|
||||
|
||||
//#endregion Debugging
|
||||
|
||||
//#region HTTP Server
|
||||
|
||||
if (!Platform.isMobile) {
|
||||
@@ -521,6 +525,24 @@ export class SettingsTab extends PluginSettingTab {
|
||||
|
||||
//#endregion HTTP Server
|
||||
|
||||
//#region Debugging
|
||||
|
||||
new Setting(containerEl).setName('Debugging').setHeading()
|
||||
|
||||
new Setting(containerEl)
|
||||
.setName('Enable verbose logging')
|
||||
.setDesc(
|
||||
"Adds a LOT of logs for debugging purposes. Don't forget to disable it."
|
||||
)
|
||||
.addToggle(toggle =>
|
||||
toggle.setValue(settings.verboseLogging).onChange(async v => {
|
||||
settings.verboseLogging = v
|
||||
await saveSettings(this.plugin)
|
||||
})
|
||||
)
|
||||
|
||||
//#endregion Debugging
|
||||
|
||||
//#region Danger Zone
|
||||
new Setting(containerEl).setName('Danger Zone').setHeading()
|
||||
|
||||
@@ -602,6 +624,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
|
||||
ignoreDiacritics: true,
|
||||
indexedFileTypes: [] as string[],
|
||||
PDFIndexing: false,
|
||||
officeIndexing: false,
|
||||
imagesIndexing: false,
|
||||
unsupportedFilesIndexing: 'no',
|
||||
splitCamelCase: false,
|
||||
|
||||
@@ -63,7 +63,7 @@ export function getServer() {
|
||||
close() {
|
||||
server.close()
|
||||
console.log(`Omnisearch - Terminated HTTP server`)
|
||||
if (settings.httpApiNotice) {
|
||||
if (settings.httpApiEnabled && settings.httpApiNotice) {
|
||||
new Notice(`Omnisearch - Terminated HTTP server`)
|
||||
}
|
||||
},
|
||||
|
||||
@@ -4,12 +4,9 @@ import { stringsToRegex } from './text-processing'
|
||||
|
||||
export async function openNote(
|
||||
item: ResultNote,
|
||||
offset = 0,
|
||||
newPane = false
|
||||
): Promise<void> {
|
||||
const reg = stringsToRegex(item.foundWords)
|
||||
reg.exec(item.content)
|
||||
const offset = reg.lastIndex
|
||||
|
||||
// Check if the note is already open,
|
||||
// to avoid opening it twice if the first one is pinned
|
||||
let alreadyOpenAndPinned = false
|
||||
|
||||
@@ -14,13 +14,6 @@ import type { Query } from 'src/search/query'
|
||||
import { Notice } from 'obsidian'
|
||||
import { escapeRegExp } from 'lodash-es'
|
||||
|
||||
export function highlighterGroups(_substring: string, ...args: any[]) {
|
||||
// args[0] is the single char preceding args[1], which is the word we want to highlight
|
||||
if (!!args[1].trim())
|
||||
return `<span>${args[0]}</span><span class="${highlightClass}">${args[1]}</span>`
|
||||
return '<no content>'
|
||||
}
|
||||
|
||||
/**
|
||||
* Wraps the matches in the text with a <span> element and a highlight class
|
||||
* @param text
|
||||
@@ -134,18 +127,18 @@ export function getMatches(
|
||||
.substring(matchStartIndex, matchEndIndex)
|
||||
.trim()
|
||||
if (originalMatch && match.index >= 0) {
|
||||
matches.push({ match: originalMatch, offset: match.index + 1 })
|
||||
matches.push({ match: originalMatch, offset: match.index })
|
||||
}
|
||||
}
|
||||
|
||||
// If the query is more than 1 token and can be found "as is" in the text, put this match first
|
||||
if (query && query.query.text.length > 1) {
|
||||
const best = text.indexOf(query.segmentsToStr())
|
||||
if (query && (query.query.text.length > 1 || query.getExactTerms().length > 0)) {
|
||||
const best = text.indexOf(query.getBestStringForExcerpt())
|
||||
if (best > -1 && matches.find(m => m.offset === best)) {
|
||||
matches = matches.filter(m => m.offset !== best)
|
||||
matches.unshift({
|
||||
offset: best,
|
||||
match: query.segmentsToStr(),
|
||||
match: query.getBestStringForExcerpt(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,10 +9,6 @@ import { canIndexUnsupportedFiles, settings } from '../settings'
|
||||
import { type BinaryLike, createHash } from 'crypto'
|
||||
import { md5 } from 'pure-md5'
|
||||
|
||||
// export function highlighter(str: string): string {
|
||||
// return `<span class="${highlightClass}">${str}</span>`
|
||||
// }
|
||||
|
||||
export function pathWithoutFilename(path: string): string {
|
||||
const split = path.split('/')
|
||||
split.pop()
|
||||
@@ -174,6 +170,11 @@ export function isFilePDF(path: string): boolean {
|
||||
return getExtension(path) === 'pdf'
|
||||
}
|
||||
|
||||
/**
 * Tells whether the path points to an office document,
 * i.e. its extension is `docx` or `xlsx`.
 */
export function isFileOffice(path: string): boolean {
  switch (getExtension(path)) {
    case 'docx':
    case 'xlsx':
      return true
    default:
      return false
  }
}
|
||||
|
||||
/**
 * Tells whether the path ends with one of the plaintext extensions:
 * those listed in `settings.indexedFileTypes`, plus `md`.
 */
export function isFilePlaintext(path: string): boolean {
  const plaintextExtensions = settings.indexedFileTypes.concat('md')
  for (const ext of plaintextExtensions) {
    if (path.endsWith(`.${ext}`)) {
      return true
    }
  }
  return false
}
|
||||
|
||||
@@ -122,5 +122,12 @@
|
||||
"1.18.1": "1.3.0",
|
||||
"1.19.0-beta.1": "1.3.0",
|
||||
"1.19.0": "1.3.0",
|
||||
"1.20.0-beta.1": "1.3.0"
|
||||
"1.20.0-beta.1": "1.3.0",
|
||||
"1.20.0": "1.3.0",
|
||||
"1.20.1": "1.3.0",
|
||||
"1.20.2": "1.3.0",
|
||||
"1.20.3": "1.3.0",
|
||||
"1.20.4": "1.3.0",
|
||||
"1.21.0": "1.3.0",
|
||||
"1.21.1": "1.3.0"
|
||||
}
|
||||
Reference in New Issue
Block a user