Merge branch 'master' into develop

This commit is contained in:
Simon Cambier
2024-03-11 10:34:04 +01:00
20 changed files with 1598 additions and 2138 deletions

View File

@@ -1,5 +1,72 @@
# Omnisearch Changelog
This changelog is not exhaustive.
## 1.21.x
- Added support for .docx and .xlsx
## 1.20.x
- Refactored indexing tokenization process to correctly take diacritics into account
- Added highlighting in the note's path
- Improved the selection of the chosen excerpt in the results list
## 1.19.x
- Various bugfixes and improvements
## 1.18.x
- Added a localhost server to use Omnisearch's API from outside Obsidian
## 1.17.x
- Added a shortcut to open files without closing Omnisearch
- Prefill the search field with selected text
- Improved highlighting
## 1.16.x
- Various indexing/tokenization improvements
## 1.15.x
- Added support for webp images
- Configurable fuzziness
- Added support for DataLoom plugin files
- Unsupported files are now indexed by their path
- Unmarked tags are now slightly boosted
## 1.14.x
- Added a `path:` option
- Bugfixes
## 1.13.x
- CamelCaseWords are now indexed as 3 words
- Reduced search freezes in some cases
## 1.12.x
- You can filter files by their extension
- Refreshed UI
- New API functions
- Fixed some tokenization issues
## 1.10.x - 1.11.x
- Added support for Text Extractor; Omnisearch no longer extracts text itself
- Added canvas indexing
- Improved tags indexing
## 1.9.x
- PDFs are no longer indexed on mobile
- Performance improvements
- Various bugfixes
## 1.8.x
- Added OCR for images

View File

@@ -39,6 +39,7 @@ You can check the [CHANGELOG](./CHANGELOG.md) for more information on the differ
its filename, and its headings
- Keyboard first: you never have to use your mouse
- Workflow similar to the "Quick Switcher" core plugin
- Opt-in local HTTP server to query Omnisearch from outside of Obsidian
- Resistance to typos
- Switch between Vault and In-file search to quickly skim multiple results in a single note
- Supports `"expressions in quotes"` and `-exclusions`

View File

@@ -1,7 +1,7 @@
{
"id": "omnisearch",
"name": "Omnisearch",
"version": "1.20.0-beta.1",
"version": "1.21.1",
"minAppVersion": "1.3.0",
"description": "A search engine that just works",
"author": "Simon Cambier",

View File

@@ -1,7 +1,7 @@
{
"id": "omnisearch",
"name": "Omnisearch",
"version": "1.19.0",
"version": "1.21.1",
"minAppVersion": "1.3.0",
"description": "A search engine that just works",
"author": "Simon Cambier",

View File

@@ -1,6 +1,6 @@
{
"name": "scambier.obsidian-search",
"version": "1.20.0-beta.1",
"version": "1.21.1",
"description": "A search engine for Obsidian",
"main": "dist/main.js",
"scripts": {
@@ -14,36 +14,36 @@
"author": "Simon Cambier",
"license": "GPL-3",
"devDependencies": {
"@babel/preset-env": "^7.20.2",
"@babel/preset-typescript": "^7.18.6",
"@testing-library/jest-dom": "^5.16.5",
"@babel/preset-env": "^7.23.8",
"@babel/preset-typescript": "^7.23.3",
"@testing-library/jest-dom": "^5.17.0",
"@tsconfig/svelte": "^3.0.0",
"@types/jest": "^27.5.2",
"@types/lodash-es": "^4.17.6",
"@types/node": "^16.18.7",
"@types/pako": "^2.0.0",
"@types/lodash-es": "^4.17.12",
"@types/node": "^16.18.74",
"@types/pako": "^2.0.3",
"babel-jest": "^27.5.1",
"builtin-modules": "^3.3.0",
"esbuild": "0.14.0",
"esbuild-plugin-copy": "1.3.0",
"esbuild-svelte": "0.7.1",
"jest": "^27.5.1",
"obsidian": "^1.4.11",
"prettier": "^2.8.1",
"prettier-plugin-svelte": "^2.8.1",
"svelte": "^3.54.0",
"svelte-check": "^2.10.2",
"obsidian": "1.3.5",
"prettier": "^2.8.8",
"prettier-plugin-svelte": "^2.10.1",
"svelte": "^3.59.2",
"svelte-check": "^2.10.3",
"svelte-jester": "^2.3.2",
"svelte-preprocess": "^4.10.7",
"tslib": "2.3.1",
"typescript": "^4.9.4",
"vite": "^3.2.5"
"typescript": "^4.9.5",
"vite": "^3.2.8"
},
"dependencies": {
"cancelable-promise": "^4.3.1",
"dexie": "^3.2.2",
"dexie": "^3.2.4",
"lodash-es": "4.17.21",
"minisearch": "6.0.0-beta.1",
"minisearch": "^6.3.0",
"pure-md5": "^0.1.14",
"search-query-parser": "^1.6.0"
},

3281
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -13,10 +13,13 @@ import {
isFileFromDataloomPlugin,
isFileImage,
isFilePDF,
isFileOffice,
isFilePlaintext,
isFilenameIndexable,
logDebug,
makeMD5,
removeDiacritics,
stripMarkdownCharacters,
} from './tools/utils'
import type { CanvasData } from 'obsidian/canvas'
import type { AsPlainObject } from 'minisearch'
@@ -104,6 +107,15 @@ async function getAndMapIndexedDocument(
content = await extractor.extractText(file)
}
// ** Office document **
else if (
isFileOffice(path) &&
settings.officeIndexing &&
extractor?.canFileBeExtracted(path)
) {
content = await extractor.extractText(file)
}
// ** Unsupported files **
else if (isFilenameIndexable(path)) {
content = file.path
@@ -143,6 +155,8 @@ async function getAndMapIndexedDocument(
return {
basename: file.basename,
content,
/** Content without diacritics and markdown chars */
cleanedContent: stripMarkdownCharacters(removeDiacritics(content)),
path: file.path,
mtime: file.stat.mtime,

View File

@@ -19,6 +19,7 @@
import { Query } from 'src/search/query'
import { openNote } from 'src/tools/notes'
import { searchEngine } from 'src/search/omnisearch'
import { stringsToRegex } from 'src/tools/text-processing'
export let modal: OmnisearchInFileModal
export let parent: OmnisearchVaultModal | null = null
@@ -64,10 +65,20 @@
$: {
if (note) {
const groups = getGroups(note.matches)
groupedOffsets = groups.map(group =>
Math.round((group.first()!.offset + group.last()!.offset) / 2)
let groups = getGroups(note.matches)
// If there are quotes in the search,
// only show results that match at least one of the quotes
const exactTerms = query.getExactTerms()
if (exactTerms.length) {
groups = groups.filter(group =>
exactTerms.every(exact =>
group.some(match => match.match.includes(exact))
)
)
}
groupedOffsets = groups.map(group => Math.round(group.first()!.offset))
}
}
@@ -77,13 +88,12 @@
function getGroups(matches: SearchMatch[]): SearchMatch[][] {
const groups: SearchMatch[][] = []
let lastOffset = -1
let count = 0 // TODO: FIXME: this is a hack to avoid infinite loops
while (true) {
let count = 0 // Avoid infinite loops
while (++count < 100) {
const group = getGroupedMatches(matches, lastOffset, excerptAfter)
if (!group.length) break
lastOffset = group.last()!.offset
groups.push(group)
if (++count > 100) break
}
return groups
}
@@ -121,7 +131,9 @@
if (parent) parent.close()
// Open (or switch focus to) the note
await openNote(note, newTab)
const reg = stringsToRegex(note.foundWords)
reg.exec(note.content)
await openNote(note, reg.lastIndex, newTab)
// Move cursor to the match
const view = app.workspace.getActiveViewOfType(MarkdownView)

View File

@@ -116,7 +116,7 @@
historySearchIndex = 0
}
searchQuery = history[historySearchIndex]
refInput?.setInputValue(searchQuery)
refInput?.setInputValue(searchQuery ?? '')
}
async function nextSearchHistory() {
@@ -125,7 +125,7 @@
historySearchIndex = history.length ? history.length - 1 : 0
}
searchQuery = history[historySearchIndex]
refInput?.setInputValue(searchQuery)
refInput?.setInputValue(searchQuery ?? '')
}
let cancelableQuery: CancelablePromise<ResultNote[]> | null = null
@@ -186,7 +186,8 @@
function openSearchResult(note: ResultNote, newPane = false) {
saveCurrentQuery()
openNote(note, newPane)
const offset = note.matches?.[0]?.offset ?? 0
openNote(note, offset, newPane)
}
async function onClickCreateNote(_e: MouseEvent) {

View File

@@ -53,6 +53,7 @@ export type IndexedDocument = {
mtime: number
content: string
cleanedContent: string
aliases: string
tags: string[]
unmarkedTags: string[]
@@ -120,7 +121,7 @@ export function isCacheEnabled(): boolean {
}
export const SEPARATORS =
/[|\t\n\r\^= -#%-*,.`\/<>:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]/
/[|\t\n\r\^"= -#%-*,.`\/<>:;?@[-\]_{}\u00A0\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C77\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166E\u1680\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2000-\u200A\u2010-\u2029\u202F-\u2043\u2045-\u2051\u2053-\u205F\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4F\u3000-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]/
.toString()
.slice(1, -1)
export const SPACE_OR_PUNCTUATION = new RegExp(`${SEPARATORS}+`, 'u')

View File

@@ -14,6 +14,7 @@ import {
import {
eventBus,
EventNames,
getTextExtractor,
indexingStep,
IndexingStepType,
isCacheEnabled,
@@ -36,8 +37,8 @@ export default class OmnisearchPlugin extends Plugin {
this.addSettingTab(new SettingsTab(this))
if (!Platform.isMobile) {
import('./tools/api-server').then(m =>
this.apiHttpServer = m.getServer()
import('./tools/api-server').then(
m => (this.apiHttpServer = m.getServer())
)
}
@@ -119,7 +120,7 @@ export default class OmnisearchPlugin extends Plugin {
})
)
this.executeFirstLaunchTasks()
await this.executeFirstLaunchTasks()
await this.populateIndex()
if (this.apiHttpServer && settings.httpApiEnabled) {
@@ -128,18 +129,17 @@ export default class OmnisearchPlugin extends Plugin {
})
}
executeFirstLaunchTasks(): void {
const code = '1.10.1'
if (settings.welcomeMessage !== code) {
// const welcome = new DocumentFragment()
// welcome.createSpan({}, span => {
// span.innerHTML = `🔎 Omnisearch now requires the <strong>Text Extractor</strong> plugin to index PDF and images. See Omnisearch settings for more information.`
// })
// new Notice(welcome, 20_000)
async executeFirstLaunchTasks(): Promise<void> {
const code = '1.21.0'
if (settings.welcomeMessage !== code && getTextExtractor()) {
const welcome = new DocumentFragment()
welcome.createSpan({}, span => {
span.innerHTML = `🔎 Omnisearch can now index .docx and .xlsx documents. Don't forget to update Text Extractor and enable the toggle in Omnisearch settings.`
})
new Notice(welcome, 20_000)
}
settings.welcomeMessage = code
this.saveData(settings)
await this.saveData(settings)
}
async onunload(): Promise<void> {

View File

@@ -1,59 +1,18 @@
import MiniSearch, { type Options, type SearchResult } from 'minisearch'
import type { DocumentRef, IndexedDocument, ResultNote } from '../globals'
import {
BRACKETS_AND_SPACE,
chsRegex,
getChsSegmenter,
SPACE_OR_PUNCTUATION,
} from '../globals'
import { settings } from '../settings'
import {
chunkArray,
logDebug,
removeDiacritics,
splitCamelCase,
splitHyphens,
stripMarkdownCharacters,
} from '../tools/utils'
import { chunkArray, logDebug, removeDiacritics } from '../tools/utils'
import { Notice } from 'obsidian'
import type { Query } from './query'
import { cacheManager } from '../cache-manager'
import { sortBy } from 'lodash-es'
import { getMatches, stringsToRegex } from 'src/tools/text-processing'
// Legacy tokenizer (superseded by src/search/tokenizer.ts): combines several
// tokenization passes so a term can be found under any of its spellings.
// Note: each pass appends to `tokens`, so later passes also re-split the
// results of earlier passes (order matters here).
const tokenize = (text: string): string[] => {
// Whole "words" (split only on brackets/whitespace, not punctuation)
const words = text.split(BRACKETS_AND_SPACE)
// Base tokens, split on spaces and punctuation
let tokens = text.split(SPACE_OR_PUNCTUATION)
// Split hyphenated tokens
tokens = [...tokens, ...tokens.flatMap(splitHyphens)]
// Split camelCase tokens into "camel" and "case"
tokens = [...tokens, ...tokens.flatMap(splitCamelCase)]
// Add whole words (aka "not tokens")
tokens = [...tokens, ...words]
// When enabled, we only use the chsSegmenter,
// and not the other custom tokenizers
const chsSegmenter = getChsSegmenter()
if (chsSegmenter) {
const chs = tokens.flatMap(word =>
chsRegex.test(word) ? chsSegmenter.cut(word) : [word]
)
tokens = [...tokens, ...chs]
}
// Remove duplicates
tokens = [...new Set(tokens)]
return tokens
}
import { tokenizeForIndexing, tokenizeForSearch } from './tokenizer'
export class Omnisearch {
public static readonly options: Options<IndexedDocument> = {
tokenize,
tokenize: tokenizeForIndexing,
extractField: (doc, fieldName) => {
if (fieldName === 'directory') {
// return path without the filename
@@ -87,6 +46,7 @@ export class Omnisearch {
},
}
private minisearch: MiniSearch
/** Map<path, mtime> */
private indexedDocuments: Map<string, number> = new Map()
// private previousResults: SearchResult[] = []
// private previousQuery: Query | null = null
@@ -212,14 +172,15 @@ export class Omnisearch {
break
}
let results = this.minisearch.search(query.segmentsToStr(), {
const searchTokens = tokenizeForSearch(query.segmentsToStr())
logDebug(JSON.stringify(searchTokens, null, 1))
let results = this.minisearch.search(searchTokens, {
prefix: term => term.length >= options.prefixLength,
// length <= 3: no fuzziness
// length <= 5: fuzziness of 10%
// length > 5: fuzziness of 20%
fuzzy: term =>
term.length <= 3 ? 0 : term.length <= 5 ? fuzziness / 2 : fuzziness,
combineWith: 'AND',
boost: {
basename: settings.weightBasename,
directory: settings.weightDirectory,
@@ -321,10 +282,10 @@ export class Omnisearch {
results = results.filter(r => {
const document = documents.find(d => d.path === r.id)
const title = document?.path.toLowerCase() ?? ''
const content = stripMarkdownCharacters(
document?.content ?? ''
).toLowerCase()
return exactTerms.every(q => content.includes(q) || title.includes(q))
const content = (document?.cleanedContent ?? '').toLowerCase()
return exactTerms.every(
q => content.includes(q) || removeDiacritics(title).includes(q)
)
})
}
@@ -333,7 +294,7 @@ export class Omnisearch {
if (exclusions.length) {
logDebug('Filtering with exclusions')
results = results.filter(r => {
const content = stripMarkdownCharacters(
const content = (
documents.find(d => d.path === r.id)?.content ?? ''
).toLowerCase()
return exclusions.every(q => !content.includes(q))
@@ -402,7 +363,7 @@ export class Omnisearch {
const foundWords = [
// Matching terms from the result,
// do not necessarily match the query
...Object.keys(result.match),
...result.terms,
// Quoted expressions
...query.getExactTerms(),

View File

@@ -46,6 +46,15 @@ export class Query {
}
this.query = parsed
// Extract keywords starting with a dot...
const ext = this.query.text
.filter(o => o.startsWith('.'))
.map(o => o.slice(1))
// add them to the ext field...
this.query.ext = [...new Set([...ext, ...(this.query.ext ?? [])])]
// and remove them from the text field
this.query.text = this.query.text.filter(o => !o.startsWith('.'))
// Get strings in quotes, and remove the quotes
this.#inQuotes =
text.match(/"([^"]+)"/g)?.map(o => o.replace(/"/g, '')) ?? []
@@ -89,4 +98,13 @@ export class Query {
),
]
}
/**
 * Returns the best string to locate an excerpt around:
 * the longest quoted expression when the query contains quotes,
 * otherwise the whole query string.
 */
public getBestStringForExcerpt(): string {
  // If we have quoted expressions, return the longest one.
  // Use reduce() instead of sort(): Array.prototype.sort mutates the array
  // in place, which would silently reorder #inQuotes for later callers
  // (e.g. getExactTerms()).
  if (this.#inQuotes.length) {
    return this.#inQuotes.reduce(
      (best, current) => (current.length > best.length ? current : best),
      ''
    )
  }
  // Otherwise, just return the query as is
  return this.segmentsToStr()
}
}

79
src/search/tokenizer.ts Normal file
View File

@@ -0,0 +1,79 @@
import type { QueryCombination } from 'minisearch'
import {
BRACKETS_AND_SPACE,
SPACE_OR_PUNCTUATION,
chsRegex,
getChsSegmenter,
} from 'src/globals'
import { logDebug, splitCamelCase, splitHyphens } from 'src/tools/utils'
/** Splits the text only on brackets and whitespace, keeping whole words intact. */
function tokenizeWords(input: string): string[] {
  return input.split(BRACKETS_AND_SPACE)
}
/** Splits the text on spaces and punctuation, producing the base token list. */
function tokenizeTokens(input: string): string[] {
  return input.split(SPACE_OR_PUNCTUATION)
}
/**
* Tokenization for indexing will possibly return more tokens than the original text.
* This is because we combine different methods of tokenization to get the best results.
* @param text
* @returns
*/
export function tokenizeForIndexing(text: string): string[] {
const words = tokenizeWords(text)
let tokens = tokenizeTokens(text)
// Split hyphenated tokens
tokens = [...tokens, ...tokens.flatMap(splitHyphens)]
// Split camelCase tokens into "camel" and "case
tokens = [...tokens, ...tokens.flatMap(splitCamelCase)]
// Add whole words (aka "not tokens")
tokens = [...tokens, ...words]
const chsSegmenter = getChsSegmenter()
if (chsSegmenter) {
const chs = tokens.flatMap(word =>
chsRegex.test(word) ? chsSegmenter.cut(word) : [word]
)
tokens = [...tokens, ...chs]
}
// Remove duplicates
tokens = [...new Set(tokens)]
return tokens
}
/**
* Search tokenization will use the same tokenization methods as indexing,
* but will combine each group with "OR" operators
* @param text
* @returns
*/
export function tokenizeForSearch(text: string): QueryCombination {
const tokens = tokenizeTokens(text)
let chs: string[] = []
const chsSegmenter = getChsSegmenter()
if (chsSegmenter) {
chs = tokens.flatMap(word =>
chsRegex.test(word) ? chsSegmenter.cut(word) : [word]
)
}
return {
combineWith: 'OR',
queries: [
{ combineWith: 'AND', queries: tokens },
{ combineWith: 'AND', queries: tokenizeWords(text) },
{ combineWith: 'AND', queries: tokens.flatMap(splitHyphens) },
{ combineWith: 'AND', queries: tokens.flatMap(splitCamelCase) },
{ combineWith: 'AND', queries: chs },
],
}
}

View File

@@ -37,6 +37,9 @@ export interface OmnisearchSettings extends WeightingSettings {
PDFIndexing: boolean
/** Enable Images indexing */
imagesIndexing: boolean
/** Enable Office documents indexing */
officeIndexing: boolean
/** Enable indexing of unknown files */
unsupportedFilesIndexing: 'yes' | 'no' | 'default'
/** Activate the small 🔍 button on Obsidian's ribbon */
@@ -99,7 +102,7 @@ export class SettingsTab extends PluginSettingTab {
// Sponsor link - Thank you!
const divSponsor = containerEl.createDiv()
divSponsor.innerHTML = `
<iframe src="https://github.com/sponsors/scambier/button" title="Sponsor scambier" height="35" width="116" style="border: 0;"></iframe>
<iframe sandbox="allow-top-navigation-by-user-activation" src="https://github.com/sponsors/scambier/button" title="Sponsor scambier" height="35" width="116" style="border: 0;"></iframe>
<a href='https://ko-fi.com/B0B6LQ2C' target='_blank'><img height='36' style='border:0px;height:36px;' src='https://cdn.ko-fi.com/cdn/kofi2.png?v=3' border='0' alt='Buy Me a Coffee at ko-fi.com' /></a>
`
@@ -158,11 +161,30 @@ export class SettingsTab extends PluginSettingTab {
)
.setDisabled(!getTextExtractor())
// Office Documents Indexing
const indexOfficesDesc = new DocumentFragment()
indexOfficesDesc.createSpan({}, span => {
span.innerHTML = `Omnisearch will use Text Extractor to index the content of your office documents (currently <pre style="display:inline">.docx</pre> and <pre style="display:inline">.xlsx</pre>)`
})
new Setting(containerEl)
.setName(
`Documents content indexing ${getTextExtractor() ? '' : '⚠️ Disabled'}`
)
.setDesc(indexOfficesDesc)
.addToggle(toggle =>
toggle.setValue(settings.officeIndexing).onChange(async v => {
await database.clearCache()
settings.officeIndexing = v
await saveSettings(this.plugin)
})
)
.setDisabled(!getTextExtractor())
// Index filenames of unsupported files
const indexUnsupportedDesc = new DocumentFragment()
indexUnsupportedDesc.createSpan({}, span => {
span.innerHTML = `
Omnisearch can index file<strong>names</strong> of "unsupported" files, such as e.g. <pre style="display:inline">.mp4</pre>, <pre style="display:inline">.xlsx</pre>,
Omnisearch can index file<strong>names</strong> of "unsupported" files, such as e.g. <pre style="display:inline">.mp4</pre>
or non-extracted PDFs & images.<br/>
"Obsidian setting" will respect the value of "Files & Links > Detect all file extensions"`
})
@@ -185,7 +207,7 @@ export class SettingsTab extends PluginSettingTab {
indexedFileTypesDesc.createSpan({}, span => {
span.innerHTML = `In addition to standard <code>md</code> files, Omnisearch can also index other <strong style="color: var(--text-accent)">PLAINTEXT</strong> files.<br/>
Add extensions separated by a space, without the dot. Example: "<code>txt org csv</code>".<br />
⚠️ <span style="color: var(--text-accent)">Using extensions of non-plaintext files (like .docx or .pptx) WILL cause crashes,
⚠️ <span style="color: var(--text-accent)">Using extensions of non-plaintext files (like .pptx) WILL cause crashes,
because Omnisearch will try to index their content.</span>`
})
new Setting(containerEl)
@@ -444,24 +466,6 @@ export class SettingsTab extends PluginSettingTab {
//#endregion Results Weighting
//#region Debugging
new Setting(containerEl).setName('Debugging').setHeading()
new Setting(containerEl)
.setName('Enable verbose logging')
.setDesc(
"Adds a LOT of logs for debugging purposes. Don't forget to disable it."
)
.addToggle(toggle =>
toggle.setValue(settings.verboseLogging).onChange(async v => {
settings.verboseLogging = v
await saveSettings(this.plugin)
})
)
//#endregion Debugging
//#region HTTP Server
if (!Platform.isMobile) {
@@ -521,6 +525,24 @@ export class SettingsTab extends PluginSettingTab {
//#endregion HTTP Server
//#region Debugging
new Setting(containerEl).setName('Debugging').setHeading()
new Setting(containerEl)
.setName('Enable verbose logging')
.setDesc(
"Adds a LOT of logs for debugging purposes. Don't forget to disable it."
)
.addToggle(toggle =>
toggle.setValue(settings.verboseLogging).onChange(async v => {
settings.verboseLogging = v
await saveSettings(this.plugin)
})
)
//#endregion Debugging
//#region Danger Zone
new Setting(containerEl).setName('Danger Zone').setHeading()
@@ -602,6 +624,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
ignoreDiacritics: true,
indexedFileTypes: [] as string[],
PDFIndexing: false,
officeIndexing: false,
imagesIndexing: false,
unsupportedFilesIndexing: 'no',
splitCamelCase: false,

View File

@@ -63,7 +63,7 @@ export function getServer() {
close() {
server.close()
console.log(`Omnisearch - Terminated HTTP server`)
if (settings.httpApiNotice) {
if (settings.httpApiEnabled && settings.httpApiNotice) {
new Notice(`Omnisearch - Terminated HTTP server`)
}
},

View File

@@ -4,12 +4,9 @@ import { stringsToRegex } from './text-processing'
export async function openNote(
item: ResultNote,
offset = 0,
newPane = false
): Promise<void> {
const reg = stringsToRegex(item.foundWords)
reg.exec(item.content)
const offset = reg.lastIndex
// Check if the note is already open,
// to avoid opening it twice if the first one is pinned
let alreadyOpenAndPinned = false

View File

@@ -14,13 +14,6 @@ import type { Query } from 'src/search/query'
import { Notice } from 'obsidian'
import { escapeRegExp } from 'lodash-es'
export function highlighterGroups(_substring: string, ...args: any[]) {
// args[0] is the single char preceding args[1], which is the word we want to highlight
if (!!args[1].trim())
return `<span>${args[0]}</span><span class="${highlightClass}">${args[1]}</span>`
return '&lt;no content&gt;'
}
/**
* Wraps the matches in the text with a <span> element and a highlight class
* @param text
@@ -134,18 +127,18 @@ export function getMatches(
.substring(matchStartIndex, matchEndIndex)
.trim()
if (originalMatch && match.index >= 0) {
matches.push({ match: originalMatch, offset: match.index + 1 })
matches.push({ match: originalMatch, offset: match.index })
}
}
// If the query is more than 1 token and can be found "as is" in the text, put this match first
if (query && query.query.text.length > 1) {
const best = text.indexOf(query.segmentsToStr())
if (query && (query.query.text.length > 1 || query.getExactTerms().length > 0)) {
const best = text.indexOf(query.getBestStringForExcerpt())
if (best > -1 && matches.find(m => m.offset === best)) {
matches = matches.filter(m => m.offset !== best)
matches.unshift({
offset: best,
match: query.segmentsToStr(),
match: query.getBestStringForExcerpt(),
})
}
}

View File

@@ -9,10 +9,6 @@ import { canIndexUnsupportedFiles, settings } from '../settings'
import { type BinaryLike, createHash } from 'crypto'
import { md5 } from 'pure-md5'
// export function highlighter(str: string): string {
// return `<span class="${highlightClass}">${str}</span>`
// }
export function pathWithoutFilename(path: string): string {
const split = path.split('/')
split.pop()
@@ -174,6 +170,11 @@ export function isFilePDF(path: string): boolean {
return getExtension(path) === 'pdf'
}
/** True when the path points to an Office document Omnisearch can extract (.docx / .xlsx). */
export function isFileOffice(path: string): boolean {
  return ['docx', 'xlsx'].includes(getExtension(path))
}
export function isFilePlaintext(path: string): boolean {
return [...settings.indexedFileTypes, 'md'].some(t => path.endsWith(`.${t}`))
}

View File

@@ -122,5 +122,12 @@
"1.18.1": "1.3.0",
"1.19.0-beta.1": "1.3.0",
"1.19.0": "1.3.0",
"1.20.0-beta.1": "1.3.0"
"1.20.0-beta.1": "1.3.0",
"1.20.0": "1.3.0",
"1.20.1": "1.3.0",
"1.20.2": "1.3.0",
"1.20.3": "1.3.0",
"1.20.4": "1.3.0",
"1.21.0": "1.3.0",
"1.21.1": "1.3.0"
}