Merge branch 'develop'

This commit is contained in:
Simon Cambier
2023-03-18 11:11:21 +01:00
9 changed files with 144 additions and 52 deletions

View File

@@ -1,6 +1,6 @@
{ {
"name": "scambier.obsidian-search", "name": "scambier.obsidian-search",
"version": "1.12.3", "version": "1.13.0-beta.2",
"description": "A search engine for Obsidian", "description": "A search engine for Obsidian",
"main": "dist/main.js", "main": "dist/main.js",
"scripts": { "scripts": {

View File

@@ -11,6 +11,7 @@ import {
getTagsFromMetadata, getTagsFromMetadata,
isFileCanvas, isFileCanvas,
isFilePlaintext, isFilePlaintext,
logDebug,
makeMD5, makeMD5,
removeDiacritics, removeDiacritics,
} from './tools/utils' } from './tools/utils'
@@ -150,6 +151,7 @@ class CacheManager {
if (this.documents.has(path)) { if (this.documents.has(path)) {
return this.documents.get(path)! return this.documents.get(path)!
} }
logDebug('Generating IndexedDocument from', path)
await this.addToLiveCache(path) await this.addToLiveCache(path)
return this.documents.get(path)! return this.documents.get(path)!
} }

View File

@@ -15,6 +15,7 @@
getCtrlKeyLabel, getCtrlKeyLabel,
getExtension, getExtension,
isFilePDF, isFilePDF,
logDebug,
loopIndex, loopIndex,
} from 'src/tools/utils' } from 'src/tools/utils'
import { import {
@@ -27,7 +28,6 @@
import * as NotesIndex from '../notes-index' import * as NotesIndex from '../notes-index'
import { cacheManager } from '../cache-manager' import { cacheManager } from '../cache-manager'
import { searchEngine } from 'src/search/omnisearch' import { searchEngine } from 'src/search/omnisearch'
import CancelablePromise, { cancelable } from 'cancelable-promise'
export let modal: OmnisearchVaultModal export let modal: OmnisearchVaultModal
export let previousQuery: string | undefined export let previousQuery: string | undefined
@@ -40,28 +40,13 @@
let searching = true let searching = true
let refInput: InputSearch | undefined let refInput: InputSearch | undefined
let pWaitingResults: CancelablePromise | null = null
$: selectedNote = resultNotes[selectedIndex] $: selectedNote = resultNotes[selectedIndex]
$: searchQuery = searchQuery ?? previousQuery $: searchQuery = searchQuery ?? previousQuery
$: if (searchQuery) { $: if (searchQuery) {
if (pWaitingResults) {
pWaitingResults.cancel()
pWaitingResults = null
}
searching = true searching = true
pWaitingResults = cancelable( updateResults().then(() => {
new Promise((resolve, reject) => {
updateResults()
.then(() => {
searching = false searching = false
resolve(null)
}) })
.catch(e => {
reject(e)
})
})
)
} else { } else {
searching = false searching = false
resultNotes = [] resultNotes = []
@@ -130,9 +115,7 @@
async function updateResults() { async function updateResults() {
query = new Query(searchQuery) query = new Query(searchQuery)
resultNotes = (await searchEngine.getSuggestions(query)).sort( resultNotes = await searchEngine.getSuggestions(query)
(a, b) => b.score - a.score
)
selectedIndex = 0 selectedIndex = 0
await scrollIntoView() await scrollIntoView()
} }

View File

@@ -14,7 +14,7 @@ export const excerptBefore = 100
export const excerptAfter = 300 export const excerptAfter = 300
export const highlightClass = `suggestion-highlight omnisearch-highlight ${ export const highlightClass = `suggestion-highlight omnisearch-highlight ${
settings.hightlight ? 'omnisearch-default-highlight' : '' settings.highlight ? 'omnisearch-default-highlight' : ''
}` }`
export const eventBus = new EventBus() export const eventBus = new EventBus()

View File

@@ -18,7 +18,7 @@ import {
isCacheEnabled, isCacheEnabled,
} from './globals' } from './globals'
import api, { notifyOnIndexed } from './tools/api' import api, { notifyOnIndexed } from './tools/api'
import { isFileIndexable } from './tools/utils' import { isFileIndexable, logDebug } from './tools/utils'
import { database, OmnisearchCache } from './database' import { database, OmnisearchCache } from './database'
import * as NotesIndex from './notes-index' import * as NotesIndex from './notes-index'
import { searchEngine } from './search/omnisearch' import { searchEngine } from './search/omnisearch'
@@ -69,6 +69,7 @@ export default class OmnisearchPlugin extends Plugin {
this.registerEvent( this.registerEvent(
this.app.vault.on('create', file => { this.app.vault.on('create', file => {
if (isFileIndexable(file.path)) { if (isFileIndexable(file.path)) {
logDebug('Indexing new file', file.path)
// await cacheManager.addToLiveCache(file.path) // await cacheManager.addToLiveCache(file.path)
searchEngine.addFromPaths([file.path]) searchEngine.addFromPaths([file.path])
} }
@@ -76,6 +77,7 @@ export default class OmnisearchPlugin extends Plugin {
) )
this.registerEvent( this.registerEvent(
this.app.vault.on('delete', file => { this.app.vault.on('delete', file => {
logDebug('Removing file', file.path)
cacheManager.removeFromLiveCache(file.path) cacheManager.removeFromLiveCache(file.path)
searchEngine.removeFromPaths([file.path]) searchEngine.removeFromPaths([file.path])
}) })
@@ -83,6 +85,7 @@ export default class OmnisearchPlugin extends Plugin {
this.registerEvent( this.registerEvent(
this.app.vault.on('modify', async file => { this.app.vault.on('modify', async file => {
if (isFileIndexable(file.path)) { if (isFileIndexable(file.path)) {
logDebug('Updating file', file.path)
await cacheManager.addToLiveCache(file.path) await cacheManager.addToLiveCache(file.path)
NotesIndex.markNoteForReindex(file) NotesIndex.markNoteForReindex(file)
} }
@@ -91,6 +94,7 @@ export default class OmnisearchPlugin extends Plugin {
this.registerEvent( this.registerEvent(
this.app.vault.on('rename', async (file, oldPath) => { this.app.vault.on('rename', async (file, oldPath) => {
if (isFileIndexable(file.path)) { if (isFileIndexable(file.path)) {
logDebug('Renaming file', file.path)
cacheManager.removeFromLiveCache(oldPath) cacheManager.removeFromLiveCache(oldPath)
cacheManager.addToLiveCache(file.path) cacheManager.addToLiveCache(file.path)
searchEngine.removeFromPaths([oldPath]) searchEngine.removeFromPaths([oldPath])

View File

@@ -9,9 +9,12 @@ import { chsRegex, getChsSegmenter, SPACE_OR_PUNCTUATION } from '../globals'
import { settings } from '../settings' import { settings } from '../settings'
import { import {
chunkArray, chunkArray,
logDebug,
removeDiacritics, removeDiacritics,
splitCamelCase,
stringsToRegex, stringsToRegex,
stripMarkdownCharacters, stripMarkdownCharacters,
warnDebug,
} from '../tools/utils' } from '../tools/utils'
import { Notice } from 'obsidian' import { Notice } from 'obsidian'
import type { Query } from './query' import type { Query } from './query'
@@ -25,7 +28,11 @@ const tokenize = (text: string): string[] => {
return tokens.flatMap(word => return tokens.flatMap(word =>
chsRegex.test(word) ? chsSegmenter.cut(word) : [word] chsRegex.test(word) ? chsSegmenter.cut(word) : [word]
) )
} else return tokens } else {
if (settings.splitCamelCase)
return [...tokens, ...tokens.flatMap(splitCamelCase)]
return tokens
}
} }
export class Omnisearch { export class Omnisearch {
@@ -117,11 +124,13 @@ export class Omnisearch {
* @param paths * @param paths
*/ */
public async addFromPaths(paths: string[]): Promise<void> { public async addFromPaths(paths: string[]): Promise<void> {
logDebug('Adding files', paths)
let documents = ( let documents = (
await Promise.all( await Promise.all(
paths.map(async path => await cacheManager.getDocument(path)) paths.map(async path => await cacheManager.getDocument(path))
) )
).filter(d => !!d?.path) ).filter(d => !!d?.path)
logDebug('Sorting documents to first index markdown')
// Index markdown files first // Index markdown files first
documents = sortBy(documents, d => (d.path.endsWith('.md') ? 0 : 1)) documents = sortBy(documents, d => (d.path.endsWith('.md') ? 0 : 1))
@@ -133,6 +142,7 @@ export class Omnisearch {
// Split the documents in smaller chunks to add them to minisearch // Split the documents in smaller chunks to add them to minisearch
const chunkedDocs = chunkArray(documents, 500) const chunkedDocs = chunkArray(documents, 500)
for (const docs of chunkedDocs) { for (const docs of chunkedDocs) {
logDebug('Indexing into search engine', docs)
// Update the list of indexed docs // Update the list of indexed docs
docs.forEach(doc => this.indexedDocuments.set(doc.path, doc.mtime)) docs.forEach(doc => this.indexedDocuments.set(doc.path, doc.mtime))
@@ -170,6 +180,8 @@ export class Omnisearch {
return [] return []
} }
logDebug('Starting search for', query)
let results = this.minisearch.search(query.segmentsToStr(), { let results = this.minisearch.search(query.segmentsToStr(), {
prefix: term => term.length >= options.prefixLength, prefix: term => term.length >= options.prefixLength,
// length <= 3: no fuzziness // length <= 3: no fuzziness
@@ -187,6 +199,8 @@ export class Omnisearch {
}, },
}) })
logDebug('Found', results.length, 'results')
// Filter query results to only keep files that match query.extensions (if any) // Filter query results to only keep files that match query.extensions (if any)
if (query.extensions.length) { if (query.extensions.length) {
results = results.filter(r => { results = results.filter(r => {
@@ -242,7 +256,10 @@ export class Omnisearch {
} }
} }
results = results.slice(0, 50) logDebug('Sorting and limiting results')
// Sort results and keep the 50 best
results = results.sort((a, b) => b.score - a.score).slice(0, 50)
const documents = await Promise.all( const documents = await Promise.all(
results.map(async result => await cacheManager.getDocument(result.id)) results.map(async result => await cacheManager.getDocument(result.id))
@@ -251,6 +268,7 @@ export class Omnisearch {
// If the search query contains quotes, filter out results that don't have the exact match // If the search query contains quotes, filter out results that don't have the exact match
const exactTerms = query.getExactTerms() const exactTerms = query.getExactTerms()
if (exactTerms.length) { if (exactTerms.length) {
logDebug('Filtering with quoted terms')
results = results.filter(r => { results = results.filter(r => {
const document = documents.find(d => d.path === r.id) const document = documents.find(d => d.path === r.id)
const title = document?.path.toLowerCase() ?? '' const title = document?.path.toLowerCase() ?? ''
@@ -264,6 +282,7 @@ export class Omnisearch {
// If the search query contains exclude terms, filter out results that have them // If the search query contains exclude terms, filter out results that have them
const exclusions = query.exclusions const exclusions = query.exclusions
if (exclusions.length) { if (exclusions.length) {
logDebug('Filtering with exclusions')
results = results.filter(r => { results = results.filter(r => {
const content = stripMarkdownCharacters( const content = stripMarkdownCharacters(
documents.find(d => d.path === r.id)?.content ?? '' documents.find(d => d.path === r.id)?.content ?? ''
@@ -271,6 +290,8 @@ export class Omnisearch {
return exclusions.every(q => !content.includes(q.value)) return exclusions.every(q => !content.includes(q.value))
}) })
} }
logDebug('Deduping')
// FIXME: // FIXME:
// Dedupe results - clutch for https://github.com/scambier/obsidian-omnisearch/issues/129 // Dedupe results - clutch for https://github.com/scambier/obsidian-omnisearch/issues/129
results = results.filter( results = results.filter(
@@ -284,11 +305,16 @@ export class Omnisearch {
} }
public getMatches(text: string, reg: RegExp, query: Query): SearchMatch[] { public getMatches(text: string, reg: RegExp, query: Query): SearchMatch[] {
const startTime = new Date().getTime()
let match: RegExpExecArray | null = null let match: RegExpExecArray | null = null
const matches: SearchMatch[] = [] const matches: SearchMatch[] = []
let count = 0 let count = 0
while ((match = reg.exec(text)) !== null) { while ((match = reg.exec(text)) !== null) {
if (++count >= 100) break // Avoid infinite loops, stop looking after 100 matches // Avoid infinite loops, stop looking after 100 matches or if we're taking too much time
if (++count >= 100 || new Date().getTime() - startTime > 50) {
warnDebug('Stopped getMatches at', count, 'results')
break
}
const m = match[0] const m = match[0]
if (m) matches.push({ match: m, offset: match.index }) if (m) matches.push({ match: m, offset: match.index })
} }
@@ -331,17 +357,13 @@ export class Omnisearch {
}) })
} }
// Extract tags from the query
const tags = query.segments
.filter(s => s.value.startsWith('#'))
.map(s => s.value)
const documents = await Promise.all( const documents = await Promise.all(
results.map(async result => await cacheManager.getDocument(result.id)) results.map(async result => await cacheManager.getDocument(result.id))
) )
// Map the raw results to get usable suggestions // Map the raw results to get usable suggestions
const resultNotes = results.map(result => { const resultNotes = results.map(result => {
logDebug('Locating matches for', result.id)
let note = documents.find(d => d.path === result.id) let note = documents.find(d => d.path === result.id)
if (!note) { if (!note) {
// throw new Error(`Omnisearch - Note "${result.id}" not indexed`) // throw new Error(`Omnisearch - Note "${result.id}" not indexed`)
@@ -357,6 +379,12 @@ export class Omnisearch {
query.segments.forEach(s => { query.segments.forEach(s => {
s.value = s.value.replace(/^#/, '') s.value = s.value.replace(/^#/, '')
}) })
// Extract tags from the query
const tags = query.segments
.filter(s => s.value.startsWith('#'))
.map(s => s.value)
// Clean search matches that match quoted expressions, // Clean search matches that match quoted expressions,
// and inject those expressions instead // and inject those expressions instead
const foundWords = [ const foundWords = [
@@ -370,13 +398,15 @@ export class Omnisearch {
// Tags, starting with # // Tags, starting with #
...tags, ...tags,
].filter(w => w.length > 1 || /\p{Emoji}/u.test(w)) ].filter(w => w.length > 1 || /\p{Emoji}/u.test(w))
logDebug('Matching tokens:', foundWords)
// console.log(foundWords) logDebug('Getting matches locations...')
const matches = this.getMatches( const matches = this.getMatches(
note.content, note.content,
stringsToRegex(foundWords), stringsToRegex(foundWords),
query query
) )
logDebug('Matches:', matches)
const resultNote: ResultNote = { const resultNote: ResultNote = {
score: result.score, score: result.score,
foundWords, foundWords,

View File

@@ -45,7 +45,9 @@ export interface OmnisearchSettings extends WeightingSettings {
welcomeMessage: string welcomeMessage: string
/** If a query returns 0 result, try again with more relax conditions */ /** If a query returns 0 result, try again with more relax conditions */
simpleSearch: boolean simpleSearch: boolean
hightlight: boolean highlight: boolean
splitCamelCase: boolean
verboseLogging: boolean
} }
/** /**
@@ -204,6 +206,25 @@ export class SettingsTab extends PluginSettingTab {
}) })
) )
// Split CamelCaseWords
const camelCaseDesc = new DocumentFragment()
camelCaseDesc.createSpan({}, span => {
span.innerHTML = `Enable this if you want to be able to search for CamelCaseWords as separate words.<br/>
⚠️ <span style="color: var(--text-accent)">Changing this setting will clear the cache.</span><br>
<strong style="color: var(--text-accent)">Needs a restart to fully take effect.</strong>
`
})
new Setting(containerEl)
.setName('Split CamelCaseWords')
.setDesc(camelCaseDesc)
.addToggle(toggle =>
toggle.setValue(settings.splitCamelCase).onChange(async v => {
await database.clearCache()
settings.splitCamelCase = v
await saveSettings(this.plugin)
})
)
// Simpler search // Simpler search
new Setting(containerEl) new Setting(containerEl)
.setName('Simpler search') .setName('Simpler search')
@@ -301,8 +322,8 @@ export class SettingsTab extends PluginSettingTab {
'Will highlight matching results when enabled. See README for more customization options.' 'Will highlight matching results when enabled. See README for more customization options.'
) )
.addToggle(toggle => .addToggle(toggle =>
toggle.setValue(settings.hightlight).onChange(async v => { toggle.setValue(settings.highlight).onChange(async v => {
settings.hightlight = v settings.highlight = v
await saveSettings(this.plugin) await saveSettings(this.plugin)
}) })
) )
@@ -337,6 +358,22 @@ export class SettingsTab extends PluginSettingTab {
//#endregion Results Weighting //#endregion Results Weighting
//#region Debugging
new Setting(containerEl).setName('Debugging').setHeading()
new Setting(containerEl)
.setName('Enable verbose logging')
.setDesc('Adds a LOT of logs for debugging purposes. Don\'t forget to disable it.')
.addToggle(toggle =>
toggle.setValue(settings.verboseLogging).onChange(async v => {
settings.verboseLogging = v
await saveSettings(this.plugin)
})
)
//#endregion Debugging
//#region Danger Zone //#region Danger Zone
if (isCacheEnabled()) { if (isCacheEnabled()) {
new Setting(containerEl).setName('Danger Zone').setHeading() new Setting(containerEl).setName('Danger Zone').setHeading()
@@ -379,12 +416,13 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
indexedFileTypes: [] as string[], indexedFileTypes: [] as string[],
PDFIndexing: false, PDFIndexing: false,
imagesIndexing: false, imagesIndexing: false,
splitCamelCase: false,
ribbonIcon: true, ribbonIcon: true,
showExcerpt: true, showExcerpt: true,
renderLineReturnInExcerpts: true, renderLineReturnInExcerpts: true,
showCreateButton: false, showCreateButton: false,
hightlight: true, highlight: true,
showPreviousQueryResults: true, showPreviousQueryResults: true,
simpleSearch: false, simpleSearch: false,
@@ -395,6 +433,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
weightH3: 1.1, weightH3: 1.1,
welcomeMessage: '', welcomeMessage: '',
verboseLogging: false,
} as const } as const
export let settings = Object.assign({}, DEFAULT_SETTINGS) as OmnisearchSettings export let settings = Object.assign({}, DEFAULT_SETTINGS) as OmnisearchSettings

View File

@@ -91,14 +91,17 @@ export function getAllIndices(text: string, regex: RegExp): SearchMatch[] {
*/ */
export function stringsToRegex(strings: string[]): RegExp { export function stringsToRegex(strings: string[]): RegExp {
if (!strings.length) return /^$/g if (!strings.length) return /^$/g
// Default word split is not applied if the user uses the cm-chs-patch plugin
const joined = const joined =
'(' + '(' +
(getChsSegmenter() ? '' : `^|${SPACE_OR_PUNCTUATION.source}`) + // Default word split is not applied if the user uses the cm-chs-patch plugin
(getChsSegmenter()
? ''
: // Split on start of line, spaces, punctuation, or capital letters (for camelCase)
settings.splitCamelCase
? `^|${SPACE_OR_PUNCTUATION.source}|[A-Z]`
: `^|${SPACE_OR_PUNCTUATION.source}`) +
')' + ')' +
'(' + `(${strings.map(s => escapeRegex(s)).join('|')})`
strings.map(s => escapeRegex(s)).join('|') +
')'
const reg = new RegExp(`${joined}`, 'giu') const reg = new RegExp(`${joined}`, 'giu')
return reg return reg
@@ -313,3 +316,27 @@ export function chunkArray<T>(arr: T[], len: number): T[][] {
return chunks return chunks
} }
/**
 * Splits a camelCase / PascalCase word into its component words.
 * Converts 'fooBarBAZLorem' into ['foo', 'Bar', 'BAZ', 'Lorem'].
 *
 * Two kinds of boundaries are recognised:
 * - a lowercase letter followed by an uppercase letter ('foo|Bar')
 * - the last letter of an uppercase run followed by a capitalised word
 *   ('BAZ|Lorem', 'HTML|Parser')
 *
 * Only ASCII letters are considered. A word without any boundary is returned
 * as a single-element array.
 *
 * @param text the word to split
 * @returns the parts of the word, in their original order and casing
 */
export function splitCamelCase(text: string): string[] {
  return text
    .replace(/([a-z](?=[A-Z])|[A-Z](?=[A-Z][a-z]))/g, '$1 ')
    .split(' ')
}
/**
 * Forwards the given values to `printDebug`, using `console.log` as the
 * output function.
 *
 * @param args values to log
 */
export function logDebug(...args: any[]): void {
  const sink = console.log
  printDebug(sink, ...args)
}
/**
 * Forwards the given values to `printDebug`, using `console.warn` as the
 * output function.
 *
 * @param args values to log
 */
export function warnDebug(...args: any[]): void {
  const sink = console.warn
  printDebug(sink, ...args)
}
/**
 * Calls `fn` with an 'Omnisearch -' prefix, a `minutes:seconds:milliseconds`
 * timestamp, and the given values. Does nothing unless
 * `settings.verboseLogging` is truthy.
 *
 * @param fn output function that receives the prefixed arguments
 * @param args values to log
 */
function printDebug(fn: (...args: any[]) => any, ...args: any[]): void {
  if (!settings.verboseLogging) return

  const now = new Date()
  const stamp = [
    now.getMinutes(),
    now.getSeconds(),
    now.getMilliseconds(),
  ].join(':')
  fn('Omnisearch -', `${stamp} -`, ...args)
}

View File

@@ -6,6 +6,8 @@
* MIT Licensed * MIT Licensed
*/ */
import { warnDebug } from "../tools/utils";
interface SearchParserOptions { interface SearchParserOptions {
offsets?: boolean offsets?: boolean
tokenize: true tokenize: true
@@ -43,7 +45,7 @@ interface SearchParserResult extends ISearchParserDictionary {
export function parseQuery( export function parseQuery(
string: string, string: string,
options: SearchParserOptions options: SearchParserOptions,
): SearchParserResult { ): SearchParserResult {
// Set a default options object when none is provided // Set a default options object when none is provided
if (!options) { if (!options) {
@@ -74,9 +76,14 @@ export function parseQuery(
const regex = const regex =
/(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|(-?"(?:[^"\\]|\\.)*")|(-?'(?:[^'\\]|\\.)*')|\S+|\S+:\S+/g /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|(-?"(?:[^"\\]|\\.)*")|(-?'(?:[^'\\]|\\.)*')|\S+|\S+:\S+/g
let match let match
let count = 0 // TODO: FIXME: this is a hack to avoid infinite loops let count = 0
const startTime = new Date().getTime()
while ((match = regex.exec(string)) !== null) { while ((match = regex.exec(string)) !== null) {
if (++count >= 100) break if (++count >= 100 || new Date().getTime() - startTime > 50) {
warnDebug('Stopped SearchParserResult at', count, 'results')
break
}
let term = match[0] let term = match[0]
const sepIndex = term.indexOf(':') const sepIndex = term.indexOf(':')