diff --git a/manifest-beta.json b/manifest-beta.json new file mode 100644 index 0000000..f7f61bd --- /dev/null +++ b/manifest-beta.json @@ -0,0 +1,10 @@ +{ + "id": "omnisearch", + "name": "Omnisearch", + "version": "1.0.1", + "minAppVersion": "0.14.2", + "description": "A search engine that just works", + "author": "Simon Cambier", + "authorUrl": "https://github.com/scambier/obsidian-omnisearch", + "isDesktopOnly": false +} \ No newline at end of file diff --git a/src/components/ModalInFile.svelte b/src/components/ModalInFile.svelte index cd6e047..83d35d9 100644 --- a/src/components/ModalInFile.svelte +++ b/src/components/ModalInFile.svelte @@ -42,14 +42,14 @@ onDestroy(() => { eventBus.enable("vault") }) -$: { +$: (async () => { if (searchQuery) { - note = getSuggestions(searchQuery, { singleFilePath })[0] ?? null + note = (await getSuggestions(searchQuery, { singleFilePath }))[0] ?? null lastSearch = searchQuery } selectedIndex = 0 scrollIntoView() -} +})() $: { if (note) { diff --git a/src/components/ModalVault.svelte b/src/components/ModalVault.svelte index 3e792ed..aadf23e 100644 --- a/src/components/ModalVault.svelte +++ b/src/components/ModalVault.svelte @@ -20,13 +20,10 @@ let searchQuery: string let resultNotes: ResultNote[] = [] $: selectedNote = resultNotes[selectedIndex] -$: { - if (searchQuery) { - resultNotes = getSuggestions(searchQuery) - lastSearch = searchQuery - } - selectedIndex = 0 - scrollIntoView() +$: if (searchQuery) { + updateResults() +} else { + resultNotes = [] } onMount(() => { @@ -39,6 +36,14 @@ onMount(() => { eventBus.on("vault", "arrow-down", () => moveIndex(1)) }) +async function updateResults() { + resultNotes = await getSuggestions(searchQuery) + lastSearch = searchQuery + selectedIndex = 0 + scrollIntoView() + // if (resultNotes.length) console.log(resultNotes[0]) +} + function onClick() { if (!selectedNote) return openNote(selectedNote) diff --git a/src/globals.ts b/src/globals.ts index f3067da..e7ba6a8 100644 --- 
import { stripSurroundingQuotes } from './utils'

type QueryToken = {
  /**
   * Text of the token, with any surrounding quotes removed
   */
  value: string

  /**
   * True when removing surrounding quotes changed the token,
   * i.e. the user wrote it between quotes
   */
  exact: boolean
}

/**
 * Structured representation of a search string: the words to look for
 * and the words that must not appear in a result.
 */
export class Query {
  public words: QueryToken[] = []
  public exclusions: QueryToken[] = []

  /**
   * Lower-cases and parses the raw search string, then converts the parsed
   * terms to tokens. Exclusions whose value is empty are discarded.
   * @param text the search string as typed by the user
   */
  constructor(text: string) {
    const parsed = parseQuery(text.toLowerCase(), { tokenize: true })
    const excluded = parsed.exclude.text.map(raw => this.formatToken(raw))
    this.exclusions = excluded.filter(token => token.value !== '')
    this.words = parsed.text.map(raw => this.formatToken(raw))
  }

  /**
   * @returns the values of all the words, joined with single spaces
   */
  public getWordsStr(): string {
    const values: string[] = []
    for (const word of this.words) {
      values.push(word.value)
    }
    return values.join(' ')
  }

  /**
   * @returns the values of the words that were written between quotes
   */
  public getExactTerms(): string[] {
    const exactTerms: string[] = []
    for (const { value, exact } of this.words) {
      if (exact) {
        exactTerms.push(value)
      }
    }
    return exactTerms
  }

  /**
   * Converts a raw parsed term to a token
   * @param str the term, possibly still wrapped in quotes
   */
  private formatToken(str: string): QueryToken {
    const value = stripSurroundingQuotes(str)
    return { value, exact: value !== str }
  }
}
/*!
 * search-query-parser.js
 * Original: https://github.com/nepsilon/search-query-parser
 * Modified by Simon Cambier
 * Copyright(c) 2014-2019
 * MIT Licensed
 */

interface SearchParserOptions {
  offsets?: boolean
  tokenize: true
  keywords?: string[]
  ranges?: string[]
  alwaysArray?: boolean
}

interface ISearchParserDictionary {
  [key: string]: any
}

type SearchParserKeyWordOffset = {
  keyword: string
  value?: string
}

type SearchParserTextOffset = {
  text: string
}

type SearchParserOffset = (
  | SearchParserKeyWordOffset
  | SearchParserTextOffset
) & {
  offsetStart: number
  offsetEnd: number
}

interface SearchParserResult extends ISearchParserDictionary {
  text: string[]
  offsets: SearchParserOffset[]
  exclude: { text: string[] }
}

/**
 * Removes backslash escapes from a raw term: `\x` becomes `x`, `\\` becomes
 * `\`, `\0` becomes the NUL character, and a backslash that is not followed
 * by an escapable character is dropped.
 */
function stripEscapes(raw: string): string {
  return raw.replace(/\\(.?)/g, (_match: string, escaped: string) =>
    escaped === '0' ? '\u0000' : escaped,
  )
}

/**
 * Stores the comma-separated `value` of a keyword under `target[key]`.
 * A first single value is stored as a string (or as a one-item array when
 * `alwaysArray` is set); several values, or any repeated keyword, are stored
 * as a flat array of all the values seen so far.
 */
function recordKeywordValue(
  target: ISearchParserDictionary,
  key: string,
  value: string,
  alwaysArray: boolean,
): void {
  const values = value.split(',')
  const previous = target[key]
  if (previous) {
    // Always merge the *split* values, so that `k:a k:b,c` gives [a, b, c]
    target[key] = (Array.isArray(previous) ? previous : [previous]).concat(
      values,
    )
  }
  else if (values.length > 1 || alwaysArray) {
    target[key] = values
  }
  else {
    target[key] = value
  }
}

/**
 * Parses a search string into plain text terms, excluded terms (prefixed
 * with `-`), and `keyword:value` / `range:from-to` entries.
 *
 * Terms are separated by whitespace; single- or double-quoted expressions
 * (optionally prefixed with `-` or `key:`) are kept as one term, quotes
 * included. A `key:value` term whose key is neither a registered keyword nor
 * a registered range is kept verbatim as text, including a leading `-`.
 *
 * @param string the search string; a falsy value is treated as empty
 * @param options parser options. The object is only read, never modified.
 *   `offsets` defaults to true, `keywords` and `ranges` to empty lists.
 * @returns the parsed query: `text` holds the plain terms in input order,
 *   `exclude` the excluded terms and keywords, `offsets` the position of each
 *   recorded term in the input (left empty when `options.offsets` is false),
 *   plus one property per keyword or range that was found.
 */
function parseQuery(
  string: string,
  options: SearchParserOptions,
): SearchParserResult {
  // Read the options without writing defaults back into the caller's object
  const withOffsets = options?.offsets === undefined ? true : options.offsets
  const keywords = options?.keywords ?? []
  const ranges = options?.ranges ?? []
  const alwaysArray = Boolean(options?.alwaysArray)

  // Our object to store the query object
  const query: SearchParserResult = {
    text: [],
    offsets: [],
    exclude: { text: [] },
  }
  const exclusion: ISearchParserDictionary & { text: string[] } = { text: [] }

  // Get a list of search terms respecting single and double quotes.
  // The regex is created on each call because a global regex is stateful.
  const termRegex =
    /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|(-?"(?:[^"\\]|\\.)*")|(-?'(?:[^'\\]|\\.)*')|\S+/g
  const input = string || ''

  let match: RegExpExecArray | null
  while ((match = termRegex.exec(input)) !== null) {
    const raw = match[0]
    const offsetStart = match.index
    // Offsets describe the matched span of the input, escapes included
    const offsetEnd = offsetStart + raw.length
    const sepIndex = raw.indexOf(':')

    // Terms without a `:` are plain text, or exclusions when prefixed by `-`
    if (sepIndex === -1) {
      const isExcludedTerm = raw[0] === '-'
      const text = stripEscapes(isExcludedTerm ? raw.slice(1) : raw)
      if (isExcludedTerm) {
        exclusion.text.push(text)
      }
      else if (text) {
        // A term that is empty once unescaped carries no information
        query.text.push(text)
        if (withOffsets) {
          query.offsets.push({ text, offsetStart, offsetEnd })
        }
      }
      continue
    }

    // Terms that contain a `:`
    const rawKey = raw.slice(0, sepIndex)
    const value = stripEscapes(raw.slice(sepIndex + 1))

    // Check if the key is a registered keyword, possibly negated with `-`
    let key = rawKey
    let isKeyword = false
    let isExclusion = false
    if (rawKey.startsWith('-')) {
      const bareKey = rawKey.slice(1)
      if (keywords.includes(bareKey)) {
        key = bareKey
        isKeyword = true
        isExclusion = true
      }
    }
    else {
      isKeyword = keywords.includes(rawKey)
    }

    if (isKeyword) {
      if (withOffsets) {
        query.offsets.push({
          keyword: key,
          value,
          // Skip the leading `-` of an excluded keyword
          offsetStart: isExclusion ? offsetStart + 1 : offsetStart,
          offsetEnd,
        })
      }
      if (value.length) {
        recordKeywordValue(
          isExclusion ? exclusion : query,
          key,
          value,
          alwaysArray,
        )
      }
    }
    // The key allows a range: keyword:XXXX-YYYY
    else if (ranges.includes(key)) {
      if (withOffsets) {
        query.offsets.push({ keyword: rawKey, value, offsetStart, offsetEnd })
      }
      const bounds = value.split('-')
      // A single value, or more than two, is stored whole as `from`
      query[key] =
        bounds.length === 2
          ? { from: bounds[0], to: bounds[1] }
          : { from: value }
    }
    // Unregistered key (even an empty one): keep the whole term as pure text
    else {
      const text = rawKey + ':' + value
      query.text.push(text)
      if (withOffsets) {
        query.offsets.push({ text, offsetStart, offsetEnd })
      }
    }
  }

  // Return forged query object
  query.exclude = exclusion
  return query
}
text * @returns */ -function search(query: string): SearchResult[] { - if (!query) return [] - return minisearchInstance.search(query, { +async function search(query: Query): Promise { + if (!query.getWordsStr()) return [] + let results = minisearchInstance.search(query.getWordsStr(), { prefix: true, fuzzy: term => (term.length > 4 ? 0.2 : false), combineWith: 'AND', @@ -70,6 +77,29 @@ function search(query: string): SearchResult[] { headings3: 1.1, }, }) + + // If the search query contains quotes, filter out results that don't have the exact match + const exactTerms = query.getExactTerms() + if (exactTerms.length) { + results = results.filter(r => { + const content = stripMarkdownCharacters( + indexedNotes[r.id]?.content ?? '', + ).toLowerCase() + return exactTerms.every(q => content.includes(q)) + }) + } + + // // If the search query contains exclude terms, filter out results that have them + const exclusions = query.exclusions + if (exclusions.length) { + results = results.filter(r => { + const content = stripMarkdownCharacters( + indexedNotes[r.id]?.content ?? 
'', + ).toLowerCase() + return exclusions.every(q => !content.includes(q.value)) + }) + } + return results } /** @@ -96,12 +126,13 @@ export function getMatches(text: string, reg: RegExp): SearchMatch[] { * @param options * @returns */ -export function getSuggestions( - query: string, +export async function getSuggestions( + queryStr: string, options?: Partial<{ singleFilePath: string | null }>, -): ResultNote[] { +): Promise { // Get the raw results - let results = search(query) + const query = new Query(queryStr) + let results = await search(query) if (!results.length) return [] // Either keep the 50 first results, @@ -121,7 +152,17 @@ export function getSuggestions( if (!note) { throw new Error(`Note "${result.id}" not indexed`) } - const words = Object.keys(result.match) + + // Clean search matches that match quoted expresins, + // and inject those expressions instead + let words = Object.keys(result.match) + const quoted = splitQuotes(query.getWordsStr()) + for (const quote of quoted) { + for (const q of quote.toLowerCase()) { + words = words.filter(w => !w.toLowerCase().startsWith(q)) + } + words.push(quote) + } const matches = getMatches(note.content, stringsToRegex(words)) const resultNote: ResultNote = { score: result.score, diff --git a/src/utils.ts b/src/utils.ts index f152f9a..eb16307 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -5,6 +5,7 @@ import { highlightClass, isSearchMatch, regexLineSplit, + regexStripQuotes, regexYaml, } from './globals' import type { SearchMatch } from './globals' @@ -89,7 +90,7 @@ export function makeExcerpt(content: string, offset: number): string { const pos = offset ?? -1 if (pos > -1) { const from = Math.max(0, pos - excerptBefore) - const to = Math.min(content.length - 1, pos + excerptAfter) + const to = Math.min(content.length, pos + excerptAfter) content = (from > 0 ? 
/**
 * Extracts the expressions written between double quotes in a string.
 * Unquoted words are ignored, and empty quotes (`""`) are dropped.
 * @param str
 * @returns the quoted expressions, without their quotes
 */
export function splitQuotes(str: string): string[] {
  const quoted = str.match(/"(.*?)"/g) ?? []
  return quoted.map(s => s.replace(/"/g, '')).filter(q => !!q)
}

/**
 * Removes a single or double quote found at the very start and/or the very
 * end of a string
 * @param str
 * @returns
 */
export function stripSurroundingQuotes(str: string): string {
  return str.replace(regexStripQuotes, '')
}

/**
 * Calls an async callback on every item of an array, and resolves with all
 * the results once every call has resolved
 * @param array
 * @param callbackfn
 * @returns
 */
function mapAsync<T, U>(
  array: T[],
  callbackfn: (value: T, index: number, array: T[]) => Promise<U>,
): Promise<U[]> {
  return Promise.all(array.map(callbackfn))
}

/**
 * Like `Array.prototype.filter`, but with an async predicate.
 * https://stackoverflow.com/a/53508547
 * @param array
 * @param callbackfn
 * @returns the items for which the predicate resolved to `true`
 */
export async function filterAsync<T>(
  array: T[],
  callbackfn: (value: T, index: number, array: T[]) => Promise<boolean>,
): Promise<T[]> {
  const filterMap = await mapAsync(array, callbackfn)
  return array.filter((_value, index) => filterMap[index])
}

/**
 * A simple function to strip bold and italic markdown chars from a string.
 * Any run of `*` or `_` followed by some text and another run of `*` or `_`
 * is replaced by the text alone.
 * NOTE(review): this also removes underscores inside identifiers such as
 * `snake_case_name` — confirm this is acceptable for exact matching.
 * @param text
 * @returns
 */
export function stripMarkdownCharacters(text: string): string {
  return text.replace(
    /(\*|_)+(.+?)(\*|_)+/g,
    (_match: string, _opening: string, inner: string) => inner,
  )
}