From 0f7036abae1aaa42454ee478fc1ccc2c17714149 Mon Sep 17 00:00:00 2001
From: Simon Cambier
Date: Fri, 29 Apr 2022 22:56:41 +0200
Subject: [PATCH] #25 - quoted terms and exclusions

---
Review notes (this region, between the "---" marker and the first diff
header, is ignored when the patch is applied):

Purpose: adds src/query.ts, whose `Query` class runs the search text through
a modified copy of search-query-parser (`parseQuery`) and exposes `words`
and `exclusions`. `search()` now takes a `Query`, keeps only results whose
content includes every quoted ("exact") term, and drops results whose
content includes any exclusion value.

Comment-only touch-ups were made on added lines (typo "was't", a doubled
"// //", and an `@param` tag that named `text` while the parameter is
`query`). No line was added or removed, so hunk counts are unchanged.

NOTE(review): in getSuggestions, `splitQuotes(query.getWordsStr())` is fed
values that already went through `stripSurroundingQuotes`, so it looks like
quoted expressions can no longer be found there; `query.getExactTerms()`
may be what was intended - confirm against the full function body.

NOTE(review): `regexStripQuotes` removes a leading or a trailing quote
independently, so a token with a single unmatched quote (e.g. `dogs'`) is
changed by `stripSurroundingQuotes` and `Query.formatToken` flags it as
`exact` - confirm whether that is intended.

NOTE(review): in `parseQuery`, `offsetEnd` for plain text terms is computed
from `term.length` after the leading `-` and backslash escapes have been
removed from `term`, so it can fall short of the matched span. The offsets
are not read by `Query` in this patch.

NOTE(review): the last alternative of the tokenizer regex, `\S+:\S+`,
appears unreachable because the preceding `\S+` alternative matches first.

NOTE(review): this copy of the patch had its whitespace collapsed; the line
structure below was restored so that every hunk matches its header counts,
but the indentation is reconstructed - verify before applying. Text in
angle brackets (generic type arguments such as the one after `Promise`, the
named groups in `regexWikilink`, the author address) appears to have been
stripped from this copy; those lines are left exactly as found.

 src/components/ModalVault.svelte |   2 +-
 src/globals.ts                   |   1 +
 src/query.ts                     | 380 +++++++++++++++++++++++++++++++
 src/search.ts                    |  32 ++-
 src/utils.ts                     |  14 +-
 5 files changed, 416 insertions(+), 13 deletions(-)
 create mode 100644 src/query.ts

diff --git a/src/components/ModalVault.svelte b/src/components/ModalVault.svelte
index 6aaa947..aadf23e 100644
--- a/src/components/ModalVault.svelte
+++ b/src/components/ModalVault.svelte
@@ -41,7 +41,7 @@ async function updateResults() {
   lastSearch = searchQuery
   selectedIndex = 0
   scrollIntoView()
-  if (resultNotes.length) console.log(resultNotes[0])
+  // if (resultNotes.length) console.log(resultNotes[0])
 }
 
 function onClick() {
diff --git a/src/globals.ts b/src/globals.ts
index f3067da..af64743 100644
--- a/src/globals.ts
+++ b/src/globals.ts
@@ -4,6 +4,7 @@ import { EventBus } from './event-bus'
 export const regexWikilink = /^!?\[\[(?.+?)(\|(?.+?))?\]\]/
 export const regexLineSplit = /\r?\n|\r|((\.|\?|!)( |\r?\n|\r))/g
 export const regexYaml = /^---\s*\n(.*?)\n?^---\s?/ms
+export const regexStripQuotes = /^"|"$|^'|'$/g
 
 export const excerptBefore = 100
 export const excerptAfter = 180
diff --git a/src/query.ts b/src/query.ts
new file mode 100644
index 0000000..44c9fb4
--- /dev/null
+++ b/src/query.ts
@@ -0,0 +1,380 @@
+import { stripSurroundingQuotes } from './utils'
+
+type QueryToken = {
+  /**
+   * The query token string value
+   */
+  value: string
+
+  /**
+   * Was this token encased in quotes?
+   */
+  exact: boolean
+}
+
+/**
+ * This class is used to parse a query string into a structured object
+ */
+export class Query {
+  public words: QueryToken[] = []
+  public exclusions: QueryToken[] = []
+
+  constructor(text: string) {
+    const tokens = parseQuery(text.toLowerCase(), { tokenize: true })
+    this.exclusions = tokens.exclude.text
+      .map(this.formatToken)
+      .filter(o => !!o.value)
+    this.words = tokens.text.map(this.formatToken)
+  }
+
+  public getWordsStr(): string {
+    return this.words.map(({ value }) => value).join(' ')
+  }
+
+  /**
+   * Returns the terms that are encased in quotes
+   * @returns
+   */
+  public getExactTerms(): string[] {
+    return this.words.filter(({ exact }) => exact).map(({ value }) => value)
+  }
+
+  private formatToken(str: string): QueryToken {
+    const stripped = stripSurroundingQuotes(str)
+    return {
+      value: stripped,
+      exact: stripped !== str,
+    }
+  }
+}
+
+/*!
+ * search-query-parser.js
+ * Original: https://github.com/nepsilon/search-query-parser
+ * Modified by Simon Cambier
+ * Copyright(c) 2014-2019
+ * MIT Licensed
+ */
+
+interface SearchParserOptions {
+  offsets?: boolean
+  tokenize: true
+  keywords?: string[]
+  ranges?: string[]
+  alwaysArray?: boolean
+}
+
+interface ISearchParserDictionary {
+  [key: string]: any
+}
+
+type SearchParserKeyWordOffset = {
+  keyword: string
+  value?: string
+}
+
+type SearchParserTextOffset = {
+  text: string
+}
+
+type SearchParserOffset = (
+  | SearchParserKeyWordOffset
+  | SearchParserTextOffset
+) & {
+  offsetStart: number
+  offsetEnd: number
+}
+
+interface SearchParserResult extends ISearchParserDictionary {
+  text: string[]
+  offsets: SearchParserOffset[]
+  exclude: { text: string[] }
+}
+
+function parseQuery(
+  string: string,
+  options: SearchParserOptions,
+): SearchParserResult {
+  // Set a default options object when none is provided
+  if (!options) {
+    options = { offsets: true, tokenize: true }
+  }
+  else {
+    // If options offsets wasn't passed, set it to true
+    options.offsets =
+      typeof options.offsets === 'undefined' ? true : options.offsets
+  }
+
+  if (!string) {
+    string = ''
+  }
+
+  // Our object to store the query object
+  const query: SearchParserResult = {
+    text: [],
+    offsets: [],
+    exclude: { text: [] },
+  }
+  // When offsets is true, create their array
+  if (options.offsets) {
+    query.offsets = []
+  }
+  const exclusion: ISearchParserDictionary & { text: string[] } = { text: [] }
+  const terms = []
+  // Get a list of search terms respecting single and double quotes
+  const regex =
+    /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|(-?"(?:[^"\\]|\\.)*")|(-?'(?:[^'\\]|\\.)*')|\S+|\S+:\S+/g
+  let match
+  while ((match = regex.exec(string)) !== null) {
+    let term = match[0]
+    const sepIndex = term.indexOf(':')
+
+    // Terms that contain a `:`
+    if (sepIndex !== -1) {
+      const key = term.slice(0, sepIndex)
+      let val = term.slice(sepIndex + 1)
+
+      // Strip backslashes respecting escapes
+      val = (val + '').replace(/\\(.?)/g, function (s, n1) {
+        switch (n1) {
+          case '\\':
+            return '\\'
+          case '0':
+            return '\u0000'
+          case '':
+            return ''
+          default:
+            return n1
+        }
+      })
+      terms.push({
+        keyword: key,
+        value: val,
+        offsetStart: match.index,
+        offsetEnd: match.index + term.length,
+      })
+    }
+
+    // Other terms
+    else {
+      let isExcludedTerm = false
+      if (term[0] === '-') {
+        isExcludedTerm = true
+        term = term.slice(1)
+      }
+
+      // Strip backslashes respecting escapes
+      term = (term + '').replace(/\\(.?)/g, function (s, n1) {
+        switch (n1) {
+          case '\\':
+            return '\\'
+          case '0':
+            return '\u0000'
+          case '':
+            return ''
+          default:
+            return n1
+        }
+      })
+
+      if (isExcludedTerm) {
+        exclusion.text.push(term)
+      }
+      else {
+        terms.push({
+          text: term,
+          offsetStart: match.index,
+          offsetEnd: match.index + term.length,
+        })
+      }
+    }
+  }
+  // Reverse to ensure proper order when pop()'ing.
+  terms.reverse()
+  // For each search term
+  let term
+  while ((term = terms.pop())) {
+    // When just a simple term
+    if (term.text) {
+      // We add it as pure text
+      query.text.push(term.text)
+      // When offsets is true, push a new offset
+      if (options.offsets) {
+        query.offsets.push(term)
+      }
+    }
+    // We got an advanced search syntax
+    else if (term.keyword) {
+      let key = term.keyword
+      // Check if the key is a registered keyword
+      options.keywords = options.keywords || []
+      let isKeyword = false
+      let isExclusion = false
+      if (!/^-/.test(key)) {
+        isKeyword = !(options.keywords.indexOf(key) === -1)
+      }
+      else if (key[0] === '-') {
+        const _key = key.slice(1)
+        isKeyword = !(options.keywords.indexOf(_key) === -1)
+        if (isKeyword) {
+          key = _key
+          isExclusion = true
+        }
+      }
+
+      // Check if the key is a registered range
+      options.ranges = options.ranges || []
+      const isRange = !(options.ranges.indexOf(key) === -1)
+      // When the key matches a keyword
+      if (isKeyword) {
+        // When offsets is true, push a new offset
+        if (options.offsets) {
+          query.offsets.push({
+            keyword: key,
+            value: term.value,
+            offsetStart: isExclusion ? term.offsetStart + 1 : term.offsetStart,
+            offsetEnd: term.offsetEnd,
+          })
+        }
+
+        const value = term.value
+        // When value is a thing
+        if (value.length) {
+          // Get an array of values when several are there
+          const values = value.split(',')
+          if (isExclusion) {
+            if (exclusion[key]) {
+              // ...many times...
+              if (exclusion[key] instanceof Array) {
+                // ...and got several values this time...
+                if (values.length > 1) {
+                  // ... concatenate both arrays.
+                  exclusion[key] = exclusion[key].concat(values)
+                }
+                else {
+                  // ... append the current single value.
+                  exclusion[key].push(value)
+                }
+              }
+              // We saw that keyword only once before
+              else {
+                // Put both the current value and the new
+                // value in an array
+                exclusion[key] = [exclusion[key]]
+                exclusion[key].push(value)
+              }
+            }
+            // First time we see that keyword
+            else {
+              // ...and got several values this time...
+              if (values.length > 1) {
+                // ...add all values seen.
+                exclusion[key] = values
+              }
+              // Got only a single value this time
+              else {
+                // Record its value as a string
+                if (options.alwaysArray) {
+                  // ...but we always return an array if option alwaysArray is true
+                  exclusion[key] = [value]
+                }
+                else {
+                  // Record its value as a string
+                  exclusion[key] = value
+                }
+              }
+            }
+          }
+          else {
+            // If we already have seen that keyword...
+            if (query[key]) {
+              // ...many times...
+              if (query[key] instanceof Array) {
+                // ...and got several values this time...
+                if (values.length > 1) {
+                  // ... concatenate both arrays.
+                  query[key] = query[key].concat(values)
+                }
+                else {
+                  // ... append the current single value.
+                  query[key].push(value)
+                }
+              }
+              // We saw that keyword only once before
+              else {
+                // Put both the current value and the new
+                // value in an array
+                query[key] = [query[key]]
+                query[key].push(value)
+              }
+            }
+            // First time we see that keyword
+            else {
+              // ...and got several values this time...
+              if (values.length > 1) {
+                // ...add all values seen.
+                query[key] = values
+              }
+              // Got only a single value this time
+              else {
+                if (options.alwaysArray) {
+                  // ...but we always return an array if option alwaysArray is true
+                  query[key] = [value]
+                }
+                else {
+                  // Record its value as a string
+                  query[key] = value
+                }
+              }
+            }
+          }
+        }
+      }
+      // The key allows a range
+      else if (isRange) {
+        // When offsets is true, push a new offset
+        if (options.offsets) {
+          query.offsets.push(term)
+        }
+
+        const value = term.value
+        // Range are separated with a dash
+        const rangeValues = value.split('-')
+        // When both end of the range are specified
+        // keyword:XXXX-YYYY
+        query[key] = {}
+        if (rangeValues.length === 2) {
+          query[key].from = rangeValues[0]
+          query[key].to = rangeValues[1]
+        }
+        // When pairs of ranges are specified
+        // keyword:XXXX-YYYY,AAAA-BBBB
+        // else if (!rangeValues.length % 2) {
+        // }
+        // When only getting a single value,
+        // or an odd number of values
+        else {
+          query[key].from = value
+        }
+      }
+      else {
+        // We add it as pure text
+        const text = term.keyword + ':' + term.value
+        query.text.push(text)
+
+        // When offsets is true, push a new offset
+        if (options.offsets) {
+          query.offsets.push({
+            text: text,
+            offsetStart: term.offsetStart,
+            offsetEnd: term.offsetEnd,
+          })
+        }
+      }
+    }
+  }
+
+  // Return forged query object
+  query.exclude = exclusion
+  return query
+}
diff --git a/src/search.ts b/src/search.ts
index e1686e4..78865d9 100644
--- a/src/search.ts
+++ b/src/search.ts
@@ -13,6 +13,7 @@ import {
   stripMarkdownCharacters,
   wait,
 } from './utils'
+import { Query } from './query'
 
 let minisearchInstance: MiniSearch
 
@@ -60,12 +61,12 @@ export async function initGlobalSearchIndex(): Promise {
 /**
  * Searches the index for the given query,
  * and returns an array of raw results
- * @param query
+ * @param query The parsed search query
  * @returns
  */
-async function search(query: string): Promise {
-  if (!query) return []
-  let results = minisearchInstance.search(query, {
+async function search(query: Query): Promise {
+  if (!query.getWordsStr()) return []
+  let results = minisearchInstance.search(query.getWordsStr(), {
     prefix: true,
     fuzzy: term => (term.length > 4 ? 0.2 : false),
     combineWith: 'AND',
@@ -78,13 +79,24 @@ async function search(query: string): Promise {
   })
 
   // If the search query contains quotes, filter out results that don't have the exact match
-  const quoted = splitQuotes(query.toLowerCase())
-  if (quoted.length) {
+  const exactTerms = query.getExactTerms()
+  if (exactTerms.length) {
     results = results.filter(r => {
       const content = stripMarkdownCharacters(
         indexedNotes[r.id]?.content ?? '',
       ).toLowerCase()
-      return quoted.every(q => content.includes(q))
+      return exactTerms.every(q => content.includes(q))
+    })
+  }
+
+  // If the search query contains exclude terms, filter out results that have them
+  const exclusions = query.exclusions
+  if (exclusions.length) {
+    results = results.filter(r => {
+      const content = stripMarkdownCharacters(
+        indexedNotes[r.id]?.content ?? '',
+      ).toLowerCase()
+      return exclusions.every(q => !content.includes(q.value))
     })
   }
   return results
@@ -115,11 +127,11 @@ export function getMatches(text: string, reg: RegExp): SearchMatch[] {
  * @returns
  */
 export async function getSuggestions(
-  query: string,
+  queryStr: string,
   options?: Partial<{ singleFilePath: string | null }>,
 ): Promise {
-  query = query.toLowerCase()
   // Get the raw results
+  const query = new Query(queryStr)
   let results = await search(query)
   if (!results.length) return []
 
@@ -143,8 +155,8 @@ export async function getSuggestions(
 
     // Clean search matches that match quoted expresins,
     // and inject those expressions instead
-    const quoted = splitQuotes(query)
     let words = Object.keys(result.match)
+    const quoted = splitQuotes(query.getWordsStr())
     for (const quote of quoted) {
       for (const q of quote.toLowerCase()) {
         words = words.filter(w => !w.toLowerCase().startsWith(q))
diff --git a/src/utils.ts b/src/utils.ts
index ee7284c..eb16307 100644
--- a/src/utils.ts
+++ b/src/utils.ts
@@ -5,6 +5,7 @@ import {
   highlightClass,
   isSearchMatch,
   regexLineSplit,
+  regexStripQuotes,
   regexYaml,
 } from './globals'
 import type { SearchMatch } from './globals'
@@ -104,7 +105,16 @@ export function makeExcerpt(content: string, offset: number): string {
  * @returns
  */
 export function splitQuotes(str: string): string[] {
-  return str.match(/"(.*?)"/g)?.map(s => s.replace(/"/g, '')) ?? []
+  return (
+    str
+      .match(/"(.*?)"/g)
+      ?.map(s => s.replace(/"/g, ''))
+      .filter(q => !!q) ?? []
+  )
+}
+
+export function stripSurroundingQuotes(str: string): string {
+  return str.replace(regexStripQuotes, '')
 }
 
 function mapAsync(
@@ -134,5 +144,5 @@ export async function filterAsync(
  * @returns
  */
 export function stripMarkdownCharacters(text: string): string {
-  return text.replace(/(\*|\_)+(.+?)(\*|\_)+/g, (match, p1, p2) => p2)
+  return text.replace(/(\*|_)+(.+?)(\*|_)+/g, (match, p1, p2) => p2)
 }