Merge pull request #38 from scambier/feature/25-search-filters

Feature/25 search filters
2022-04-30 17:21:05 +02:00
parent 11a7ad165a 0f1686420d
commit f2938cde88
7 changed files with 507 additions and 23 deletions
--- a/manifest-beta.json
+++ b/manifest-beta.json
@@ -0,0 +1,10 @@
+{
+	"id": "omnisearch",
+	"name": "Omnisearch",
+	"version": "1.0.1",
+	"minAppVersion": "0.14.2",
+	"description": "A search engine that just works",
+	"author": "Simon Cambier",
+	"authorUrl": "https://github.com/scambier/obsidian-omnisearch",
+	"isDesktopOnly": false
+}
--- a/src/components/ModalInFile.svelte
+++ b/src/components/ModalInFile.svelte
@@ -42,14 +42,14 @@ onDestroy(() => {
  eventBus.enable("vault")
 })

-$: {
+$: (async () => {
  if (searchQuery) {
-    note = getSuggestions(searchQuery, { singleFilePath })[0] ?? null
+    note = (await getSuggestions(searchQuery, { singleFilePath }))[0] ?? null
    lastSearch = searchQuery
  }
  selectedIndex = 0
  scrollIntoView()
-}
+})()

 $: {
  if (note) {
--- a/src/components/ModalVault.svelte
+++ b/src/components/ModalVault.svelte
@@ -20,13 +20,10 @@ let searchQuery: string
 let resultNotes: ResultNote[] = []
 $: selectedNote = resultNotes[selectedIndex]

-$: {
-  if (searchQuery) {
-    resultNotes = getSuggestions(searchQuery)
-    lastSearch = searchQuery
-  }
-  selectedIndex = 0
-  scrollIntoView()
+$: if (searchQuery) {
+  updateResults()
+} else {
+  resultNotes = []
 }

 onMount(() => {
@@ -39,6 +36,14 @@ onMount(() => {
  eventBus.on("vault", "arrow-down", () => moveIndex(1))
 })

+/**
+ * Fetches the suggestions for the current search query and refreshes the
+ * modal state: the result list, the last recorded search, and the selection
+ * (reset to the first entry, followed by a call to scrollIntoView()).
+ */
+async function updateResults() {
+  const suggestions = await getSuggestions(searchQuery)
+  resultNotes = suggestions
+  // searchQuery is read again here, after the await has completed
+  lastSearch = searchQuery
+  selectedIndex = 0
+  scrollIntoView()
+}
+
 function onClick() {
  if (!selectedNote) return
  openNote(selectedNote)
--- a/src/globals.ts
+++ b/src/globals.ts
@@ -4,6 +4,7 @@ import { EventBus } from './event-bus'
 export const regexWikilink = /^!?\[\[(?<name>.+?)(\|(?<alias>.+?))?\]\]/
 export const regexLineSplit = /\r?\n|\r|((\.|\?|!)( |\r?\n|\r))/g
 export const regexYaml = /^---\s*\n(.*?)\n?^---\s?/ms
+// Matches a double or single quote located at the very start or very end of a
+// string. The two ends are matched independently (no check that they pair up).
+export const regexStripQuotes = /^"|"$|^'|'$/g

 export const excerptBefore = 100
 export const excerptAfter = 180
@@ -12,8 +13,6 @@ export const highlightClass = 'suggestion-highlight omnisearch-highlight'

 export const eventBus = new EventBus()

-// export const eventBus = new EventBus()
-
 export type SearchNote = {
  path: string
  basename: string
--- a/src/query.ts
+++ b/src/query.ts
@@ -0,0 +1,380 @@
+import { stripSurroundingQuotes } from './utils'
+
+type QueryToken = {
+  /**
+   * The query token string value
+   */
+  value: string
+
+  /**
+   * Was this token encased in quotes?
+   */
+  exact: boolean
+}
+
+/**
+ * Structured representation of a search query.
+ *
+ * The raw text is lower-cased and tokenized by `parseQuery`. Every token has
+ * its surrounding quotes stripped and is flagged as `exact` when the stripping
+ * changed it. Free-text tokens prefixed with `-` end up in `exclusions`, the
+ * other text tokens in `words`. Tokens that are empty once their quotes are
+ * removed (e.g. `""`) are dropped from both lists.
+ */
+export class Query {
+  /** Terms to search for, in the order they were typed */
+  public words: QueryToken[] = []
+  /** Terms that must not appear in a result */
+  public exclusions: QueryToken[] = []
+
+  /**
+   * @param text the raw query string, as typed by the user
+   */
+  constructor(text: string) {
+    const tokens = parseQuery(text.toLowerCase(), { tokenize: true })
+    // Arrow callbacks keep `this` bound and only forward the token itself
+    this.exclusions = tokens.exclude.text
+      .map(token => this.formatToken(token))
+      .filter(({ value }) => !!value)
+    // Same filtering as the exclusions, so that an empty quoted token
+    // does not become an empty word / empty exact term
+    this.words = tokens.text
+      .map(token => this.formatToken(token))
+      .filter(({ value }) => !!value)
+  }
+
+  /**
+   * Returns all the words of the query, without their quotes,
+   * joined with a single space
+   * @returns
+   */
+  public getWordsStr(): string {
+    return this.words.map(({ value }) => value).join(' ')
+  }
+
+  /**
+   * Returns the terms that are encased in quotes
+   * @returns
+   */
+  public getExactTerms(): string[] {
+    return this.words.filter(({ exact }) => exact).map(({ value }) => value)
+  }
+
+  /**
+   * Converts a raw token into a QueryToken: strips its surrounding quotes,
+   * and marks it as exact if there was something to strip
+   * @param str the raw token, possibly quoted
+   * @returns
+   */
+  private formatToken(str: string): QueryToken {
+    const stripped = stripSurroundingQuotes(str)
+    return {
+      value: stripped,
+      exact: stripped !== str,
+    }
+  }
+}
+
+/*!
+ * search-query-parser.js
+ * Original: https://github.com/nepsilon/search-query-parser
+ * Modified by Simon Cambier
+ * Copyright(c) 2014-2019
+ * MIT Licensed
+ */
+
+/**
+ * Options accepted by `parseQuery`
+ */
+interface SearchParserOptions {
+  /** Collect the position of each term in the result; parseQuery defaults it to true when undefined */
+  offsets?: boolean
+  /** Must be `true`; not consulted by parseQuery as shown in this file */
+  tokenize: true
+  /** Keys recognized as `keyword:value` filters; any other `key:value` term is kept as plain text */
+  keywords?: string[]
+  /** Keys whose value is parsed as a `from-to` range */
+  ranges?: string[]
+  /** When true, a keyword seen once with a single value is stored as a one-item array instead of a string */
+  alwaysArray?: boolean
+}
+
+/**
+ * Loose string-keyed dictionary, used to store keyword and range values under their key
+ */
+interface ISearchParserDictionary {
+  [key: string]: any
+}
+
+/**
+ * A `keyword:value` term
+ */
+type SearchParserKeyWordOffset = {
+  keyword: string
+  value?: string
+}
+
+/**
+ * A plain text term
+ */
+type SearchParserTextOffset = {
+  text: string
+}
+
+/**
+ * A term (keyword or text) along with its start and end offsets in the query string
+ */
+type SearchParserOffset = (
+  | SearchParserKeyWordOffset
+  | SearchParserTextOffset
+) & {
+  offsetStart: number
+  offsetEnd: number
+}
+
+/**
+ * Value returned by `parseQuery`. Keyword and range values are stored
+ * under their own key, through the dictionary index signature.
+ */
+interface SearchParserResult extends ISearchParserDictionary {
+  /** Plain text terms, in the order they appear in the query */
+  text: string[]
+  /** Term positions, filled when the `offsets` option is true */
+  offsets: SearchParserOffset[]
+  /** Terms that were prefixed with `-` */
+  exclude: { text: string[] }
+}
+
+/**
+ * Parses a search string into plain text terms, excluded terms,
+ * and `keyword:value` / range filters.
+ *
+ * - Terms are split on whitespace, except for quoted expressions that are kept
+ *   whole. Quotes are NOT removed from the returned terms, only backslash
+ *   escapes are resolved.
+ * - A term without `:` that starts with `-` goes to `exclude.text`
+ *   (without the dash), and is not recorded in `offsets`.
+ * - A `key:value` term is stored under `key` when the key is listed in
+ *   `options.keywords` (or under `exclude[key]` when written `-key:value`),
+ *   as a `{ from, to }` object when the key is listed in `options.ranges`,
+ *   and is otherwise kept as plain text.
+ *
+ * The `options` object is mutated: `offsets`, `keywords` and `ranges`
+ * receive their default value when they are not set.
+ *
+ * @param string the query to parse
+ * @param options
+ * @returns the parsed query
+ */
+function parseQuery(
+  string: string,
+  options: SearchParserOptions,
+): SearchParserResult {
+  // Set a default options object when none is provided
+  if (!options) {
+    options = { offsets: true, tokenize: true }
+  }
+  else {
+    // If options offsets wasn't passed, set it to true
+    options.offsets =
+      typeof options.offsets === 'undefined' ? true : options.offsets
+  }
+
+  if (!string) {
+    string = ''
+  }
+
+  // Our object to store the query object
+  const query: SearchParserResult = {
+    text: [],
+    offsets: [],
+    exclude: { text: [] },
+  }
+  // When offsets is true, create their array
+  if (options.offsets) {
+    query.offsets = []
+  }
+  // Excluded terms and excluded keyword values, attached to the query at the very end
+  const exclusion: ISearchParserDictionary & { text: string[] } = { text: [] }
+  const terms = []
+  // Get a list of search terms respecting single and double quotes
+  // NOTE(review): the trailing `\S+:\S+` alternative looks unreachable,
+  // since the `\S+` alternative is tried before it — confirm
+  const regex =
+    /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|(-?"(?:[^"\\]|\\.)*")|(-?'(?:[^'\\]|\\.)*')|\S+|\S+:\S+/g
+  let match
+  while ((match = regex.exec(string)) !== null) {
+    let term = match[0]
+    const sepIndex = term.indexOf(':')
+
+    // Terms that contain a `:`
+    if (sepIndex !== -1) {
+      const key = term.slice(0, sepIndex)
+      let val = term.slice(sepIndex + 1)
+
+      // Strip backslashes respecting escapes
+      val = (val + '').replace(/\\(.?)/g, function (s, n1) {
+        switch (n1) {
+          case '\\':
+            return '\\'
+          case '0':
+            return '\u0000'
+          case '':
+            return ''
+          default:
+            return n1
+        }
+      })
+      terms.push({
+        keyword: key,
+        value: val,
+        offsetStart: match.index,
+        offsetEnd: match.index + term.length,
+      })
+    }
+
+    // Other terms
+    else {
+      // A leading dash marks the term as excluded
+      let isExcludedTerm = false
+      if (term[0] === '-') {
+        isExcludedTerm = true
+        term = term.slice(1)
+      }
+
+      // Strip backslashes respecting escapes
+      term = (term + '').replace(/\\(.?)/g, function (s, n1) {
+        switch (n1) {
+          case '\\':
+            return '\\'
+          case '0':
+            return '\u0000'
+          case '':
+            return ''
+          default:
+            return n1
+        }
+      })
+
+      if (isExcludedTerm) {
+        exclusion.text.push(term)
+      }
+      else {
+        // NOTE(review): `term` has already been unescaped at this point, so
+        // offsetEnd falls short of the matched span whenever backslashes
+        // were removed — confirm whether callers rely on it
+        terms.push({
+          text: term,
+          offsetStart: match.index,
+          offsetEnd: match.index + term.length,
+        })
+      }
+    }
+  }
+  // Reverse to ensure proper order when pop()'ing.
+  terms.reverse()
+  // For each search term
+  let term
+  while ((term = terms.pop())) {
+    // When just a simple term
+    if (term.text) {
+      // We add it as pure text
+      query.text.push(term.text)
+      // When offsets is true, push a new offset
+      if (options.offsets) {
+        query.offsets.push(term)
+      }
+    }
+    // We got an advanced search syntax
+    else if (term.keyword) {
+      let key = term.keyword
+      // Check if the key is a registered keyword
+      options.keywords = options.keywords || []
+      let isKeyword = false
+      let isExclusion = false
+      if (!/^-/.test(key)) {
+        isKeyword = !(options.keywords.indexOf(key) === -1)
+      }
+      else if (key[0] === '-') {
+        // `-key:value` excludes the value, but only if `key` is a registered keyword
+        const _key = key.slice(1)
+        isKeyword = !(options.keywords.indexOf(_key) === -1)
+        if (isKeyword) {
+          key = _key
+          isExclusion = true
+        }
+      }
+
+      // Check if the key is a registered range
+      options.ranges = options.ranges || []
+      const isRange = !(options.ranges.indexOf(key) === -1)
+      // When the key matches a keyword
+      if (isKeyword) {
+        // When offsets is true, push a new offset
+        if (options.offsets) {
+          query.offsets.push({
+            keyword: key,
+            value: term.value,
+            offsetStart: isExclusion ? term.offsetStart + 1 : term.offsetStart,
+            offsetEnd: term.offsetEnd,
+          })
+        }
+
+        const value = term.value
+        // When value is a thing
+        if (value.length) {
+          // Get an array of values when several are there
+          const values = value.split(',')
+          if (isExclusion) {
+            // If we already have seen that excluded keyword...
+            if (exclusion[key]) {
+              // ...many times...
+              if (exclusion[key] instanceof Array) {
+                // ...and got several values this time...
+                if (values.length > 1) {
+                  // ... concatenate both arrays.
+                  exclusion[key] = exclusion[key].concat(values)
+                }
+                else {
+                  // ... append the current single value.
+                  exclusion[key].push(value)
+                }
+              }
+              // We saw that keyword only once before
+              else {
+                // Put both the current value and the new
+                // value in an array
+                exclusion[key] = [exclusion[key]]
+                exclusion[key].push(value)
+              }
+            }
+            // First time we see that keyword
+            else {
+              // ...and got several values this time...
+              if (values.length > 1) {
+                // ...add all values seen.
+                exclusion[key] = values
+              }
+              // Got only a single value this time
+              else {
+                // Record its value as a string
+                if (options.alwaysArray) {
+                  // ...but we always return an array if option alwaysArray is true
+                  exclusion[key] = [value]
+                }
+                else {
+                  // Record its value as a string
+                  exclusion[key] = value
+                }
+              }
+            }
+          }
+          else {
+            // If we already have seen that keyword...
+            if (query[key]) {
+              // ...many times...
+              if (query[key] instanceof Array) {
+                // ...and got several values this time...
+                if (values.length > 1) {
+                  // ... concatenate both arrays.
+                  query[key] = query[key].concat(values)
+                }
+                else {
+                  // ... append the current single value.
+                  query[key].push(value)
+                }
+              }
+              // We saw that keyword only once before
+              else {
+                // Put both the current value and the new
+                // value in an array
+                query[key] = [query[key]]
+                query[key].push(value)
+              }
+            }
+            // First time we see that keyword
+            else {
+              // ...and got several values this time...
+              if (values.length > 1) {
+                // ...add all values seen.
+                query[key] = values
+              }
+              // Got only a single value this time
+              else {
+                if (options.alwaysArray) {
+                  // ...but we always return an array if option alwaysArray is true
+                  query[key] = [value]
+                }
+                else {
+                  // Record its value as a string
+                  query[key] = value
+                }
+              }
+            }
+          }
+        }
+      }
+      // The key allows a range
+      else if (isRange) {
+        // When offsets is true, push a new offset
+        if (options.offsets) {
+          query.offsets.push(term)
+        }
+
+        const value = term.value
+        // Range are separated with a dash
+        const rangeValues = value.split('-')
+        // When both end of the range are specified
+        // keyword:XXXX-YYYY
+        query[key] = {}
+        if (rangeValues.length === 2) {
+          query[key].from = rangeValues[0]
+          query[key].to = rangeValues[1]
+        }
+        // When pairs of ranges are specified
+        // keyword:XXXX-YYYY,AAAA-BBBB
+        // else if (!rangeValues.length % 2) {
+        // }
+        // When only getting a single value,
+        // or an odd number of values
+        else {
+          query[key].from = value
+        }
+      }
+      else {
+        // The key is neither a keyword nor a range:
+        // We add it as pure text
+        const text = term.keyword + ':' + term.value
+        query.text.push(text)
+
+        // When offsets is true, push a new offset
+        if (options.offsets) {
+          query.offsets.push({
+            text: text,
+            offsetStart: term.offsetStart,
+            offsetEnd: term.offsetEnd,
+          })
+        }
+      }
+    }
+  }
+
+  // Return forged query object
+  query.exclude = exclusion
+  return query
+}
--- a/src/search.ts
+++ b/src/search.ts
@@ -6,7 +6,14 @@ import {
  type ResultNote,
  type SearchMatch,
 } from './globals'
-import { extractHeadingsFromCache, stringsToRegex, wait } from './utils'
+import {
+  extractHeadingsFromCache,
+  splitQuotes,
+  stringsToRegex,
+  stripMarkdownCharacters,
+  wait,
+} from './utils'
+import { Query } from './query'

 let minisearchInstance: MiniSearch<IndexedNote>

@@ -54,12 +61,12 @@ export async function initGlobalSearchIndex(): Promise<void> {
 /**
 * Searches the index for the given query,
 * and returns an array of raw results
- * @param query
+ * @param text
 * @returns
 */
-function search(query: string): SearchResult[] {
-  if (!query) return []
-  return minisearchInstance.search(query, {
+async function search(query: Query): Promise<SearchResult[]> {
+  if (!query.getWordsStr()) return []
+  let results = minisearchInstance.search(query.getWordsStr(), {
    prefix: true,
    fuzzy: term => (term.length > 4 ? 0.2 : false),
    combineWith: 'AND',
@@ -70,6 +77,29 @@ function search(query: string): SearchResult[] {
      headings3: 1.1,
    },
  })
+
+  // If the search query contains quotes, filter out results that don't have the exact match
+  const exactTerms = query.getExactTerms()
+  if (exactTerms.length) {
+    results = results.filter(r => {
+      const content = stripMarkdownCharacters(
+        indexedNotes[r.id]?.content ?? '',
+      ).toLowerCase()
+      return exactTerms.every(q => content.includes(q))
+    })
+  }
+
+  // If the search query contains exclude terms, filter out results that have them
+  const exclusions = query.exclusions
+  if (exclusions.length) {
+    results = results.filter(r => {
+      const content = stripMarkdownCharacters(
+        indexedNotes[r.id]?.content ?? '',
+      ).toLowerCase()
+      return exclusions.every(q => !content.includes(q.value))
+    })
+  }
+  return results
 }

 /**
@@ -96,12 +126,13 @@ export function getMatches(text: string, reg: RegExp): SearchMatch[] {
 * @param options
 * @returns
 */
-export function getSuggestions(
-  query: string,
+export async function getSuggestions(
+  queryStr: string,
  options?: Partial<{ singleFilePath: string | null }>,
-): ResultNote[] {
+): Promise<ResultNote[]> {
  // Get the raw results
-  let results = search(query)
+  const query = new Query(queryStr)
+  let results = await search(query)
  if (!results.length) return []

  // Either keep the 50 first results,
@@ -121,7 +152,17 @@ export function getSuggestions(
    if (!note) {
      throw new Error(`Note "${result.id}" not indexed`)
    }
-    const words = Object.keys(result.match)
+
+    // Clean search matches that match quoted expressions,
+    // and inject those expressions instead.
+    // The quoted expressions are read from the query's exact terms:
+    // getWordsStr() joins tokens whose surrounding quotes are already
+    // stripped, so that string has no quotes left to split on.
+    let words = Object.keys(result.match)
+    for (const quote of query.getExactTerms()) {
+      // Compare against each word of the expression (not each character)
+      const quoteWords = quote
+        .toLowerCase()
+        .split(' ')
+        .filter(w => !!w)
+      // Nothing to inject for an empty expression
+      if (!quoteWords.length) continue
+      for (const q of quoteWords) {
+        words = words.filter(w => !w.toLowerCase().startsWith(q))
+      }
+      words.push(quote)
+    }
    const matches = getMatches(note.content, stringsToRegex(words))
    const resultNote: ResultNote = {
      score: result.score,
--- a/src/utils.ts
+++ b/src/utils.ts
@@ -5,6 +5,7 @@ import {
  highlightClass,
  isSearchMatch,
  regexLineSplit,
+  regexStripQuotes,
  regexYaml,
 } from './globals'
 import type { SearchMatch } from './globals'
@@ -89,7 +90,7 @@ export function makeExcerpt(content: string, offset: number): string {
  const pos = offset ?? -1
  if (pos > -1) {
    const from = Math.max(0, pos - excerptBefore)
-    const to = Math.min(content.length - 1, pos + excerptAfter)
+    const to = Math.min(content.length, pos + excerptAfter)
    content =
      (from > 0 ? '…' : '') +
      content.slice(from, to).trim() +
@@ -97,3 +98,51 @@ export function makeExcerpt(content: string, offset: number): string {
  }
  return escapeHTML(content)
 }
+
+/**
+ * Extracts the "expressions in double quotes" from a string.
+ * The quotes themselves are removed, and empty expressions are skipped.
+ * Text located outside of quotes is not returned.
+ * @param str
+ * @returns the quoted expressions, or an empty array if there are none
+ */
+export function splitQuotes(str: string): string[] {
+  const quotedSegments = str.match(/"(.*?)"/g)
+  if (!quotedSegments) return []
+
+  const expressions: string[] = []
+  for (const segment of quotedSegments) {
+    const inner = segment.replace(/"/g, '')
+    if (inner) expressions.push(inner)
+  }
+  return expressions
+}
+
+/**
+ * Removes every part of the string that matches `regexStripQuotes`
+ * @param str
+ * @returns the string without the matched quotes
+ */
+export function stripSurroundingQuotes(str: string): string {
+  const unquoted = str.replace(regexStripQuotes, '')
+  return unquoted
+}
+
+/**
+ * Async counterpart of Array.map: calls the callback on every item,
+ * and resolves with all the results once every promise is fulfilled
+ * @param array
+ * @param callbackfn
+ * @returns the mapped values, in the same order as the input
+ */
+function mapAsync<T, U>(
+  array: T[],
+  callbackfn: (value: T, index: number, array: T[]) => Promise<U>,
+): Promise<U[]> {
+  const pending = array.map((item, position, source) =>
+    callbackfn(item, position, source),
+  )
+  return Promise.all(pending)
+}
+
+/**
+ * Async counterpart of Array.filter: keeps the items for which
+ * the callback resolves to a truthy value
+ * https://stackoverflow.com/a/53508547
+ * @param array
+ * @param callbackfn
+ * @returns
+ */
+export async function filterAsync<T>(
+  array: T[],
+  callbackfn: (value: T, index: number, array: T[]) => Promise<boolean>,
+): Promise<T[]> {
+  // All the predicates are evaluated before any item is picked
+  const verdicts = await mapAsync(array, callbackfn)
+  const kept: T[] = []
+  array.forEach((item, position) => {
+    if (verdicts[position]) kept.push(item)
+  })
+  return kept
+}
+
+/**
+ * A simple function to strip bold and italic markdown chars from a string:
+ * any text wrapped in runs of `*` and/or `_` is replaced by the text alone
+ * @param text
+ * @returns
+ */
+export function stripMarkdownCharacters(text: string): string {
+  // $2 is the text captured between the opening and closing markers
+  const unwrapped = text.replace(/(\*|_)+(.+?)(\*|_)+/g, '$2')
+  return unwrapped
+}