Feature/40 key value current folder (#218)

* #40 - Reworked Query
* #40 - added a "path:" option in the query field
* #40 - folder exclusion
* Cleaner code
2023-04-02 13:00:52 +02:00
parent 60f56452dc
commit 56fc8157fb
7 changed files with 95 additions and 438 deletions
--- a/package.json
+++ b/package.json
@@ -44,7 +44,8 @@
    "dexie": "^3.2.2",
    "lodash-es": "4.17.21",
    "minisearch": "6.0.0-beta.1",
-    "pure-md5": "^0.1.14"
+    "pure-md5": "^0.1.14",
+    "search-query-parser": "^1.6.0"
  },
  "pnpm": {
    "overrides": {
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -26,6 +26,7 @@ specifiers:
  prettier: ^2.8.1
  prettier-plugin-svelte: ^2.8.1
  pure-md5: ^0.1.14
+  search-query-parser: ^1.6.0
  svelte: ^3.54.0
  svelte-check: ^2.10.2
  svelte-jester: ^2.3.2
@@ -40,6 +41,7 @@ dependencies:
  lodash-es: 4.17.21
  minisearch: 6.0.0-beta.1
  pure-md5: 0.1.14
+  search-query-parser: 1.6.0

 devDependencies:
  '@babel/preset-env': 7.20.2
@@ -4477,6 +4479,10 @@ packages:
      xmlchars: 2.2.0
    dev: true

+  /search-query-parser/1.6.0:
+    resolution: {integrity: sha512-bhf+phLlKF38nuniwLcVHWPArHGdzenlPhPi955CR3vm1QQifXIuPHwAffhjapojdVVzmv4hgIJ6NOX1d/w+Uw==}
+    dev: false
+
  /semver/6.3.0:
    resolution: {integrity: sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==}
    hasBin: true
--- a/src/tests/query-tests.ts
+++ b/src/tests/query-tests.ts
@@ -9,7 +9,7 @@ describe('The Query class', () => {
    const query = new Query(stringQuery)

    // Assert
-    const segments = query.segments.map(s => s.value)
+    const segments = query.query.text
    expect(segments).toHaveLength(5)
    expect(segments).toContain('foo')
    expect(segments).toContain('bar')
@@ -17,35 +17,18 @@ describe('The Query class', () => {
    expect(segments).toContain('dolor')
    expect(segments).toContain('sit amet')

-    const exclusions = query.exclusions.map(s => s.value)
+    const exclusions = query.query.exclude.text
    expect(exclusions).toHaveLength(2)
    expect(exclusions).toContain('baz')
    expect(exclusions).toContain('quoted exclusion')
  })

-  it('should mark quoted segments & exclusions as "exact"', () => {
-    // Act
-    const query = new Query(stringQuery)
-
-    // Assert
-    expect(query.segments.filter(s => s.exact)).toHaveLength(2)
-    expect(
-      query.segments.find(o => o.value === 'lorem ipsum')!.exact
-    ).toBeTruthy()
-    expect(query.segments.find(o => o.value === 'sit amet')!.exact).toBeTruthy()
-
-    expect(query.exclusions.filter(s => s.exact)).toHaveLength(1)
-    expect(
-      query.exclusions.find(o => o.value === 'quoted exclusion')!.exact
-    ).toBeTruthy()
-  })
-
  it('should not exclude words when there is no space before', () => {
    // Act
    const query = new Query('foo bar-baz')

    // Assert
-    expect(query.exclusions).toHaveLength(0)
+    expect(query.query.exclude.text).toHaveLength(0)
  })

  describe('.getExactTerms()', () => {
--- a/src/components/ModalVault.svelte
+++ b/src/components/ModalVault.svelte
@@ -15,7 +15,6 @@
    getCtrlKeyLabel,
    getExtension,
    isFilePDF,
-    logDebug,
    loopIndex,
  } from 'src/tools/utils'
  import {
--- a/src/search/omnisearch.ts
+++ b/src/search/omnisearch.ts
@@ -72,8 +72,8 @@ export class Omnisearch {
  }
  private minisearch: MiniSearch
  private indexedDocuments: Map<string, number> = new Map()
-  private previousResults: SearchResult[] = []
-  private previousQuery: Query | null = null
+  // private previousResults: SearchResult[] = []
+  // private previousQuery: Query | null = null

  constructor() {
    this.minisearch = new MiniSearch(Omnisearch.options)
@@ -175,8 +175,8 @@ export class Omnisearch {
    options: { prefixLength: number; singleFilePath?: string }
  ): Promise<SearchResult[]> {
    if (query.isEmpty()) {
-      this.previousResults = []
-      this.previousQuery = null
+      // this.previousResults = []
+      // this.previousQuery = null
      return []
    }

@@ -210,6 +210,22 @@ export class Omnisearch {
      })
    }

+    // Filter query results that match the path
+    if (query.query.path) {
+      results = results.filter(r =>
+        query.query.path?.some(p =>
+          (r.id as string).toLowerCase().includes(p.toLowerCase())
+        )
+      )
+    }
+    if (query.query.exclude.path) {
+      results = results.filter(r =>
+        !query.query.exclude.path?.some(p =>
+          (r.id as string).toLowerCase().includes(p.toLowerCase())
+        )
+      )
+    }
+
    // If the query does not return any result,
    // retry but with a shorter prefix limit
    if (!results.length) {
@@ -243,9 +259,7 @@ export class Omnisearch {
    }

    // Extract tags from the query
-    const tags = query.segments
-      .filter(s => s.value.startsWith('#'))
-      .map(s => s.value)
+    const tags = query.getTags()

    // Put the results with tags on top
    for (const tag of tags) {
@@ -280,14 +294,14 @@ export class Omnisearch {
    }

    // If the search query contains exclude terms, filter out results that have them
-    const exclusions = query.exclusions
+    const exclusions = query.query.exclude.text
    if (exclusions.length) {
      logDebug('Filtering with exclusions')
      results = results.filter(r => {
        const content = stripMarkdownCharacters(
          documents.find(d => d.path === r.id)?.content ?? ''
        ).toLowerCase()
-        return exclusions.every(q => !content.includes(q.value))
+        return exclusions.every(q => !content.includes(q))
      })
    }

@@ -298,8 +312,8 @@ export class Omnisearch {
      (result, index, arr) => arr.findIndex(t => t.id === result.id) === index
    )

-    this.previousQuery = query
-    this.previousResults = results
+    // this.previousQuery = query
+    // this.previousResults = results

    return results
  }
@@ -375,16 +389,6 @@ export class Omnisearch {
        } as IndexedDocument
      }

-      // Remove '#' from tags, for highlighting
-      query.segments.forEach(s => {
-        s.value = s.value.replace(/^#/, '')
-      })
-
-      // Extract tags from the query
-      const tags = query.segments
-        .filter(s => s.value.startsWith('#'))
-        .map(s => s.value)
-
      // Clean search matches that match quoted expressions,
      // and inject those expressions instead
      const foundWords = [
@@ -393,10 +397,10 @@ export class Omnisearch {
        ...Object.keys(result.match),

        // Quoted expressions
-        ...query.segments.filter(s => s.exact).map(s => s.value),
+        ...query.getExactTerms(),

        // Tags, starting with #
-        ...tags,
+        ...query.getTags(),
      ].filter(w => w.length > 1 || /\p{Emoji}/u.test(w))
      logDebug('Matching tokens:', foundWords)

--- a/src/search/query.ts
+++ b/src/search/query.ts
@@ -1,83 +1,77 @@
 import { settings } from '../settings'
-import { removeDiacritics, stripSurroundingQuotes } from '../tools/utils'
-import { parseQuery } from '../vendor/parse-query'
-import { regexExtensions } from '../globals'
+import { removeDiacritics } from '../tools/utils'
+import { parse } from 'search-query-parser'

-type QueryToken = {
-  /**
-   * The query token string value
-   */
-  value: string
+const keywords = ['ext', 'path'] as const

-  /**
-   * Was this token encased in quotes?
-   */
-  exact: boolean
-}
+type Keywords = {
+  [K in typeof keywords[number]]?: string[]
+} & { text: string[] }

-/**
- * This class is used to parse a query string into a structured object
- */
 export class Query {
-  public segments: QueryToken[] = []
-  public exclusions: QueryToken[] = []
-  public extensions: string[] = []
+  query: Keywords & { exclude: Keywords }
+  /**
+   * @deprecated
+   */
+  extensions: string[] = []

  constructor(text = '') {
-    // Extract & remove extensions from the query
-    this.extensions = this.extractExtensions(text)
-    text = this.removeExtensions(text)
-
-    if (settings.ignoreDiacritics) text = removeDiacritics(text)
-    const tokens = parseQuery(text.toLowerCase(), { tokenize: true })
-    this.exclusions = tokens.exclude.text
-      .map(this.formatToken)
-      .filter(o => !!o.value)
-    this.segments = tokens.text.reduce<QueryToken[]>((prev, curr) => {
-      const formatted = this.formatToken(curr)
-      if (formatted.value) {
-        prev.push(formatted)
+    if (settings.ignoreDiacritics) {
+      text = removeDiacritics(text)
    }
-      return prev
-    }, [])
+    const parsed = parse(text.toLowerCase(), {
+      tokenize: true,
+      keywords: keywords as unknown as string[],
+    }) as unknown as typeof this.query
+
+    // Default values
+    parsed.text = parsed.text ?? []
+    parsed.exclude = parsed.exclude ?? {}
+    parsed.exclude.text = parsed.exclude.text ?? []
+    if (!Array.isArray(parsed.exclude.text)) {
+      parsed.exclude.text = [parsed.exclude.text]
+    }
+
+    // Make sure that all fields are string[]
+    for (const k of keywords) {
+      const v = parsed[k]
+      if (v) {
+        parsed[k] = Array.isArray(v) ? v : [v]
+      }
+      const e = parsed.exclude[k]
+      if (e) {
+        parsed.exclude[k] = Array.isArray(e) ? e : [e]
+      }
+    }
+    this.query = parsed
+    this.extensions = this.query.ext ?? []
  }

  public isEmpty(): boolean {
-    return this.segments.length === 0
+    for (const k of keywords) {
+      if (this.query[k]?.length) {
+        return false
+      }
+      if (this.query.text.length) {
+        return false
+      }
+    }
+    return true
  }

  public segmentsToStr(): string {
-    return this.segments.map(({ value }) => value).join(' ')
+    return this.query.text.join(' ')
+  }
+  
+  public getTags(): string[] {
+    return this.query.text.filter(o => o.startsWith('#'))
+  }
+
+  public getTagsWithoutHashtag(): string[] {
+    return this.getTags().map(o => o.replace(/^#/, ''))
  }

-  /**
-   * Returns the terms that are encased in quotes
-   * @returns
-   */
  public getExactTerms(): string[] {
-    return this.segments.filter(({ exact }) => exact).map(({ value }) => value)
-  }
-
-  private formatToken(str: string): QueryToken {
-    const stripped = stripSurroundingQuotes(str)
-    return {
-      value: stripped,
-      exact: stripped !== str,
-    }
-  }
-
-  /**
-   * Extracts an array of extensions like ".png" from a string
-   */
-  private extractExtensions(str: string): string[] {
-    const extensions = (str.match(regexExtensions) ?? []).map(o => o.trim())
-    if (extensions) {
-      return extensions.map(ext => ext.toLowerCase())
-    }
-    return []
-  }
-
-  private removeExtensions(str: string): string {
-    return str.replace(regexExtensions, '')
+    return this.query.text.filter(o => o.split(' ').length > 1)
  }
 }
--- a/src/vendor/parse-query.ts
+++ b/src/vendor/parse-query.ts
@@ -1,330 +0,0 @@
-/*!
- * search-query-parser.js
- * Original: https://github.com/nepsilon/search-query-parser
- * Modified by Simon Cambier
- * Copyright(c) 2014-2019
- * MIT Licensed
- */
-
-import { warnDebug } from "../tools/utils";
-
-interface SearchParserOptions {
-  offsets?: boolean
-  tokenize: true
-  keywords?: string[]
-  ranges?: string[]
-  alwaysArray?: boolean
-}
-
-interface ISearchParserDictionary {
-  [key: string]: any
-}
-
-type SearchParserKeyWordOffset = {
-  keyword: string
-  value?: string
-}
-
-type SearchParserTextOffset = {
-  text: string
-}
-
-type SearchParserOffset = (
-  | SearchParserKeyWordOffset
-  | SearchParserTextOffset
-  ) & {
-  offsetStart: number
-  offsetEnd: number
-}
-
-interface SearchParserResult extends ISearchParserDictionary {
-  text: string[]
-  offsets: SearchParserOffset[]
-  exclude: { text: string[] }
-}
-
-export function parseQuery(
-  string: string,
-  options: SearchParserOptions,
-): SearchParserResult {
-  // Set a default options object when none is provided
-  if (!options) {
-    options = { offsets: true, tokenize: true }
-  } else {
-    // If options.offsets wasn't passed, set it to true
-    options.offsets =
-      typeof options.offsets === 'undefined' ? true : options.offsets
-  }
-
-  if (!string) {
-    string = ''
-  }
-
-  // Our object to store the query object
-  const query: SearchParserResult = {
-    text: [],
-    offsets: [],
-    exclude: { text: [] },
-  }
-  // When offsets is true, create their array
-  if (options.offsets) {
-    query.offsets = []
-  }
-  const exclusion: ISearchParserDictionary & { text: string[] } = { text: [] }
-  const terms = []
-  // Get a list of search terms respecting single and double quotes
-  const regex =
-    /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|(-?"(?:[^"\\]|\\.)*")|(-?'(?:[^'\\]|\\.)*')|\S+|\S+:\S+/g
-  let match
-  let count = 0
-  const startTime = new Date().getTime()
-
-  while ((match = regex.exec(string)) !== null) {
-    if (++count >= 100 || new Date().getTime() - startTime > 50) {
-      warnDebug('Stopped SearchParserResult at', count, 'results')
-      break
-    }
-    let term = match[0]
-    const sepIndex = term.indexOf(':')
-
-    // Terms that contain a `:`
-    if (sepIndex !== -1) {
-      const key = term.slice(0, sepIndex)
-      let val = term.slice(sepIndex + 1)
-
-      // Strip backslashes respecting escapes
-      val = (val + '').replace(/\\(.?)/g, function (_s, n1) {
-        switch (n1) {
-          case '\\':
-            return '\\'
-          case '0':
-            return '\u0000'
-          case '':
-            return ''
-          default:
-            return n1
-        }
-      })
-      terms.push({
-        keyword: key,
-        value: val,
-        offsetStart: match.index,
-        offsetEnd: match.index + term.length,
-      })
-    }
-
-    // Other terms
-    else {
-      let isExcludedTerm = false
-      if (term[0] === '-') {
-        isExcludedTerm = true
-        term = term.slice(1)
-      }
-
-      // Strip backslashes respecting escapes
-      term = (term + '').replace(/\\(.?)/g, function (_s, n1) {
-        switch (n1) {
-          case '\\':
-            return '\\'
-          case '0':
-            return '\u0000'
-          case '':
-            return ''
-          default:
-            return n1
-        }
-      })
-
-      if (isExcludedTerm) {
-        exclusion.text.push(term)
-      } else {
-        terms.push({
-          text: term,
-          offsetStart: match.index,
-          offsetEnd: match.index + term.length,
-        })
-      }
-    }
-  }
-  // Reverse to ensure proper order when pop()'ing.
-  terms.reverse()
-  // For each search term
-  let term
-  while ((term = terms.pop())) {
-    // When just a simple term
-    if (term.text) {
-      // We add it as pure text
-      query.text.push(term.text)
-      // When offsets is true, push a new offset
-      if (options.offsets) {
-        query.offsets.push(term)
-      }
-    }
-    // We got an advanced search syntax
-    else if (term.keyword) {
-      let key = term.keyword
-      // Check if the key is a registered keyword
-      options.keywords = options.keywords || []
-      let isKeyword = false
-      let isExclusion = false
-      if (!/^-/.test(key)) {
-        isKeyword = !(options.keywords.indexOf(key) === -1)
-      } else if (key[0] === '-') {
-        const _key = key.slice(1)
-        isKeyword = !(options.keywords.indexOf(_key) === -1)
-        if (isKeyword) {
-          key = _key
-          isExclusion = true
-        }
-      }
-
-      // Check if the key is a registered range
-      options.ranges = options.ranges || []
-      const isRange = !(options.ranges.indexOf(key) === -1)
-      // When the key matches a keyword
-      if (isKeyword) {
-        // When offsets is true, push a new offset
-        if (options.offsets) {
-          query.offsets.push({
-            keyword: key,
-            value: term.value,
-            offsetStart: isExclusion ? term.offsetStart + 1 : term.offsetStart,
-            offsetEnd: term.offsetEnd,
-          })
-        }
-
-        const value = term.value
-        // When value is a thing
-        if (value.length) {
-          // Get an array of values when several are there
-          const values = value.split(',')
-          if (isExclusion) {
-            if (exclusion[key]) {
-              // ...many times...
-              if (exclusion[key] instanceof Array) {
-                // ...and got several values this time...
-                if (values.length > 1) {
-                  // ... concatenate both arrays.
-                  exclusion[key] = exclusion[key].concat(values)
-                } else {
-                  // ... append the current single value.
-                  exclusion[key].push(value)
-                }
-              }
-              // We saw that keyword only once before
-              else {
-                // Put both the current value and the new
-                // value in an array
-                exclusion[key] = [exclusion[key]]
-                exclusion[key].push(value)
-              }
-            }
-            // First time we see that keyword
-            else {
-              // ...and got several values this time...
-              if (values.length > 1) {
-                // ...add all values seen.
-                exclusion[key] = values
-              }
-              // Got only a single value this time
-              else {
-                // Record its value as a string
-                if (options.alwaysArray) {
-                  // ...but we always return an array if option alwaysArray is true
-                  exclusion[key] = [value]
-                } else {
-                  // Record its value as a string
-                  exclusion[key] = value
-                }
-              }
-            }
-          } else {
-            // If we already have seen that keyword...
-            if (query[key]) {
-              // ...many times...
-              if (query[key] instanceof Array) {
-                // ...and got several values this time...
-                if (values.length > 1) {
-                  // ... concatenate both arrays.
-                  query[key] = query[key].concat(values)
-                } else {
-                  // ... append the current single value.
-                  query[key].push(value)
-                }
-              }
-              // We saw that keyword only once before
-              else {
-                // Put both the current value and the new
-                // value in an array
-                query[key] = [query[key]]
-                query[key].push(value)
-              }
-            }
-            // First time we see that keyword
-            else {
-              // ...and got several values this time...
-              if (values.length > 1) {
-                // ...add all values seen.
-                query[key] = values
-              }
-              // Got only a single value this time
-              else {
-                if (options.alwaysArray) {
-                  // ...but we always return an array if option alwaysArray is true
-                  query[key] = [value]
-                } else {
-                  // Record its value as a string
-                  query[key] = value
-                }
-              }
-            }
-          }
-        }
-      }
-      // The key allows a range
-      else if (isRange) {
-        // When offsets is true, push a new offset
-        if (options.offsets) {
-          query.offsets.push(term)
-        }
-
-        const value = term.value
-        // Range are separated with a dash
-        const rangeValues = value.split('-')
-        // When both end of the range are specified
-        // keyword:XXXX-YYYY
-        query[key] = {}
-        if (rangeValues.length === 2) {
-          query[key].from = rangeValues[0]
-          query[key].to = rangeValues[1]
-        }
-          // When pairs of ranges are specified
-          // keyword:XXXX-YYYY,AAAA-BBBB
-          // else if (!rangeValues.length % 2) {
-          // }
-          // When only getting a single value,
-        // or an odd number of values
-        else {
-          query[key].from = value
-        }
-      } else {
-        // We add it as pure text
-        const text = term.keyword + ':' + term.value
-        query.text.push(text)
-
-        // When offsets is true, push a new offset
-        if (options.offsets) {
-          query.offsets.push({
-            text: text,
-            offsetStart: term.offsetStart,
-            offsetEnd: term.offsetEnd,
-          })
-        }
-      }
-    }
-  }
-
-  // Return forged query object
-  query.exclude = exclusion
-  return query
-}