Merge branch 'develop'

# Conflicts: # manifest-beta.json # src/settings.ts # versions.json
2023-07-22 23:17:43 +02:00
parent 3ac85e7aa8 0b9a7099fe
commit 8f1638da2c
9 changed files with 215 additions and 90 deletions
--- a/src/search/omnisearch.ts
+++ b/src/search/omnisearch.ts
@@ -12,6 +12,7 @@ import {
  logDebug,
  removeDiacritics,
  splitCamelCase,
+  splitHyphens,
  stringsToRegex,
  stripMarkdownCharacters,
  warnDebug,
@@ -22,17 +23,22 @@ import { cacheManager } from '../cache-manager'
 import { sortBy } from 'lodash-es'

 const tokenize = (text: string): string[] => {
-  const tokens = text.split(SPACE_OR_PUNCTUATION)
+  let tokens = text.split(SPACE_OR_PUNCTUATION)
+
+  // When enabled, we only use the chsSegmenter,
+  // and not the other custom tokenizers
  const chsSegmenter = getChsSegmenter()
  if (chsSegmenter) {
-    return tokens.flatMap(word =>
+    tokens = tokens.flatMap(word =>
      chsRegex.test(word) ? chsSegmenter.cut(word) : [word]
    )
  } else {
-    if (settings.splitCamelCase)
-      return [...tokens, ...tokens.flatMap(splitCamelCase)]
-    return tokens
+    // Split camelCase tokens into "camel" and "case
+    tokens = [...tokens, ...tokens.flatMap(splitCamelCase)]
+    // Split hyphenated tokens
+    tokens = [...tokens, ...tokens.flatMap(splitHyphens)]
  }
+  return tokens
 }

 export class Omnisearch {
@@ -182,12 +188,26 @@ export class Omnisearch {

    logDebug('Starting search for', query)

+    let fuzziness: number
+    switch (settings.fuzziness) {
+      case '0':
+        fuzziness = 0
+        break
+      case '1':
+        fuzziness = 0.1
+        break
+      default:
+        fuzziness = 0.2
+        break
+    }
+
    let results = this.minisearch.search(query.segmentsToStr(), {
      prefix: term => term.length >= options.prefixLength,
      // length <= 3: no fuzziness
      // length <= 5: fuzziness of 10%
      // length > 5: fuzziness of 20%
-      fuzzy: term => (term.length <= 3 ? 0 : term.length <= 5 ? 0.1 : 0.2),
+      fuzzy: term =>
+        term.length <= 3 ? 0 : term.length <= 5 ? fuzziness / 2 : fuzziness,
      combineWith: 'AND',
      boost: {
        basename: settings.weightBasename,
--- a/src/search/query.ts
+++ b/src/search/query.ts
@@ -9,11 +9,10 @@ type Keywords = {
 } & { text: string[] }

 export class Query {
-  query: Keywords & { exclude: Keywords }
-  /**
-   * @deprecated
-   */
-  extensions: string[] = []
+  query: Keywords & {
+    exclude: Keywords
+  }
+  #inQuotes: string[]

  constructor(text = '') {
    if (settings.ignoreDiacritics) {
@@ -44,7 +43,10 @@ export class Query {
      }
    }
    this.query = parsed
-    this.extensions = this.query.ext ?? []
+
+    // Get strings in quotes, and remove the quotes
+    this.#inQuotes =
+      text.match(/"([^"]+)"/g)?.map(o => o.replace(/"/g, '')) ?? []
  }

  public isEmpty(): boolean {
@@ -62,7 +64,7 @@ export class Query {
  public segmentsToStr(): string {
    return this.query.text.join(' ')
  }
-  
+
  public getTags(): string[] {
    return this.query.text.filter(o => o.startsWith('#'))
  }
@@ -72,6 +74,11 @@ export class Query {
  }

  public getExactTerms(): string[] {
-    return this.query.text.filter(o => o.split(' ').length > 1)
+    return [
+      ...new Set([
+        ...this.query.text.filter(o => o.split(' ').length > 1),
+        ...this.#inQuotes,
+      ]),
+    ]
  }
 }