Merge branch 'feature/176-CamelCaseTokens' into develop

2023-03-02 21:33:31 +01:00
parent 1109d85593 1901bcf55b
commit a9352bbef9
4 changed files with 48 additions and 11 deletions
@@ -14,7 +14,7 @@ export const excerptBefore = 100
 export const excerptAfter = 300

 export const highlightClass = `suggestion-highlight omnisearch-highlight ${
-  settings.hightlight ? 'omnisearch-default-highlight' : ''
+  settings.highlight ? 'omnisearch-default-highlight' : ''
 }`

 export const eventBus = new EventBus()
@@ -10,6 +10,7 @@ import { settings } from '../settings'
 import {
  chunkArray,
  removeDiacritics,
+  splitCamelCase,
  stringsToRegex,
  stripMarkdownCharacters,
 } from '../tools/utils'
@@ -25,7 +26,11 @@ const tokenize = (text: string): string[] => {
    return tokens.flatMap(word =>
      chsRegex.test(word) ? chsSegmenter.cut(word) : [word]
    )
-  } else return tokens
+  } else {
+    if (settings.splitCamelCase)
+      return [...tokens, ...tokens.flatMap(splitCamelCase)]
+    return tokens
+  }
 }

 export class Omnisearch {
@@ -45,7 +45,8 @@ export interface OmnisearchSettings extends WeightingSettings {
  welcomeMessage: string
  /** If a query returns 0 result, try again with more relax conditions */
  simpleSearch: boolean
-  hightlight: boolean
+  highlight: boolean
+  splitCamelCase: boolean
 }

 /**
@@ -204,6 +205,25 @@ export class SettingsTab extends PluginSettingTab {
        })
      )

+    // Split CamelCaseWords
+    const camelCaseDesc = new DocumentFragment()
+    camelCaseDesc.createSpan({}, span => {
+      span.innerHTML = `Enable this if you want to be able to search for CamelCaseWords as separate words.<br/>        
+        ⚠️ <span style="color: var(--text-accent)">Changing this setting will clear the cache.</span><br>
+        <strong style="color: var(--text-accent)">Needs a restart to fully take effect.</strong>
+        `
+    })
+    new Setting(containerEl)
+      .setName('Split CamelCaseWords')
+      .setDesc(camelCaseDesc)
+      .addToggle(toggle =>
+        toggle.setValue(settings.splitCamelCase).onChange(async v => {
+          await database.clearCache()
+          settings.splitCamelCase = v
+          await saveSettings(this.plugin)
+        })
+      )
+
    // Simpler search
    new Setting(containerEl)
      .setName('Simpler search')
@@ -301,8 +321,8 @@ export class SettingsTab extends PluginSettingTab {
        'Will highlight matching results when enabled. See README for more customization options.'
      )
      .addToggle(toggle =>
-        toggle.setValue(settings.hightlight).onChange(async v => {
-          settings.hightlight = v
+        toggle.setValue(settings.highlight).onChange(async v => {
+          settings.highlight = v
          await saveSettings(this.plugin)
        })
      )
@@ -379,12 +399,13 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
  indexedFileTypes: [] as string[],
  PDFIndexing: false,
  imagesIndexing: false,
+  splitCamelCase: false,

  ribbonIcon: true,
  showExcerpt: true,
  renderLineReturnInExcerpts: true,
  showCreateButton: false,
-  hightlight: true,
+  highlight: true,
  showPreviousQueryResults: true,
  simpleSearch: false,

@@ -91,14 +91,17 @@ export function getAllIndices(text: string, regex: RegExp): SearchMatch[] {
 */
 export function stringsToRegex(strings: string[]): RegExp {
  if (!strings.length) return /^$/g
-  // Default word split is not applied if the user uses the cm-chs-patch plugin
  const joined =
    '(' +
-    (getChsSegmenter() ? '' : `^|${SPACE_OR_PUNCTUATION.source}`) +
+    // Default word split is not applied if the user uses the cm-chs-patch plugin
+    (getChsSegmenter()
+      ? ''
+      : // Split on start of line, spaces, punctuation, or capital letters (for camelCase)
+      settings.splitCamelCase
+      ? `^|${SPACE_OR_PUNCTUATION.source}|[A-Z]`
+      : `^|${SPACE_OR_PUNCTUATION.source}`) +
    ')' +
-    '(' +
-    strings.map(s => escapeRegex(s)).join('|') +
-    ')'
+    `(${strings.map(s => escapeRegex(s)).join('|')})`

  const reg = new RegExp(`${joined}`, 'giu')
  return reg
@@ -313,3 +316,11 @@ export function chunkArray<T>(arr: T[], len: number): T[][] {

  return chunks
 }
+
+/**
+ * Splits after each ASCII lowercase letter that is followed by an uppercase one: 'fooBarBAZLorem' -> ['foo', 'Bar', 'BAZLorem']
+ * @param text the string to split; if it has no such boundary (and no space), it is returned as a one-element array
+ */
+export function splitCamelCase(text: string): string[] {
+  return text.replace(/([a-z](?=[A-Z]))/g, '$1 ').split(' ')
+}