#176 - WIP tokenization of CamelCase words

Technically works, but highlighting needs a rework
2023-02-24 12:16:40 +01:00
parent 4df09aa70c
commit 96b4ac631d
2 changed files with 10 additions and 1 deletions
--- a/src/search/omnisearch.ts
+++ b/src/search/omnisearch.ts
@@ -10,6 +10,7 @@ import { settings } from '../settings'
 import {
  chunkArray,
  removeDiacritics,
+  splitCamelCase,
  stringsToRegex,
  stripMarkdownCharacters,
 } from '../tools/utils'
@@ -25,7 +26,7 @@ const tokenize = (text: string): string[] => {
    return tokens.flatMap(word =>
      chsRegex.test(word) ? chsSegmenter.cut(word) : [word]
    )
-  } else return tokens
+  } else return tokens.flatMap(splitCamelCase)
 }

 export class Omnisearch {
--- a/src/tools/utils.ts
+++ b/src/tools/utils.ts
@@ -307,3 +307,11 @@ export function chunkArray<T>(arr: T[], len: number): T[][] {

  return chunks
 }
+
+/**
+ * Splits 'fooBarBAZLorem' into ['foo', 'Bar', 'BAZLorem']: a break is made only after a lowercase a-z letter directly followed by an uppercase A-Z letter
+ * @param text
+ */
+export function splitCamelCase(text: string): string[] {
+  return text.replace(/([a-z](?=[A-Z]))/g, '$1 ').split(' ')
+}