Try/catch the URL extractor part of the tokenizer; this should fix #362

This commit is contained in:
Simon Cambier
2024-04-11 12:50:16 +02:00
parent ae920814e0
commit 610272b885

View File

@@ -24,7 +24,12 @@ function tokenizeTokens(text: string): string[] {
*/
export function tokenizeForIndexing(text: string): string[] {
const words = tokenizeWords(text)
const urls: string[] = markdownLinkExtractor(text)
let urls: string[] = []
try {
urls = markdownLinkExtractor(text)
} catch (e) {
logDebug('Error extracting urls', e)
}
let tokens = tokenizeTokens(text)
@@ -63,7 +68,6 @@ export function tokenizeForIndexing(text: string): string[] {
* @returns
*/
export function tokenizeForSearch(text: string): QueryCombination {
// Extract urls and remove them from the query
const urls: string[] = markdownLinkExtractor(text)
text = urls.reduce((acc, url) => acc.replace(url, ''), text)