From 610272b88512a9f4e5ec877c1c20689f5ba234c0 Mon Sep 17 00:00:00 2001
From: Simon Cambier
Date: Thu, 11 Apr 2024 12:50:16 +0200
Subject: [PATCH] try-catch the url extractor part of the tokenizer, should
 fix #362

---
 src/search/tokenizer.ts | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/search/tokenizer.ts b/src/search/tokenizer.ts
index 29816e2..c0c0c5a 100644
--- a/src/search/tokenizer.ts
+++ b/src/search/tokenizer.ts
@@ -24,7 +24,12 @@ function tokenizeTokens(text: string): string[] {
  */
 export function tokenizeForIndexing(text: string): string[] {
   const words = tokenizeWords(text)
-  const urls: string[] = markdownLinkExtractor(text)
+  let urls: string[] = []
+  try {
+    urls = markdownLinkExtractor(text)
+  } catch (e) {
+    logDebug('Error extracting urls', e)
+  }
 
   let tokens = tokenizeTokens(text)
 
@@ -63,7 +68,6 @@
  * @returns
  */
 export function tokenizeForSearch(text: string): QueryCombination {
-  // Extract urls and remove them from the query
   const urls: string[] = markdownLinkExtractor(text)
   text = urls.reduce((acc, url) => acc.replace(url, ''), text)