From 610272b88512a9f4e5ec877c1c20689f5ba234c0 Mon Sep 17 00:00:00 2001
From: Simon Cambier
Date: Thu, 11 Apr 2024 12:50:16 +0200
Subject: [PATCH] try-catch the url extractor part of the tokenizer, should
 fix #362

---
 src/search/tokenizer.ts | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/search/tokenizer.ts b/src/search/tokenizer.ts
index 29816e2..c0c0c5a 100644
--- a/src/search/tokenizer.ts
+++ b/src/search/tokenizer.ts
@@ -24,7 +24,12 @@ function tokenizeTokens(text: string): string[] {
  */
 export function tokenizeForIndexing(text: string): string[] {
   const words = tokenizeWords(text)
-  const urls: string[] = markdownLinkExtractor(text)
+  let urls: string[] = []
+  try {
+    urls = markdownLinkExtractor(text)
+  } catch (e) {
+    logDebug('Error extracting urls', e)
+  }
 
   let tokens = tokenizeTokens(text)
 
@@ -63,7 +68,6 @@
  * @returns
  */
 export function tokenizeForSearch(text: string): QueryCombination {
-  // Extract urls and remove them from the query
   const urls: string[] = markdownLinkExtractor(text)
   text = urls.reduce((acc, url) => acc.replace(url, ''), text)