Try/catch the URL extractor part of the tokenizer; this should fix #362

This commit is contained in:
Simon Cambier
2024-04-11 12:50:16 +02:00
parent ae920814e0
commit 610272b885

View File

@@ -24,7 +24,12 @@ function tokenizeTokens(text: string): string[] {
*/
export function tokenizeForIndexing(text: string): string[] {
const words = tokenizeWords(text)
const urls: string[] = markdownLinkExtractor(text)
let urls: string[] = []
try {
urls = markdownLinkExtractor(text)
} catch (e) {
logDebug('Error extracting urls', e)
}
let tokens = tokenizeTokens(text)
@@ -63,7 +68,6 @@ export function tokenizeForIndexing(text: string): string[] {
* @returns
*/
export function tokenizeForSearch(text: string): QueryCombination {
// Extract urls and remove them from the query
const urls: string[] = markdownLinkExtractor(text)
text = urls.reduce((acc, url) => acc.replace(url, ''), text)