Wrap the URL-extractor step of the tokenizer in a try-catch; should fix #362

This commit is contained in:
Simon Cambier
2024-04-11 12:50:16 +02:00
parent ae920814e0
commit 610272b885

View File

@@ -24,7 +24,12 @@ function tokenizeTokens(text: string): string[] {
   */
  export function tokenizeForIndexing(text: string): string[] {
    const words = tokenizeWords(text)
-   const urls: string[] = markdownLinkExtractor(text)
+   let urls: string[] = []
+   try {
+     urls = markdownLinkExtractor(text)
+   } catch (e) {
+     logDebug('Error extracting urls', e)
+   }
    let tokens = tokenizeTokens(text)
@@ -63,7 +68,6 @@ export function tokenizeForIndexing(text: string): string[] {
   * @returns
   */
  export function tokenizeForSearch(text: string): QueryCombination {
    // Extract urls and remove them from the query
    const urls: string[] = markdownLinkExtractor(text)
    text = urls.reduce((acc, url) => acc.replace(url, ''), text)