#363 - Gate URL tokenization behind a setting

This commit is contained in:
Simon Cambier
2024-04-15 07:18:17 +02:00
parent e5ac29c8ff
commit 13c7ec7c78
2 changed files with 24 additions and 5 deletions

View File

@@ -5,6 +5,7 @@ import {
chsRegex,
getChsSegmenter,
} from 'src/globals'
import { settings } from 'src/settings'
import { logDebug, splitCamelCase, splitHyphens } from 'src/tools/utils'
const markdownLinkExtractor = require('markdown-link-extractor')
@@ -25,10 +26,12 @@ function tokenizeTokens(text: string): string[] {
export function tokenizeForIndexing(text: string): string[] {
const words = tokenizeWords(text)
let urls: string[] = []
try {
urls = markdownLinkExtractor(text)
} catch (e) {
logDebug('Error extracting urls', e)
if (settings.tokenizeUrls) {
try {
urls = markdownLinkExtractor(text)
} catch (e) {
logDebug('Error extracting urls', e)
}
}
let tokens = tokenizeTokens(text)