From 13c7ec7c7860afcb97cc8232955fc0a3dc330a2c Mon Sep 17 00:00:00 2001 From: Simon Cambier Date: Mon, 15 Apr 2024 07:18:17 +0200 Subject: [PATCH] #363 - Gate url tokenization behind a setting --- src/search/tokenizer.ts | 11 +++++++---- src/settings.ts | 18 +++++++++++++++++- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/search/tokenizer.ts b/src/search/tokenizer.ts index c0c0c5a..3eb8ecc 100644 --- a/src/search/tokenizer.ts +++ b/src/search/tokenizer.ts @@ -5,6 +5,7 @@ import { chsRegex, getChsSegmenter, } from 'src/globals' +import { settings } from 'src/settings' import { logDebug, splitCamelCase, splitHyphens } from 'src/tools/utils' const markdownLinkExtractor = require('markdown-link-extractor') @@ -25,10 +26,12 @@ function tokenizeTokens(text: string): string[] { export function tokenizeForIndexing(text: string): string[] { const words = tokenizeWords(text) let urls: string[] = [] - try { - urls = markdownLinkExtractor(text) - } catch (e) { - logDebug('Error extracting urls', e) + if (settings.tokenizeUrls) { + try { + urls = markdownLinkExtractor(text) + } catch (e) { + logDebug('Error extracting urls', e) + } } let tokens = tokenizeTokens(text) diff --git a/src/settings.ts b/src/settings.ts index be63474..eb48af4 100644 --- a/src/settings.ts +++ b/src/settings.ts @@ -58,6 +58,7 @@ export interface OmnisearchSettings extends WeightingSettings { welcomeMessage: string /** If a query returns 0 result, try again with more relax conditions */ simpleSearch: boolean + tokenizeUrls: boolean highlight: boolean splitCamelCase: boolean openInNewPane: boolean @@ -199,7 +200,7 @@ export class SettingsTab extends PluginSettingTab { .setValue(settings.unsupportedFilesIndexing) .onChange(async v => { await database.clearCache() - ; (settings.unsupportedFilesIndexing as any) = v + ;(settings.unsupportedFilesIndexing as any) = v await saveSettings(this.plugin) }) }) @@ -321,6 +322,20 @@ export class SettingsTab extends PluginSettingTab { }) ) + // 
Extract URLs + new Setting(containerEl) + .setName('Tokenize URLs') + .setDesc( + `Enable this if you want to be able to search for URLs as separate words. + This has a strong impact on indexing performance, and can crash Obsidian under certain conditions.` + ) + .addToggle(toggle => + toggle.setValue(settings.tokenizeUrls).onChange(async v => { + settings.tokenizeUrls = v + await saveSettings(this.plugin) + }) + ) + // Open in new pane new Setting(containerEl) .setName('Open in new pane') @@ -659,6 +674,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = { highlight: true, showPreviousQueryResults: true, simpleSearch: false, + tokenizeUrls: false, fuzziness: '1', weightBasename: 3,