#363 - Gate url tokenization behind a setting

This commit is contained in:
Simon Cambier
2024-04-15 07:18:17 +02:00
parent e5ac29c8ff
commit 13c7ec7c78
2 changed files with 24 additions and 5 deletions

View File

@@ -5,6 +5,7 @@ import {
chsRegex, chsRegex,
getChsSegmenter, getChsSegmenter,
} from 'src/globals' } from 'src/globals'
import { settings } from 'src/settings'
import { logDebug, splitCamelCase, splitHyphens } from 'src/tools/utils' import { logDebug, splitCamelCase, splitHyphens } from 'src/tools/utils'
const markdownLinkExtractor = require('markdown-link-extractor') const markdownLinkExtractor = require('markdown-link-extractor')
@@ -25,10 +26,12 @@ function tokenizeTokens(text: string): string[] {
export function tokenizeForIndexing(text: string): string[] { export function tokenizeForIndexing(text: string): string[] {
const words = tokenizeWords(text) const words = tokenizeWords(text)
let urls: string[] = [] let urls: string[] = []
try { if (settings.tokenizeUrls) {
urls = markdownLinkExtractor(text) try {
} catch (e) { urls = markdownLinkExtractor(text)
logDebug('Error extracting urls', e) } catch (e) {
logDebug('Error extracting urls', e)
}
} }
let tokens = tokenizeTokens(text) let tokens = tokenizeTokens(text)

View File

@@ -58,6 +58,7 @@ export interface OmnisearchSettings extends WeightingSettings {
welcomeMessage: string welcomeMessage: string
/** If a query returns 0 result, try again with more relax conditions */ /** If a query returns 0 result, try again with more relax conditions */
simpleSearch: boolean simpleSearch: boolean
tokenizeUrls: boolean
highlight: boolean highlight: boolean
splitCamelCase: boolean splitCamelCase: boolean
openInNewPane: boolean openInNewPane: boolean
@@ -199,7 +200,7 @@ export class SettingsTab extends PluginSettingTab {
.setValue(settings.unsupportedFilesIndexing) .setValue(settings.unsupportedFilesIndexing)
.onChange(async v => { .onChange(async v => {
await database.clearCache() await database.clearCache()
; (settings.unsupportedFilesIndexing as any) = v ;(settings.unsupportedFilesIndexing as any) = v
await saveSettings(this.plugin) await saveSettings(this.plugin)
}) })
}) })
@@ -321,6 +322,20 @@ export class SettingsTab extends PluginSettingTab {
}) })
) )
// Tokenize URLs
// Toggles `settings.tokenizeUrls`, which gates URL extraction in
// `tokenizeForIndexing`. Disabled by default.
new Setting(containerEl)
  .setName('Tokenize URLs')
  .setDesc(
    `Enable this if you want to be able to search for URLs as separate words.
This has a strong impact on indexing performance, and can crash Obsidian under certain conditions.`
  )
  .addToggle(toggle =>
    toggle.setValue(settings.tokenizeUrls).onChange(async v => {
      // This option changes the tokens produced at indexing time, so the
      // cache is cleared first, mirroring the other indexing-related
      // settings in this tab.
      await database.clearCache()
      settings.tokenizeUrls = v
      await saveSettings(this.plugin)
    })
  )
// Open in new pane // Open in new pane
new Setting(containerEl) new Setting(containerEl)
.setName('Open in new pane') .setName('Open in new pane')
@@ -659,6 +674,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
highlight: true, highlight: true,
showPreviousQueryResults: true, showPreviousQueryResults: true,
simpleSearch: false, simpleSearch: false,
tokenizeUrls: false,
fuzziness: '1', fuzziness: '1',
weightBasename: 3, weightBasename: 3,