#363 - Gate url tokenization behind a setting

This commit is contained in:
Simon Cambier
2024-04-15 07:18:17 +02:00
parent e5ac29c8ff
commit 13c7ec7c78
2 changed files with 24 additions and 5 deletions

View File

@@ -5,6 +5,7 @@ import {
chsRegex,
getChsSegmenter,
} from 'src/globals'
import { settings } from 'src/settings'
import { logDebug, splitCamelCase, splitHyphens } from 'src/tools/utils'
const markdownLinkExtractor = require('markdown-link-extractor')
@@ -25,11 +26,13 @@ function tokenizeTokens(text: string): string[] {
export function tokenizeForIndexing(text: string): string[] {
const words = tokenizeWords(text)
let urls: string[] = []
if (settings.tokenizeUrls) {
try {
urls = markdownLinkExtractor(text)
} catch (e) {
logDebug('Error extracting urls', e)
}
}
let tokens = tokenizeTokens(text)

View File

@@ -58,6 +58,7 @@ export interface OmnisearchSettings extends WeightingSettings {
welcomeMessage: string
/** If a query returns 0 result, try again with more relax conditions */
simpleSearch: boolean
tokenizeUrls: boolean
highlight: boolean
splitCamelCase: boolean
openInNewPane: boolean
@@ -199,7 +200,7 @@ export class SettingsTab extends PluginSettingTab {
.setValue(settings.unsupportedFilesIndexing)
.onChange(async v => {
await database.clearCache()
; (settings.unsupportedFilesIndexing as any) = v
;(settings.unsupportedFilesIndexing as any) = v
await saveSettings(this.plugin)
})
})
@@ -321,6 +322,20 @@ export class SettingsTab extends PluginSettingTab {
})
)
// Tokenize URLs: toggle bound to `settings.tokenizeUrls`.
// When enabled, the indexing tokenizer also extracts URLs from the note text;
// the flag is off by default. Each change is written to the in-memory settings
// object and then persisted through `saveSettings`.
new Setting(containerEl)
  .setName('Tokenize URLs')
  .setDesc(
    // The continuation line stays at column 0 on purpose: it is part of the
    // template literal, so any added indentation would end up in the text.
    `Enable this if you want to be able to search for URLs as separate words.
This has a strong impact on indexing performance, and can crash Obsidian under certain conditions.`
  )
  .addToggle(toggle =>
    toggle.setValue(settings.tokenizeUrls).onChange(async v => {
      settings.tokenizeUrls = v
      await saveSettings(this.plugin)
    })
  )
// Open in new pane
new Setting(containerEl)
.setName('Open in new pane')
@@ -659,6 +674,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
highlight: true,
showPreviousQueryResults: true,
simpleSearch: false,
tokenizeUrls: false,
fuzziness: '1',
weightBasename: 3,