diff --git a/src/components/ResultItemInFile.svelte b/src/components/ResultItemInFile.svelte
index db9bfa5..096d9db 100644
--- a/src/components/ResultItemInFile.svelte
+++ b/src/components/ResultItemInFile.svelte
@@ -1,8 +1,5 @@
- {@html highlightText(cleanedContent.content, matchesExcerpt)}
+ {@html highlightText(cleanedContent, matchesExcerpt)}
diff --git a/src/components/ResultItemVault.svelte b/src/components/ResultItemVault.svelte
index 9357dae..b1f9970 100644
--- a/src/components/ResultItemVault.svelte
+++ b/src/components/ResultItemVault.svelte
@@ -40,10 +40,6 @@
$: reg = stringsToRegex(note.foundWords)
$: matchesTitle = getMatches(title, reg)
$: matchesNotePath = getMatches(notePath, reg)
- $: matchesExcerpt = cloneDeep(note.matches).map(m => {
- m.offset = m.offset - cleanedContent.offset
- return m
- })
$: cleanedContent = makeExcerpt(note.content, note.matches[0]?.offset ?? -1)
$: glyph = false //cacheManager.getLiveDocument(note.path)?.doesNotExist
$: {
@@ -102,7 +98,7 @@
{#if $showExcerpt}
- {@html highlightText(cleanedContent.content, matchesExcerpt)}
+ {@html highlightText(cleanedContent, note.matches)}
{/if}
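Note: both Svelte components now take the excerpt as a plain string (the new return type of makeExcerpt, see the text-processing.ts hunk below); ResultItemVault additionally drops the offset-shifted matchesExcerpt copy and highlights with note.matches directly. A rough sketch of the resulting call shape, with placeholder note data (not real vault content) and assuming highlightText(text, matches) as declared in text-processing.ts:

    // Placeholder data; the point is the call shape: a string excerpt plus the raw matches.
    const cleanedContent = makeExcerpt(note.content, note.matches[0]?.offset ?? -1)
    const html = highlightText(cleanedContent, note.matches)
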
diff --git a/src/search/omnisearch.ts b/src/search/omnisearch.ts
index 4411dc3..88e9cb3 100644
--- a/src/search/omnisearch.ts
+++ b/src/search/omnisearch.ts
@@ -190,6 +190,8 @@ export class Omnisearch {
headings3: settings.weightH3,
unmarkedTags: settings.weightUnmarkedTags,
},
+ // The query is already tokenized, don't tokenize again
+ tokenize: text => [text],
})
logDebug('Found', results.length, 'results')
@@ -404,7 +406,7 @@ export class Omnisearch {
// Tags, starting with #
...query.getTags(),
- ].filter(w => w.length > 1 || /\p{Emoji}/u.test(w))
+ ]
logDebug('Matching tokens:', foundWords)
logDebug('Getting matches locations...')
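Note: the options object here is presumably the one handed to MiniSearch's search() (the plugin already imports minisearch types elsewhere in this diff), so the identity tokenizer keeps the caller's pre-tokenized query terms from being split a second time. A minimal standalone sketch of the idea, with assumed sample data and tokenizers rather than the plugin's real setup:

    import MiniSearch from 'minisearch'

    // Index-time tokenizer keeps URLs whole; the identity search-time
    // tokenizer keeps the query term whole too, so both sides agree.
    const keepUrlsWhole = (text: string) => text.split(/\s+/).filter(Boolean)

    const index = new MiniSearch({ fields: ['content'], tokenize: keepUrlsWhole })
    index.add({ id: 1, content: 'see https://example.com/some-page for details' })

    // The query arrives pre-tokenized, so it must not be split again.
    const hits = index.search('https://example.com/some-page', {
      tokenize: text => [text],
    })
    // hits.length === 1
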
diff --git a/src/search/tokenizer.ts b/src/search/tokenizer.ts
index 2fca57e..29816e2 100644
--- a/src/search/tokenizer.ts
+++ b/src/search/tokenizer.ts
@@ -6,6 +6,7 @@ import {
getChsSegmenter,
} from 'src/globals'
import { logDebug, splitCamelCase, splitHyphens } from 'src/tools/utils'
+const markdownLinkExtractor = require('markdown-link-extractor')
function tokenizeWords(text: string): string[] {
return text.split(BRACKETS_AND_SPACE)
@@ -23,6 +24,7 @@ function tokenizeTokens(text: string): string[] {
*/
export function tokenizeForIndexing(text: string): string[] {
const words = tokenizeWords(text)
+ const urls: string[] = markdownLinkExtractor(text)
let tokens = tokenizeTokens(text)
@@ -35,6 +37,11 @@ export function tokenizeForIndexing(text: string): string[] {
// Add whole words (aka "not tokens")
tokens = [...tokens, ...words]
+ // Add urls
+ if (urls.length) {
+ tokens = [...tokens, ...urls]
+ }
+
const chsSegmenter = getChsSegmenter()
if (chsSegmenter) {
const chs = tokens.flatMap(word =>
@@ -56,7 +63,12 @@ export function tokenizeForIndexing(text: string): string[] {
* @returns
*/
export function tokenizeForSearch(text: string): QueryCombination {
- const tokens = tokenizeTokens(text)
+
+ // Extract urls and remove them from the query
+ const urls: string[] = markdownLinkExtractor(text)
+ text = urls.reduce((acc, url) => acc.replace(url, ''), text)
+
+ const tokens = [...tokenizeTokens(text), ...urls].filter(Boolean)
let chs: string[] = []
const chsSegmenter = getChsSegmenter()
@@ -70,7 +82,7 @@ export function tokenizeForSearch(text: string): QueryCombination {
combineWith: 'OR',
queries: [
{ combineWith: 'AND', queries: tokens },
- { combineWith: 'AND', queries: tokenizeWords(text) },
+ { combineWith: 'AND', queries: tokenizeWords(text).filter(Boolean) },
{ combineWith: 'AND', queries: tokens.flatMap(splitHyphens) },
{ combineWith: 'AND', queries: tokens.flatMap(splitCamelCase) },
{ combineWith: 'AND', queries: chs },
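Note: extracting the URLs before the normal split is what lets a pasted link match as one token instead of fragments like 'example' and 'com'. The same extract-then-strip step in isolation, as a sketch that assumes markdown-link-extractor returns a plain string[] of link targets (the shape the patch's own usage implies):

    const markdownLinkExtractor = require('markdown-link-extractor')

    // Pull URLs out first so they survive as whole tokens, then strip them
    // from the text before the usual word/token split runs over the rest.
    function splitOutUrls(text: string): { urls: string[]; rest: string } {
      const urls: string[] = markdownLinkExtractor(text)
      const rest = urls.reduce((acc, url) => acc.replace(url, ''), text)
      return { urls, rest }
    }

    // Assumed (unverified) output shape:
    // splitOutUrls('read [page](https://example.com/x) now')
    //   -> { urls: ['https://example.com/x'], rest: 'read [page]() now' }
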
diff --git a/src/tools/api.ts b/src/tools/api.ts
index 711253c..93abe7a 100644
--- a/src/tools/api.ts
+++ b/src/tools/api.ts
@@ -44,7 +44,7 @@ function mapResults(results: ResultNote[]): ResultNoteApi[] {
offset: match.offset,
}
}),
- excerpt: excerpt.content,
+ excerpt: excerpt,
}
return res
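Note: consumers of the public result type now read excerpt as the string itself. A hypothetical, self-contained illustration using a minimal stand-in type, not the plugin's full ResultNoteApi:

    // Minimal stand-in for the result shape; only the changed field matters here.
    type ResultNoteApiLike = { path: string; excerpt: string }

    function firstExcerpt(results: ResultNoteApiLike[]): string {
      // before this change: results[0]?.excerpt.content
      return results[0]?.excerpt ?? ''
    }
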
diff --git a/src/tools/text-processing.ts b/src/tools/text-processing.ts
index f9d2047..481ea3d 100644
--- a/src/tools/text-processing.ts
+++ b/src/tools/text-processing.ts
@@ -6,15 +6,12 @@ import {
regexStripQuotes,
excerptAfter,
excerptBefore,
- SEPARATORS,
} from 'src/globals'
import { settings } from 'src/settings'
import { removeDiacritics, warnDebug } from './utils'
import type { Query } from 'src/search/query'
import { Notice } from 'obsidian'
import { escapeRegExp } from 'lodash-es'
-import { tokenizeForSearch } from 'src/search/tokenizer'
-import type { QueryCombination } from 'minisearch'
/**
 * Wraps the matches in the text with a <span> element and a highlight class
@@ -115,14 +112,19 @@ export function stringsToRegex(strings: string[]): RegExp {
return new RegExp(`${joined}`, 'gui')
}
+/**
+ * Returns an array of matches in the text, using the provided regex
+ * @param text
+ * @param reg
+ * @param query
+ */
export function getMatches(
text: string,
reg: RegExp,
query?: Query
): SearchMatch[] {
- const separatorRegExp = new RegExp(SEPARATORS, 'gu')
const originalText = text
- text = text.toLowerCase().replace(separatorRegExp, ' ')
+ // text = text.toLowerCase().replace(new RegExp(SEPARATORS, 'gu'), ' ')
if (settings.ignoreDiacritics) {
text = removeDiacritics(text)
}
@@ -153,21 +155,16 @@ export function getMatches(
) {
const best = text.indexOf(query.getBestStringForExcerpt())
if (best > -1 && matches.find(m => m.offset === best)) {
- matches = matches.filter(m => m.offset !== best)
matches.unshift({
offset: best,
match: query.getBestStringForExcerpt(),
})
}
}
-
return matches
}
-export function makeExcerpt(
- content: string,
- offset: number
-): { content: string; offset: number } {
+export function makeExcerpt(content: string, offset: number): string {
try {
const pos = offset ?? -1
const from = Math.max(0, pos - excerptBefore)
@@ -201,14 +198,14 @@ export function makeExcerpt(
content = content.trim().replaceAll('\n', '<br>')
}
- return { content: content, offset: pos }
+ return content
} catch (e) {
new Notice(
'Omnisearch - Error while creating excerpt, see developer console'
)
console.error(`Omnisearch - Error while creating excerpt`)
console.error(e)
- return { content: '', offset: -1 }
+ return ''
}
}
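Note: with the wrapper object gone, makeExcerpt returns the snippet string directly, and '' on error. A quick sketch of the new call-site shape (sample strings, not vault data; the exact snippet depends on the excerptBefore/excerptAfter settings):

    const content = 'First line\nSecond line with the match\nThird line'
    const excerpt: string = makeExcerpt(content, content.indexOf('match'))
    // previously: makeExcerpt(...).content and .offset; now just the string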