Highlighting rework, should help with #304
@@ -1,8 +1,5 @@
 <script lang="ts">
-import {
-  makeExcerpt,
-  highlightText,
-} from 'src/tools/text-processing'
+import { makeExcerpt, highlightText } from 'src/tools/text-processing'
 import type { ResultNote } from '../globals'
 import ResultItemContainer from './ResultItemContainer.svelte'
 import { cloneDeep } from 'lodash-es'
@@ -13,10 +10,7 @@
 export let selected = false

 $: cleanedContent = makeExcerpt(note?.content ?? '', offset)
-$: matchesExcerpt = cloneDeep(note.matches).map(m => {
-  m.offset = m.offset - cleanedContent.offset
-  return m
-})
+$: matchesExcerpt = cloneDeep(note.matches)
 </script>

 <ResultItemContainer
@@ -26,6 +20,6 @@
 on:click
 on:auxclick>
   <div class="omnisearch-result__body">
-    {@html highlightText(cleanedContent.content, matchesExcerpt)}
+    {@html highlightText(cleanedContent, matchesExcerpt)}
   </div>
 </ResultItemContainer>
@@ -40,10 +40,6 @@
 $: reg = stringsToRegex(note.foundWords)
 $: matchesTitle = getMatches(title, reg)
 $: matchesNotePath = getMatches(notePath, reg)
-$: matchesExcerpt = cloneDeep(note.matches).map(m => {
-  m.offset = m.offset - cleanedContent.offset
-  return m
-})
 $: cleanedContent = makeExcerpt(note.content, note.matches[0]?.offset ?? -1)
 $: glyph = false //cacheManager.getLiveDocument(note.path)?.doesNotExist
 $: {
@@ -102,7 +98,7 @@
 <div style="display: flex; flex-direction: row;">
   {#if $showExcerpt}
     <div class="omnisearch-result__body">
-      {@html highlightText(cleanedContent.content, matchesExcerpt)}
+      {@html highlightText(cleanedContent, note.matches)}
     </div>
   {/if}

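Note: the two component hunks above drop the per-match offset rebasing (`m.offset - cleanedContent.offset`). `makeExcerpt` now returns the excerpt string directly, and the matches are passed through unchanged (or, in the vault item, `note.matches` itself). That only works if `highlightText` locates matches by their text rather than by their offsets. A minimal sketch of that string-based approach, not the plugin's actual implementation, assuming the `SearchMatch` shape visible elsewhere in this diff and a hypothetical highlight class name:

    import { escapeRegExp } from 'lodash-es'

    type SearchMatch = { match: string; offset: number }

    // Sketch only: re-match the found strings instead of trusting offsets,
    // so offsets computed against the full note don't need rebasing.
    function highlightText(text: string, matches: SearchMatch[]): string {
      if (!matches.length) return text
      const reg = new RegExp(
        matches.map(m => escapeRegExp(m.match)).join('|'),
        'giu'
      )
      return text.replace(
        reg,
        m => `<span class="suggestion-highlight">${m}</span>`
      )
    }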
@@ -190,6 +190,8 @@ export class Omnisearch {
       headings3: settings.weightH3,
       unmarkedTags: settings.weightUnmarkedTags,
     },
+    // The query is already tokenized, don't tokenize again
+    tokenize: text => [text],
   })

   logDebug('Found', results.length, 'results')
@@ -404,7 +406,7 @@ export class Omnisearch {

   // Tags, starting with #
   ...query.getTags(),
-].filter(w => w.length > 1 || /\p{Emoji}/u.test(w))
+]
 logDebug('Matching tokens:', foundWords)

 logDebug('Getting matches locations...')
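Note: the new `tokenize` search option hands MiniSearch pre-tokenized terms, so a term that contains punctuation (typically a URL) is not split a second time. A sketch of how that option behaves; the document and the index-side tokenizer are illustrative, not the plugin's setup:

    import MiniSearch from 'minisearch'

    const minisearch = new MiniSearch({
      fields: ['content'],
      // Index-side stand-in for tokenizeForIndexing(), which keeps urls whole
      tokenize: text => text.split(' '),
    })
    minisearch.add({ id: 1, content: 'see https://example.com/docs' })

    // Identity tokenizer at query time: the query stays one token and
    // matches the indexed url instead of being split at ':' and '/'
    const results = minisearch.search('https://example.com/docs', {
      tokenize: text => [text],
    })

The second hunk also stops filtering `foundWords`, so single-character words are now kept when locating matches.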
@@ -6,6 +6,7 @@ import {
   getChsSegmenter,
 } from 'src/globals'
 import { logDebug, splitCamelCase, splitHyphens } from 'src/tools/utils'
+const markdownLinkExtractor = require('markdown-link-extractor')

 function tokenizeWords(text: string): string[] {
   return text.split(BRACKETS_AND_SPACE)
@@ -23,6 +24,7 @@ function tokenizeTokens(text: string): string[] {
  */
 export function tokenizeForIndexing(text: string): string[] {
   const words = tokenizeWords(text)
+  const urls: string[] = markdownLinkExtractor(text)

   let tokens = tokenizeTokens(text)

@@ -35,6 +37,11 @@ export function tokenizeForIndexing(text: string): string[] {
   // Add whole words (aka "not tokens")
   tokens = [...tokens, ...words]

+  // Add urls
+  if (urls.length) {
+    tokens = [...tokens, ...urls]
+  }
+
   const chsSegmenter = getChsSegmenter()
   if (chsSegmenter) {
     const chs = tokens.flatMap(word =>
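Note: `markdown-link-extractor` pulls the URLs out of the note before tokenization so each one can be indexed as a single token. A usage sketch, assuming the pre-v3 API used here (a single function returning `string[]`; later versions of the library return an object instead):

    const markdownLinkExtractor = require('markdown-link-extractor')

    const urls: string[] = markdownLinkExtractor(
      'See [the docs](https://example.com/docs), or https://example.com'
    )
    // -> something like ['https://example.com/docs', 'https://example.com']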
@@ -56,7 +63,12 @@ export function tokenizeForIndexing(text: string): string[] {
  * @returns
  */
 export function tokenizeForSearch(text: string): QueryCombination {
-  const tokens = tokenizeTokens(text)
+  // Extract urls and remove them from the query
+  const urls: string[] = markdownLinkExtractor(text)
+  text = urls.reduce((acc, url) => acc.replace(url, ''), text)
+
+  const tokens = [...tokenizeTokens(text), ...urls].filter(Boolean)
+

   let chs: string[] = []
   const chsSegmenter = getChsSegmenter()
@@ -70,7 +82,7 @@ export function tokenizeForSearch(text: string): QueryCombination {
     combineWith: 'OR',
     queries: [
       { combineWith: 'AND', queries: tokens },
-      { combineWith: 'AND', queries: tokenizeWords(text) },
+      { combineWith: 'AND', queries: tokenizeWords(text).filter(Boolean) },
       { combineWith: 'AND', queries: tokens.flatMap(splitHyphens) },
       { combineWith: 'AND', queries: tokens.flatMap(splitCamelCase) },
       { combineWith: 'AND', queries: chs },
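Note: on the search side, the extracted URLs are first erased from the query string so that `tokenizeTokens` never sees them, then re-appended as whole tokens. A worked example of that `reduce` step (input values assumed):

    const text = 'docs https://example.com/docs'
    const urls = ['https://example.com/docs']
    const stripped = urls.reduce((acc, url) => acc.replace(url, ''), text)
    // stripped === 'docs '
    // tokens: [...tokenizeTokens('docs '), ...urls].filter(Boolean)
    //      -> ['docs', 'https://example.com/docs']
    // .filter(Boolean) drops the empty strings the removal leaves behind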
@@ -44,7 +44,7 @@ function mapResults(results: ResultNote[]): ResultNoteApi[] {
         offset: match.offset,
       }
     }),
-    excerpt: excerpt.content,
+    excerpt: excerpt,
   }

   return res
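Note: for API consumers this is a small breaking change: `excerpt` is now the excerpt string itself instead of the `content` property of an excerpt object. A sketch of the affected part of the result shape; only `matches` and `excerpt` are confirmed by this diff, the other field is an assumption:

    interface ResultNoteApi {
      path: string
      matches: { match: string; offset: number }[]
      excerpt: string // previously excerpt.content
    }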
@@ -6,15 +6,12 @@ import {
   regexStripQuotes,
   excerptAfter,
   excerptBefore,
-  SEPARATORS,
 } from 'src/globals'
 import { settings } from 'src/settings'
 import { removeDiacritics, warnDebug } from './utils'
 import type { Query } from 'src/search/query'
 import { Notice } from 'obsidian'
 import { escapeRegExp } from 'lodash-es'
-import { tokenizeForSearch } from 'src/search/tokenizer'
-import type { QueryCombination } from 'minisearch'

 /**
  * Wraps the matches in the text with a <span> element and a highlight class
@@ -115,14 +112,19 @@ export function stringsToRegex(strings: string[]): RegExp {
   return new RegExp(`${joined}`, 'gui')
 }

+/**
+ * Returns an array of matches in the text, using the provided regex
+ * @param text
+ * @param reg
+ * @param query
+ */
 export function getMatches(
   text: string,
   reg: RegExp,
   query?: Query
 ): SearchMatch[] {
-  const separatorRegExp = new RegExp(SEPARATORS, 'gu')
   const originalText = text
-  text = text.toLowerCase().replace(separatorRegExp, ' ')
+  // text = text.toLowerCase().replace(new RegExp(SEPARATORS, 'gu'), ' ')
   if (settings.ignoreDiacritics) {
     text = removeDiacritics(text)
   }
@@ -153,21 +155,16 @@ export function getMatches(
   ) {
     const best = text.indexOf(query.getBestStringForExcerpt())
     if (best > -1 && matches.find(m => m.offset === best)) {
-      matches = matches.filter(m => m.offset !== best)
       matches.unshift({
         offset: best,
         match: query.getBestStringForExcerpt(),
       })
     }
   }

   return matches
 }

-export function makeExcerpt(
-  content: string,
-  offset: number
-): { content: string; offset: number } {
+export function makeExcerpt(content: string, offset: number): string {
   try {
     const pos = offset ?? -1
     const from = Math.max(0, pos - excerptBefore)
@@ -201,14 +198,14 @@ export function makeExcerpt(
       content = content.trim().replaceAll('\n', '<br>')
     }

-    return { content: content, offset: pos }
+    return content
   } catch (e) {
     new Notice(
       'Omnisearch - Error while creating excerpt, see developer console'
     )
     console.error(`Omnisearch - Error while creating excerpt`)
     console.error(e)
-    return { content: '', offset: -1 }
+    return ''
   }
 }
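Note: `makeExcerpt` no longer returns `{ content, offset }`, since no caller rebases offsets against the excerpt anymore; it returns the excerpt text directly. A minimal sketch of the simplified contract; the window constants stand in for the real values imported from 'src/globals', and the ellipsis handling is assumed:

    const excerptBefore = 100 // assumed value
    const excerptAfter = 300 // assumed value

    function makeExcerpt(content: string, offset: number): string {
      // Mirrors the diff's defensive `offset ?? -1`
      const pos = offset ?? -1
      const from = Math.max(0, pos - excerptBefore)
      const to = Math.min(content.length, pos + excerptAfter)
      return (
        (from > 0 ? '…' : '') +
        content.slice(from, to).trim() +
        (to < content.length ? '…' : '')
      )
    }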