Highlighting rework, should help with #304

Simon Cambier
2024-03-26 21:32:03 +01:00
parent f77ff3ec2f
commit 484e961a7e
6 changed files with 32 additions and 31 deletions

View File

@@ -1,8 +1,5 @@
 <script lang="ts">
-  import {
-    makeExcerpt,
-    highlightText,
-  } from 'src/tools/text-processing'
+  import { makeExcerpt, highlightText } from 'src/tools/text-processing'
   import type { ResultNote } from '../globals'
   import ResultItemContainer from './ResultItemContainer.svelte'
   import { cloneDeep } from 'lodash-es'
@@ -13,10 +10,7 @@
   export let selected = false

   $: cleanedContent = makeExcerpt(note?.content ?? '', offset)
-  $: matchesExcerpt = cloneDeep(note.matches).map(m => {
-    m.offset = m.offset - cleanedContent.offset
-    return m
-  })
+  $: matchesExcerpt = cloneDeep(note.matches)
 </script>

 <ResultItemContainer
@@ -26,6 +20,6 @@
   on:click
   on:auxclick>
   <div class="omnisearch-result__body">
-    {@html highlightText(cleanedContent.content, matchesExcerpt)}
+    {@html highlightText(cleanedContent, matchesExcerpt)}
   </div>
 </ResultItemContainer>
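Both result components now pass the matches through unchanged: since makeExcerpt returns a plain string, there is no excerpt offset left to rebase the match offsets against. That implies highlightText locates matches inside the excerpt by their matched text rather than by absolute offset. The diff doesn't show highlightText itself, so the following is only a hypothetical sketch of a highlighter working that way (the SearchMatch shape is taken from the other files in this commit; the CSS class name is a placeholder):

  import { escapeRegExp } from 'lodash-es'

  type SearchMatch = { match: string; offset: number }

  // Wrap every occurrence of each distinct matched string in a highlight
  // span, ignoring the absolute offsets entirely.
  function highlightSketch(excerpt: string, matches: SearchMatch[]): string {
    const terms = [...new Set(matches.map(m => m.match))]
      .filter(Boolean)
      // Longer terms first, so the alternation prefers the longest match
      .sort((a, b) => b.length - a.length)
    if (!terms.length) return excerpt
    const reg = new RegExp(terms.map(escapeRegExp).join('|'), 'gi')
    return excerpt.replace(reg, m => `<span class="highlight">${m}</span>`)
  }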

View File

@@ -40,10 +40,6 @@
   $: reg = stringsToRegex(note.foundWords)
   $: matchesTitle = getMatches(title, reg)
   $: matchesNotePath = getMatches(notePath, reg)
-  $: matchesExcerpt = cloneDeep(note.matches).map(m => {
-    m.offset = m.offset - cleanedContent.offset
-    return m
-  })
   $: cleanedContent = makeExcerpt(note.content, note.matches[0]?.offset ?? -1)
   $: glyph = false //cacheManager.getLiveDocument(note.path)?.doesNotExist
   $: {
@@ -102,7 +98,7 @@
 <div style="display: flex; flex-direction: row;">
   {#if $showExcerpt}
     <div class="omnisearch-result__body">
-      {@html highlightText(cleanedContent.content, matchesExcerpt)}
+      {@html highlightText(cleanedContent, note.matches)}
     </div>
   {/if}

View File

@@ -190,6 +190,8 @@ export class Omnisearch {
         headings3: settings.weightH3,
         unmarkedTags: settings.weightUnmarkedTags,
       },
+      // The query is already tokenized, don't tokenize again
+      tokenize: text => [text],
     })

     logDebug('Found', results.length, 'results')
@@ -404,7 +406,7 @@ export class Omnisearch {
       // Tags, starting with #
       ...query.getTags(),
-    ].filter(w => w.length > 1 || /\p{Emoji}/u.test(w))
+    ]

     logDebug('Matching tokens:', foundWords)
     logDebug('Getting matches locations...')
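The added tokenize option leans on MiniSearch's support for overriding the query tokenizer per search: Omnisearch tokenizes the query itself before calling search(), so an identity tokenizer keeps MiniSearch from splitting the terms a second time. A minimal self-contained sketch of that option (the indexed document is made up for illustration):

  import MiniSearch from 'minisearch'

  const ms = new MiniSearch({ fields: ['content'] })
  ms.add({ id: 1, content: 'hello world' })

  // Identity tokenizer: each query string is already a single, final term.
  const results = ms.search('hello', {
    tokenize: text => [text],
  })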

View File

@@ -6,6 +6,7 @@ import {
   getChsSegmenter,
 } from 'src/globals'
 import { logDebug, splitCamelCase, splitHyphens } from 'src/tools/utils'
+const markdownLinkExtractor = require('markdown-link-extractor')

 function tokenizeWords(text: string): string[] {
   return text.split(BRACKETS_AND_SPACE)
@@ -23,6 +24,7 @@ function tokenizeTokens(text: string): string[] {
  */
 export function tokenizeForIndexing(text: string): string[] {
   const words = tokenizeWords(text)
+  const urls: string[] = markdownLinkExtractor(text)
   let tokens = tokenizeTokens(text)
@@ -35,6 +37,11 @@ export function tokenizeForIndexing(text: string): string[] {
   // Add whole words (aka "not tokens")
   tokens = [...tokens, ...words]

+  // Add urls
+  if (urls.length) {
+    tokens = [...tokens, ...urls]
+  }
+
   const chsSegmenter = getChsSegmenter()
   if (chsSegmenter) {
     const chs = tokens.flatMap(word =>
@@ -56,7 +63,12 @@ export function tokenizeForIndexing(text: string): string[] {
  * @returns
  */
 export function tokenizeForSearch(text: string): QueryCombination {
-  const tokens = tokenizeTokens(text)
+  // Extract urls and remove them from the query
+  const urls: string[] = markdownLinkExtractor(text)
+  text = urls.reduce((acc, url) => acc.replace(url, ''), text)
+  const tokens = [...tokenizeTokens(text), ...urls].filter(Boolean)

   let chs: string[] = []
   const chsSegmenter = getChsSegmenter()
@@ -70,7 +82,7 @@ export function tokenizeForSearch(text: string): QueryCombination {
     combineWith: 'OR',
     queries: [
       { combineWith: 'AND', queries: tokens },
-      { combineWith: 'AND', queries: tokenizeWords(text) },
+      { combineWith: 'AND', queries: tokenizeWords(text).filter(Boolean) },
       { combineWith: 'AND', queries: tokens.flatMap(splitHyphens) },
       { combineWith: 'AND', queries: tokens.flatMap(splitCamelCase) },
       { combineWith: 'AND', queries: chs },
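markdown-link-extractor is used here through its older API, where the call returns a plain string[] of extracted URLs (newer majors of the package return an object with separate links and anchors arrays, so the string[] annotation above pins the expected shape). A short sketch of the search-side flow, with a made-up query string:

  const markdownLinkExtractor = require('markdown-link-extractor')

  // Assuming the older API that returns string[] directly.
  const query = 'setup notes [guide](https://example.com/docs/setup)'
  const urls: string[] = markdownLinkExtractor(query)
  // e.g. ['https://example.com/docs/setup']

  // Strip each URL from the query so it doesn't get shredded by the
  // regular tokenizer, then search for it as one whole token.
  const stripped = urls.reduce((acc, url) => acc.replace(url, ''), query)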

View File

@@ -44,7 +44,7 @@ function mapResults(results: ResultNote[]): ResultNoteApi[] {
         offset: match.offset,
       }
     }),
-    excerpt: excerpt.content,
+    excerpt: excerpt,
   }
   return res

View File

@@ -6,15 +6,12 @@ import {
   regexStripQuotes,
   excerptAfter,
   excerptBefore,
-  SEPARATORS,
 } from 'src/globals'
 import { settings } from 'src/settings'
 import { removeDiacritics, warnDebug } from './utils'
 import type { Query } from 'src/search/query'
 import { Notice } from 'obsidian'
 import { escapeRegExp } from 'lodash-es'
-import { tokenizeForSearch } from 'src/search/tokenizer'
-import type { QueryCombination } from 'minisearch'

 /**
  * Wraps the matches in the text with a <span> element and a highlight class
@@ -115,14 +112,19 @@ export function stringsToRegex(strings: string[]): RegExp {
   return new RegExp(`${joined}`, 'gui')
 }

+/**
+ * Returns an array of matches in the text, using the provided regex
+ * @param text
+ * @param reg
+ * @param query
+ */
 export function getMatches(
   text: string,
   reg: RegExp,
   query?: Query
 ): SearchMatch[] {
-  const separatorRegExp = new RegExp(SEPARATORS, 'gu')
   const originalText = text
-  text = text.toLowerCase().replace(separatorRegExp, ' ')
+  // text = text.toLowerCase().replace(new RegExp(SEPARATORS, 'gu'), ' ')
   if (settings.ignoreDiacritics) {
     text = removeDiacritics(text)
   }
@@ -153,21 +155,16 @@ export function getMatches(
   ) {
     const best = text.indexOf(query.getBestStringForExcerpt())
     if (best > -1 && matches.find(m => m.offset === best)) {
-      matches = matches.filter(m => m.offset !== best)
       matches.unshift({
         offset: best,
         match: query.getBestStringForExcerpt(),
       })
     }
   }

   return matches
 }

-export function makeExcerpt(
-  content: string,
-  offset: number
-): { content: string; offset: number } {
+export function makeExcerpt(content: string, offset: number): string {
   try {
     const pos = offset ?? -1
     const from = Math.max(0, pos - excerptBefore)
@@ -201,14 +198,14 @@ export function makeExcerpt(
       content = content.trim().replaceAll('\n', '<br>')
     }

-    return { content: content, offset: pos }
+    return content
   } catch (e) {
     new Notice(
       'Omnisearch - Error while creating excerpt, see developer console'
     )
     console.error(`Omnisearch - Error while creating excerpt`)
     console.error(e)
-    return { content: '', offset: -1 }
+    return ''
   }
 }
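The net effect of this last file: makeExcerpt now returns the excerpt string directly instead of a { content, offset } pair, which is what lets the Svelte components and the API mapper above drop their .content accesses and offset rebasing. The new call shape, as used in the result components (values illustrative):

  // note.matches[0]?.offset anchors the excerpt on the first match;
  // -1 means "no match", so the excerpt starts at the top of the note.
  const excerpt: string = makeExcerpt(note.content, note.matches[0]?.offset ?? -1)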