Refactored excerpts and highlighting
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
import type { ResultNote } from '../globals'
|
||||
import { Query } from '../search/query'
|
||||
import { searchEngine } from '../search/omnisearch'
|
||||
import { makeExcerpt } from './utils'
|
||||
import { makeExcerpt } from './text-processing'
|
||||
import { refreshIndex } from '../notes-index'
|
||||
|
||||
type ResultNoteApi = {
|
||||
@@ -31,7 +31,7 @@ function mapResults(results: ResultNote[]): ResultNoteApi[] {
|
||||
|
||||
const excerpt = makeExcerpt(content, matches[0]?.offset ?? -1)
|
||||
|
||||
return {
|
||||
const res: ResultNoteApi = {
|
||||
score,
|
||||
path,
|
||||
basename,
|
||||
@@ -42,8 +42,10 @@ function mapResults(results: ResultNote[]): ResultNoteApi[] {
|
||||
offset: match.offset,
|
||||
}
|
||||
}),
|
||||
excerpt,
|
||||
excerpt: excerpt.content,
|
||||
}
|
||||
|
||||
return res
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { type CachedMetadata, MarkdownView, TFile } from 'obsidian'
|
||||
import { stringsToRegex } from './utils'
|
||||
import type { ResultNote } from '../globals'
|
||||
import { stringsToRegex } from './text-processing'
|
||||
|
||||
export async function openNote(
|
||||
item: ResultNote,
|
||||
|
||||
204
src/tools/text-processing.ts
Normal file
204
src/tools/text-processing.ts
Normal file
@@ -0,0 +1,204 @@
|
||||
import {
|
||||
highlightClass,
|
||||
type SearchMatch,
|
||||
regexLineSplit,
|
||||
regexYaml,
|
||||
getChsSegmenter,
|
||||
SPACE_OR_PUNCTUATION_UNIQUE,
|
||||
regexStripQuotes,
|
||||
excerptAfter,
|
||||
excerptBefore,
|
||||
} from 'src/globals'
|
||||
import { settings } from 'src/settings'
|
||||
import { escapeRegex, warnDebug } from './utils'
|
||||
import type { Query } from 'src/search/query'
|
||||
import { Notice } from 'obsidian'
|
||||
|
||||
/**
 * Replacer callback (in the `String.prototype.replace` sense) that wraps a
 * matched word in a highlight span.
 *
 * @param _substring - The full regex match; unused
 * @param args - Remaining replacer arguments: `args[0]` is the single char
 *   preceding the word, `args[1]` is the word to highlight
 * @returns HTML markup: the preceding char and the highlighted word, or an
 *   escaped placeholder when the word is blank
 */
export function highlighterGroups(_substring: string, ...args: any[]): string {
  // args[0] is the single char preceding args[1], which is the word we want to highlight
  const [preceding, word] = args
  if (!!word.trim())
    return `<span>${preceding}</span><span class="${highlightClass}">${word}</span>`
  // The return value is markup: raw angle brackets would be parsed as an
  // unknown element and display nothing, so the placeholder is entity-encoded.
  return '&lt;no content&gt;'
}
|
||||
|
||||
/**
 * Wraps, for each search match, the whole-word occurrence closest to the
 * match's offset in a highlight span.
 *
 * All occurrences are located in the untouched input first and the markup is
 * inserted in a single pass afterwards, so an earlier insertion can neither
 * shift the offsets of later matches nor be matched itself (e.g. a search for
 * "span" or "class").
 *
 * @param text - Text to decorate
 * @param matches - Terms to highlight, with the offset each one was found at
 * @returns `text` with one highlight span per match that could be located;
 *   terms that are empty, not found, or overlapping an already selected
 *   range are ignored
 */
export function highlightText(text: string, matches: SearchMatch[]): string {
  const ranges: { start: number; end: number }[] = []

  for (const matchInfo of matches) {
    // An empty term would only produce zero-length regex matches, on which
    // exec() never advances
    if (!matchInfo.match) continue

    // The term is user text, not a pattern: escape it before building the regex
    const matchRegex = new RegExp(
      `\\b${escapeRegex(matchInfo.match)}\\b`,
      'giu'
    )

    let bestStart = -1
    let bestLength = 0
    let bestDistance = Infinity
    let found: RegExpExecArray | null
    while ((found = matchRegex.exec(text)) !== null) {
      const distance = Math.abs(found.index - matchInfo.offset)
      // Strict comparison: on a tie the earliest occurrence wins
      if (distance < bestDistance) {
        bestDistance = distance
        bestStart = found.index
        bestLength = found[0].length
      }
    }
    if (bestStart < 0) continue

    const start = bestStart
    const end = bestStart + bestLength
    // Nested or interleaved spans would corrupt the markup; first match wins
    if (ranges.some(r => start < r.end && r.start < end)) continue
    ranges.push({ start, end })
  }

  ranges.sort((a, b) => a.start - b.start)

  let result = ''
  let cursor = 0
  for (const { start, end } of ranges) {
    result +=
      text.substring(cursor, start) +
      `<span class="${highlightClass}">` +
      text.substring(start, end) +
      '</span>'
    cursor = end
  }
  return result + text.substring(cursor)
}
|
||||
|
||||
/**
 * Escapes the five HTML-significant characters so that arbitrary text can be
 * inserted into markup without being interpreted as tags, attributes or
 * entities.
 *
 * @param html - Raw text
 * @returns The text with `&`, `<`, `>`, `"` and `'` replaced by entities
 */
export function escapeHTML(html: string): string {
  // '&' must be handled first, otherwise the ampersands introduced by the
  // other entities would be escaped a second time
  return html
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#039;')
}
|
||||
|
||||
/**
 * Splits a text on `regexLineSplit` and keeps only the meaningful pieces.
 *
 * @param text - Text to split
 * @returns The pieces that are defined and longer than 2 characters
 */
export function splitLines(text: string): string[] {
  const kept: string[] = []
  for (const piece of text.split(regexLineSplit)) {
    // Pieces can be empty or undefined (unmatched capture groups of the splitter)
    if (piece && piece.length > 2) {
      kept.push(piece)
    }
  }
  return kept
}
|
||||
|
||||
/**
 * Removes whatever `regexYaml` matches from the text.
 *
 * `regexYaml` is meant to recognize YAML front matter: at the beginning of the
 * file, 3 hyphens, then any character (including newlines), then 3 hyphens.
 *
 * @param text - Note content
 * @returns The content without its front matter block
 */
export function removeFrontMatter(text: string): string {
  const withoutFrontMatter = text.replace(regexYaml, '')
  return withoutFrontMatter
}
|
||||
|
||||
/**
|
||||
* Used to find excerpts in a note body, or select which words to highlight
|
||||
*/
|
||||
/**
 * Used to find excerpts in a note body, or select which words to highlight.
 *
 * Builds a global, unicode regex with two capture groups: group 1 is the
 * separator preceding the word (possibly empty), group 2 is one of the given
 * strings, matched literally.
 *
 * @param strings - Words to look for; the array is left untouched
 * @returns The regex, or `/^$/g` when there is no non-empty word
 */
export function stringsToRegex(strings: string[]): RegExp {
  // An empty string would add an alternative matching everywhere with zero
  // length. filter() also gives us a copy, so the caller's array is not sorted
  // in place below.
  const words = strings.filter(s => s.length > 0)
  if (!words.length) return /^$/g

  // sort strings by decreasing length, so that longer strings are matched first
  words.sort((a, b) => b.length - a.length)

  // Default word split is not applied if the user uses the cm-chs-patch plugin
  let separators = ''
  if (!getChsSegmenter()) {
    // Split on start of line, spaces, punctuation, or capital letters (for camelCase)
    // We also add the hyphen to the list of characters that can split words
    separators = `^|${SPACE_OR_PUNCTUATION_UNIQUE.source}|-`
    if (settings.splitCamelCase) {
      separators += '|[A-Z]'
    }
  }

  const alternatives = words.map(s => escapeRegex(s)).join('|')
  return new RegExp(`(${separators})(${alternatives})`, 'gu')
}
|
||||
|
||||
/**
 * Collects the occurrences of `reg` in the lowercased `text`.
 *
 * `reg` is expected to have two capture groups, as built by `stringsToRegex`:
 * group 1 is the (possibly empty) separator preceding the word, group 2 the
 * word itself. Only group 2 is reported, at the offset where it starts.
 *
 * The scan stops after 100 iterations or 50 ms, whichever comes first.
 *
 * @param text - Text to scan; it is lowercased before matching
 * @param reg - Regex to apply; its `lastIndex` is reset before scanning
 * @param query - When given, and when `query.segmentsToStr()` is found in the
 *   text exactly at the offset of one of the matches, that match is replaced
 *   by the whole query string and moved to the front
 * @returns The matches, in order of appearance apart from the promoted one
 */
export function getMatches(
  text: string,
  reg: RegExp,
  query?: Query
): SearchMatch[] {
  const haystack = text.toLowerCase()
  // A reused global regex keeps its lastIndex when a previous scan was interrupted
  reg.lastIndex = 0
  const startTime = Date.now()
  let match: RegExpExecArray | null = null
  let matches: SearchMatch[] = []
  let count = 0
  while ((match = reg.exec(haystack)) !== null) {
    // Avoid infinite loops, stop looking after 100 matches or if we're taking too much time
    if (++count >= 100 || Date.now() - startTime > 50) {
      warnDebug('Stopped getMatches at', count, 'results')
      break
    }
    // exec() does not move past an empty match by itself
    if (match[0].length === 0) {
      reg.lastIndex++
    }
    const m = match[2]
    if (m && match.index >= 0) {
      // match.index is where the separator group starts; the word begins right
      // after it. The separator is empty when the word sits at the start of the
      // text, so a fixed "+ 1" would point one char too far.
      matches.push({ match: m, offset: match.index + (match[1]?.length ?? 0) })
    }
  }

  // If the query can be found "as is" in the text, put this match first
  if (query) {
    const exact = query.segmentsToStr()
    const best = haystack.indexOf(exact)
    if (best > -1 && matches.some(m => m.offset === best)) {
      matches = matches.filter(m => m.offset !== best)
      matches.unshift({
        offset: best,
        match: exact,
      })
    }
  }

  return matches
}
|
||||
|
||||
/**
 * Cuts an HTML-escaped excerpt out of `content`, around `offset`.
 *
 * With a non-negative offset, the excerpt spans from `excerptBefore` chars
 * before it to `excerptAfter` chars after it, is trimmed, and gets an ellipsis
 * on each side that was cut. Otherwise it is simply the first `excerptAfter`
 * chars. When `settings.renderLineReturnInExcerpts` is on, empty lines are
 * dropped, the excerpt is made to start at the last line return found at or
 * before the offset, and remaining line returns become `<br>`.
 *
 * @param content - Full text to take the excerpt from
 * @param offset - Position of interest in `content`; a negative or missing
 *   value selects the beginning of the text
 * @returns The escaped excerpt and the offset that was used; on any error the
 *   user is notified and `{ content: '', offset: -1 }` is returned
 */
export function makeExcerpt(
  content: string,
  offset: number
): { content: string; offset: number } {
  try {
    const pos = offset ?? -1
    const from = Math.max(0, pos - excerptBefore)

    let excerpt: string
    if (pos > -1) {
      const to = Math.min(content.length, pos + excerptAfter)
      const leading = from > 0 ? '…' : ''
      const trailing = to < content.length - 1 ? '…' : ''
      excerpt = leading + content.slice(from, to).trim() + trailing
    } else {
      excerpt = content.slice(0, excerptAfter)
    }

    const keepLineReturns = settings.renderLineReturnInExcerpts
    if (keepLineReturns) {
      // Remove multiple line returns
      const nonEmptyLines = excerpt
        .split(/(?:\r\n|\r|\n)/g)
        .filter(line => line)
      excerpt = nonEmptyLines.join('\n')

      // Start the excerpt on the line that holds the offset
      const last = excerpt.lastIndexOf('\n', pos - from)
      if (last > 0) {
        excerpt = excerpt.slice(last)
      }
    }

    excerpt = escapeHTML(excerpt)

    // Done after escaping so that the <br> tags themselves stay intact
    if (keepLineReturns) {
      excerpt = excerpt.trim().split('\n').join('<br>')
    }

    return { content: excerpt, offset: pos }
  } catch (e) {
    new Notice(
      'Omnisearch - Error while creating excerpt, see developer console'
    )
    console.error(`Omnisearch - Error while creating excerpt`)
    console.error(e)
    return { content: '', offset: -1 }
  }
}
|
||||
|
||||
/**
 * Extracts the "expressions in quotes" from a string.
 * @param str - Text that may contain double-quoted expressions
 * @returns The non-empty expressions, without their quotes, in order of
 *   appearance; an empty array when nothing is quoted
 */
export function splitQuotes(str: string): string[] {
  const quoted = str.match(/"(.*?)"/g)
  if (!quoted) {
    return []
  }

  const expressions: string[] = []
  for (const chunk of quoted) {
    const inner = chunk.replace(/"/g, '')
    // `""` leaves nothing once unquoted
    if (inner) {
      expressions.push(inner)
    }
  }
  return expressions
}
|
||||
|
||||
/**
 * Removes whatever `regexStripQuotes` matches from the string.
 *
 * @param str - Text to clean
 * @returns The text with the matched parts deleted
 */
export function stripSurroundingQuotes(str: string): string {
  const unquoted = str.replace(regexStripQuotes, '')
  return unquoted
}
|
||||
@@ -1,52 +1,17 @@
|
||||
import {
|
||||
type CachedMetadata,
|
||||
getAllTags,
|
||||
Notice,
|
||||
parseFrontMatterAliases,
|
||||
Platform,
|
||||
} from 'obsidian'
|
||||
import {
|
||||
excerptAfter,
|
||||
excerptBefore,
|
||||
getChsSegmenter,
|
||||
getTextExtractor,
|
||||
highlightClass,
|
||||
isSearchMatch,
|
||||
regexLineSplit,
|
||||
regexStripQuotes,
|
||||
regexYaml,
|
||||
SPACE_OR_PUNCTUATION,
|
||||
type SearchMatch,
|
||||
} from '../globals'
|
||||
import { getTextExtractor, isSearchMatch, type SearchMatch } from '../globals'
|
||||
import { canIndexUnsupportedFiles, settings } from '../settings'
|
||||
import { type BinaryLike, createHash } from 'crypto'
|
||||
import { md5 } from 'pure-md5'
|
||||
|
||||
export function highlighter(str: string): string {
|
||||
return `<span class="${highlightClass}">${str}</span>`
|
||||
}
|
||||
|
||||
export function highlighterGroups(substring: string, ...args: any[]): string {
|
||||
return `<span class="${highlightClass}">${substring}</span>`
|
||||
}
|
||||
|
||||
export function escapeHTML(html: string): string {
|
||||
return html
|
||||
.replaceAll('&', '&')
|
||||
.replaceAll('<', '<')
|
||||
.replaceAll('>', '>')
|
||||
.replaceAll('"', '"')
|
||||
.replaceAll("'", ''')
|
||||
}
|
||||
|
||||
export function splitLines(text: string): string[] {
|
||||
return text.split(regexLineSplit).filter(l => !!l && l.length > 2)
|
||||
}
|
||||
|
||||
export function removeFrontMatter(text: string): string {
|
||||
// Regex to recognize YAML Front Matter (at beginning of file, 3 hyphens, than any charecter, including newlines, then 3 hyphens).
|
||||
return text.replace(regexYaml, '')
|
||||
}
|
||||
// export function highlighter(str: string): string {
|
||||
// return `<span class="${highlightClass}">${str}</span>`
|
||||
// }
|
||||
|
||||
export function pathWithoutFilename(path: string): string {
|
||||
const split = path.split('/')
|
||||
@@ -79,20 +44,6 @@ export function getAllIndices(text: string, regex: RegExp): SearchMatch[] {
|
||||
.filter(isSearchMatch)
|
||||
}
|
||||
|
||||
/**
|
||||
* Used to find excerpts in a note body, or select which words to highlight
|
||||
*/
|
||||
export function stringsToRegex(strings: string[]): RegExp {
|
||||
if (!strings.length) return /^$/g
|
||||
|
||||
// sort strings by decreasing length, so that longer strings are matched first
|
||||
strings.sort((a, b) => b.length - a.length)
|
||||
|
||||
const joined = `(${strings.map(s => escapeRegex(s)).join('|')})`
|
||||
|
||||
return new RegExp(`${joined}`, 'giu')
|
||||
}
|
||||
|
||||
export function extractHeadingsFromCache(
|
||||
cache: CachedMetadata,
|
||||
level: number
|
||||
@@ -106,69 +57,6 @@ export function loopIndex(index: number, nbItems: number): number {
|
||||
return (index + nbItems) % nbItems
|
||||
}
|
||||
|
||||
export function makeExcerpt(content: string, offset: number): string {
|
||||
try {
|
||||
const pos = offset ?? -1
|
||||
const from = Math.max(0, pos - excerptBefore)
|
||||
const to = Math.min(content.length, pos + excerptAfter)
|
||||
if (pos > -1) {
|
||||
content =
|
||||
(from > 0 ? '…' : '') +
|
||||
content.slice(from, to).trim() +
|
||||
(to < content.length - 1 ? '…' : '')
|
||||
} else {
|
||||
content = content.slice(0, excerptAfter)
|
||||
}
|
||||
if (settings.renderLineReturnInExcerpts) {
|
||||
const lineReturn = new RegExp(/(?:\r\n|\r|\n)/g)
|
||||
// Remove multiple line returns
|
||||
content = content
|
||||
.split(lineReturn)
|
||||
.filter(l => l)
|
||||
.join('\n')
|
||||
|
||||
const last = content.lastIndexOf('\n', pos - from)
|
||||
|
||||
if (last > 0) {
|
||||
content = content.slice(last)
|
||||
}
|
||||
}
|
||||
|
||||
content = escapeHTML(content)
|
||||
|
||||
if (settings.renderLineReturnInExcerpts) {
|
||||
content = content.trim().replaceAll('\n', '<br>')
|
||||
}
|
||||
|
||||
return content
|
||||
} catch (e) {
|
||||
new Notice(
|
||||
'Omnisearch - Error while creating excerpt, see developer console'
|
||||
)
|
||||
console.error(`Omnisearch - Error while creating excerpt`)
|
||||
console.error(e)
|
||||
return ''
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* splits a string in words or "expressions in quotes"
|
||||
* @param str
|
||||
* @returns
|
||||
*/
|
||||
export function splitQuotes(str: string): string[] {
|
||||
return (
|
||||
str
|
||||
.match(/"(.*?)"/g)
|
||||
?.map(s => s.replace(/"/g, ''))
|
||||
.filter(q => !!q) ?? []
|
||||
)
|
||||
}
|
||||
|
||||
export function stripSurroundingQuotes(str: string): string {
|
||||
return str.replace(regexStripQuotes, '')
|
||||
}
|
||||
|
||||
function mapAsync<T, U>(
|
||||
array: T[],
|
||||
callbackfn: (value: T, index: number, array: T[]) => Promise<U>
|
||||
@@ -263,7 +151,7 @@ export function isContentIndexable(path: string): boolean {
|
||||
|
||||
export function isFilenameIndexable(path: string): boolean {
|
||||
return (
|
||||
(canIndexUnsupportedFiles()) ||
|
||||
canIndexUnsupportedFiles() ||
|
||||
isFilePlaintext(path) ||
|
||||
isFileCanvas(path) ||
|
||||
isFileFromDataloomPlugin(path)
|
||||
@@ -329,13 +217,13 @@ export function chunkArray<T>(arr: T[], len: number): T[][] {
|
||||
/**
 * Splits a text at every lowercase-to-uppercase transition.
 *
 * @param text - Text that may contain camelCase words
 * @returns The non-empty parts obtained after inserting a space at each
 *   transition and splitting on spaces, or an empty array when the text has
 *   no such transition
 */
export function splitCamelCase(text: string): string[] {
  // if no camel case found, do nothing
  if (!/[a-z][A-Z]/.test(text)) {
    return []
  }
  const splittedText = text
    .replace(/([a-z](?=[A-Z]))/g, '$1 ')
    .split(' ')
    .filter(t => t)
  return splittedText
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user