327 lines
8.5 KiB
TypeScript
327 lines
8.5 KiB
TypeScript
import {
|
|
type CachedMetadata,
|
|
getAllTags,
|
|
Notice,
|
|
parseFrontMatterAliases,
|
|
Platform,
|
|
} from 'obsidian'
|
|
import {
|
|
excerptAfter,
|
|
excerptBefore,
|
|
getChsSegmenter,
|
|
getTextExtractor,
|
|
highlightClass,
|
|
isSearchMatch,
|
|
regexLineSplit,
|
|
regexStripQuotes,
|
|
regexYaml,
|
|
SPACE_OR_PUNCTUATION,
|
|
type SearchMatch,
|
|
} from '../globals'
|
|
import { settings } from '../settings'
|
|
import { type BinaryLike, createHash } from 'crypto'
|
|
import { md5 } from 'pure-md5'
|
|
|
|
export function highlighter(str: string): string {
|
|
return `<span class="${highlightClass}">${str}</span>`
|
|
}
|
|
|
|
export function highlighterGroups(...args: any[]) {
|
|
if (
|
|
args[1] !== null &&
|
|
args[1] !== undefined &&
|
|
args[2] !== null &&
|
|
args[2] !== undefined
|
|
)
|
|
return `${args[1]}<span class="${highlightClass}">${args[2]}</span>`
|
|
return '<no content>'
|
|
}
|
|
|
|
export function escapeHTML(html: string): string {
|
|
return html
|
|
.replaceAll('&', '&')
|
|
.replaceAll('<', '<')
|
|
.replaceAll('>', '>')
|
|
.replaceAll('"', '"')
|
|
.replaceAll("'", ''')
|
|
}
|
|
|
|
export function splitLines(text: string): string[] {
|
|
return text.split(regexLineSplit).filter(l => !!l && l.length > 2)
|
|
}
|
|
|
|
export function removeFrontMatter(text: string): string {
|
|
// Regex to recognize YAML Front Matter (at beginning of file, 3 hyphens, than any charecter, including newlines, then 3 hyphens).
|
|
return text.replace(regexYaml, '')
|
|
}
|
|
|
|
export function pathWithoutFilename(path: string): string {
|
|
const split = path.split('/')
|
|
split.pop()
|
|
return split.join('/')
|
|
}
|
|
|
|
export function wait(ms: number): Promise<void> {
|
|
return new Promise(resolve => {
|
|
setTimeout(resolve, ms)
|
|
})
|
|
}
|
|
|
|
// https://stackoverflow.com/a/3561711
|
|
// but we enclose special chars in brackets to avoid them being interpreted as regex
|
|
export function escapeRegex(str: string): string {
|
|
return str.replace(/[-/\\^$*+?.()|[\]{}]/g, '[$&]')
|
|
}
|
|
|
|
/**
|
|
* Returns the positions of all occurences of `val` inside of `text`
|
|
* https://stackoverflow.com/a/58828841
|
|
* @param text
|
|
* @param regex
|
|
* @returns
|
|
*/
|
|
export function getAllIndices(text: string, regex: RegExp): SearchMatch[] {
|
|
return [...text.matchAll(regex)]
|
|
.map(o => ({ match: o[0], offset: o.index }))
|
|
.filter(isSearchMatch)
|
|
}
|
|
|
|
/**
|
|
* Used to find excerpts in a note body, or select which words to highlight
|
|
*/
|
|
export function stringsToRegex(strings: string[]): RegExp {
|
|
if (!strings.length) return /^$/g
|
|
const joined =
|
|
'(' +
|
|
// Default word split is not applied if the user uses the cm-chs-patch plugin
|
|
(getChsSegmenter()
|
|
? ''
|
|
: // Split on start of line, spaces, punctuation, or capital letters (for camelCase)
|
|
settings.splitCamelCase
|
|
? `^|${SPACE_OR_PUNCTUATION.source}|[A-Z]`
|
|
: `^|${SPACE_OR_PUNCTUATION.source}`) +
|
|
')' +
|
|
`(${strings.map(s => escapeRegex(s)).join('|')})`
|
|
|
|
const reg = new RegExp(`${joined}`, 'giu')
|
|
return reg
|
|
}
|
|
|
|
export function extractHeadingsFromCache(
|
|
cache: CachedMetadata,
|
|
level: number
|
|
): string[] {
|
|
return (
|
|
cache.headings?.filter(h => h.level === level).map(h => h.heading) ?? []
|
|
)
|
|
}
|
|
|
|
export function loopIndex(index: number, nbItems: number): number {
|
|
return (index + nbItems) % nbItems
|
|
}
|
|
|
|
export function makeExcerpt(content: string, offset: number): string {
|
|
try {
|
|
const pos = offset ?? -1
|
|
const from = Math.max(0, pos - excerptBefore)
|
|
const to = Math.min(content.length, pos + excerptAfter)
|
|
if (pos > -1) {
|
|
content =
|
|
(from > 0 ? '…' : '') +
|
|
content.slice(from, to).trim() +
|
|
(to < content.length - 1 ? '…' : '')
|
|
} else {
|
|
content = content.slice(0, excerptAfter)
|
|
}
|
|
if (settings.renderLineReturnInExcerpts) {
|
|
const lineReturn = new RegExp(/(?:\r\n|\r|\n)/g)
|
|
// Remove multiple line returns
|
|
content = content
|
|
.split(lineReturn)
|
|
.filter(l => l)
|
|
.join('\n')
|
|
|
|
const last = content.lastIndexOf('\n', pos - from)
|
|
|
|
if (last > 0) {
|
|
content = content.slice(last)
|
|
}
|
|
}
|
|
|
|
content = escapeHTML(content)
|
|
|
|
if (settings.renderLineReturnInExcerpts) {
|
|
content = content.trim().replaceAll('\n', '<br>')
|
|
}
|
|
|
|
return content
|
|
} catch (e) {
|
|
new Notice(
|
|
'Omnisearch - Error while creating excerpt, see developer console'
|
|
)
|
|
console.error(`Omnisearch - Error while creating excerpt`)
|
|
console.error(e)
|
|
return ''
|
|
}
|
|
}
|
|
|
|
/**
|
|
* splits a string in words or "expressions in quotes"
|
|
* @param str
|
|
* @returns
|
|
*/
|
|
export function splitQuotes(str: string): string[] {
|
|
return (
|
|
str
|
|
.match(/"(.*?)"/g)
|
|
?.map(s => s.replace(/"/g, ''))
|
|
.filter(q => !!q) ?? []
|
|
)
|
|
}
|
|
|
|
export function stripSurroundingQuotes(str: string): string {
|
|
return str.replace(regexStripQuotes, '')
|
|
}
|
|
|
|
function mapAsync<T, U>(
|
|
array: T[],
|
|
callbackfn: (value: T, index: number, array: T[]) => Promise<U>
|
|
): Promise<U[]> {
|
|
return Promise.all(array.map(callbackfn))
|
|
}
|
|
|
|
/**
|
|
* https://stackoverflow.com/a/53508547
|
|
* @param array
|
|
* @param callbackfn
|
|
* @returns
|
|
*/
|
|
export async function filterAsync<T>(
|
|
array: T[],
|
|
callbackfn: (value: T, index: number, array: T[]) => Promise<boolean>
|
|
): Promise<T[]> {
|
|
const filterMap = await mapAsync(array, callbackfn)
|
|
return array.filter((_value, index) => filterMap[index])
|
|
}
|
|
|
|
/**
|
|
* A simple function to strip bold and italic markdown chars from a string
|
|
* @param text
|
|
* @returns
|
|
*/
|
|
export function stripMarkdownCharacters(text: string): string {
|
|
return text.replace(/(\*|_)+(.+?)(\*|_)+/g, (_match, _p1, p2) => p2)
|
|
}
|
|
|
|
export function getAliasesFromMetadata(
|
|
metadata: CachedMetadata | null
|
|
): string[] {
|
|
return metadata?.frontmatter
|
|
? parseFrontMatterAliases(metadata.frontmatter) ?? []
|
|
: []
|
|
}
|
|
|
|
export function getTagsFromMetadata(metadata: CachedMetadata | null): string[] {
|
|
let tags = metadata ? getAllTags(metadata) ?? [] : []
|
|
// This will "un-nest" tags that are in the form of "#tag/subtag"
|
|
// A tag like "#tag/subtag" will be split into 3 tags: '#tag/subtag", "#tag" and "#subtag"
|
|
// https://github.com/scambier/obsidian-omnisearch/issues/146
|
|
tags = [
|
|
...new Set(
|
|
tags.reduce((acc, tag) => {
|
|
return [
|
|
...acc,
|
|
...tag
|
|
.split('/')
|
|
.filter(t => t)
|
|
.map(t => (t.startsWith('#') ? t : `#${t}`)),
|
|
tag,
|
|
]
|
|
}, [] as string[])
|
|
),
|
|
]
|
|
return tags
|
|
}
|
|
|
|
/**
|
|
* https://stackoverflow.com/a/37511463
|
|
*/
|
|
export function removeDiacritics(str: string): string {
|
|
if (str === null || str === undefined) {
|
|
return ''
|
|
}
|
|
// Keep backticks for code blocks, because otherwise they are removed by the .normalize() function
|
|
// https://stackoverflow.com/a/36100275
|
|
str = str.replaceAll('`', '[__omnisearch__backtick__]')
|
|
str = str.normalize('NFD').replace(/\p{Diacritic}/gu, '')
|
|
str = str.replaceAll('[__omnisearch__backtick__]', '`')
|
|
return str
|
|
}
|
|
|
|
export function getCtrlKeyLabel(): 'ctrl' | '⌘' {
|
|
return Platform.isMacOS ? '⌘' : 'ctrl'
|
|
}
|
|
|
|
export function isFileIndexable(path: string): boolean {
|
|
const hasTextExtractor = !!getTextExtractor()
|
|
const canIndexPDF = hasTextExtractor && settings.PDFIndexing
|
|
const canIndexImages = hasTextExtractor && settings.imagesIndexing
|
|
return (
|
|
isFilePlaintext(path) ||
|
|
isFileCanvas(path) ||
|
|
(canIndexPDF && isFilePDF(path)) ||
|
|
(canIndexImages && isFileImage(path))
|
|
)
|
|
}
|
|
|
|
export function isFileImage(path: string): boolean {
|
|
const ext = getExtension(path)
|
|
return ext === 'png' || ext === 'jpg' || ext === 'jpeg'
|
|
}
|
|
|
|
export function isFilePDF(path: string): boolean {
|
|
return getExtension(path) === 'pdf'
|
|
}
|
|
|
|
export function isFilePlaintext(path: string): boolean {
|
|
return [...settings.indexedFileTypes, 'md'].some(t => path.endsWith(`.${t}`))
|
|
}
|
|
|
|
export function isFileCanvas(path: string): boolean {
|
|
return path.endsWith('.canvas')
|
|
}
|
|
|
|
export function getExtension(path: string): string {
|
|
const split = path.split('.')
|
|
return split[split.length - 1] ?? ''
|
|
}
|
|
|
|
export function makeMD5(data: BinaryLike): string {
|
|
if (Platform.isMobileApp) {
|
|
// A node-less implementation, but since we're not hashing the same data
|
|
// (arrayBuffer vs stringified array) the hash will be different
|
|
return md5(data.toString())
|
|
}
|
|
return createHash('md5').update(data).digest('hex')
|
|
}
|
|
|
|
export function chunkArray<T>(arr: T[], len: number): T[][] {
|
|
const chunks = []
|
|
let i = 0
|
|
const n = arr.length
|
|
|
|
while (i < n) {
|
|
chunks.push(arr.slice(i, (i += len)))
|
|
}
|
|
|
|
return chunks
|
|
}
|
|
|
|
/**
|
|
* Converts a 'fooBarBAZLorem' into ['foo', 'Bar', 'BAZ', 'Lorem]
|
|
* @param text
|
|
*/
|
|
export function splitCamelCase(text: string): string[] {
|
|
return text.replace(/([a-z](?=[A-Z]))/g, '$1 ').split(' ')
|
|
}
|