Merge pull request #38 from scambier/feature/25-search-filters

Feature/25 search filters
This commit is contained in:
Simon Cambier
2022-04-30 17:21:05 +02:00
committed by GitHub
7 changed files with 507 additions and 23 deletions

10
manifest-beta.json Normal file
View File

@@ -0,0 +1,10 @@
{
"id": "omnisearch",
"name": "Omnisearch",
"version": "1.0.1",
"minAppVersion": "0.14.2",
"description": "A search engine that just works",
"author": "Simon Cambier",
"authorUrl": "https://github.com/scambier/obsidian-omnisearch",
"isDesktopOnly": false
}

View File

@@ -42,14 +42,14 @@ onDestroy(() => {
eventBus.enable("vault")
})
$: {
$: (async () => {
if (searchQuery) {
note = getSuggestions(searchQuery, { singleFilePath })[0] ?? null
note = (await getSuggestions(searchQuery, { singleFilePath }))[0] ?? null
lastSearch = searchQuery
}
selectedIndex = 0
scrollIntoView()
}
})()
$: {
if (note) {

View File

@@ -20,13 +20,10 @@ let searchQuery: string
let resultNotes: ResultNote[] = []
$: selectedNote = resultNotes[selectedIndex]
$: {
if (searchQuery) {
resultNotes = getSuggestions(searchQuery)
lastSearch = searchQuery
}
selectedIndex = 0
scrollIntoView()
$: if (searchQuery) {
updateResults()
} else {
resultNotes = []
}
onMount(() => {
@@ -39,6 +36,14 @@ onMount(() => {
eventBus.on("vault", "arrow-down", () => moveIndex(1))
})
/**
 * Re-runs the search for the current `searchQuery` and refreshes the
 * component state that depends on it. Invoked by the reactive statement
 * whenever `searchQuery` is non-empty.
 */
async function updateResults() {
// getSuggestions is awaited, so `searchQuery` is read again after the await below
resultNotes = await getSuggestions(searchQuery)
lastSearch = searchQuery
// A fresh result list always starts with its first entry selected
selectedIndex = 0
scrollIntoView()
// if (resultNotes.length) console.log(resultNotes[0])
}
function onClick() {
if (!selectedNote) return
openNote(selectedNote)

View File

@@ -4,6 +4,7 @@ import { EventBus } from './event-bus'
export const regexWikilink = /^!?\[\[(?<name>.+?)(\|(?<alias>.+?))?\]\]/
export const regexLineSplit = /\r?\n|\r|((\.|\?|!)( |\r?\n|\r))/g
export const regexYaml = /^---\s*\n(.*?)\n?^---\s?/ms
export const regexStripQuotes = /^"|"$|^'|'$/g
export const excerptBefore = 100
export const excerptAfter = 180
@@ -12,8 +13,6 @@ export const highlightClass = 'suggestion-highlight omnisearch-highlight'
export const eventBus = new EventBus()
// export const eventBus = new EventBus()
export type SearchNote = {
path: string
basename: string

380
src/query.ts Normal file
View File

@@ -0,0 +1,380 @@
import { stripSurroundingQuotes } from './utils'
/**
 * A single parsed piece of a search query, as produced by `Query.formatToken`.
 */
type QueryToken = {
/**
 * The query token string value, with any surrounding quotes removed
 */
value: string
/**
 * Was this token encased in quotes?
 * Set when stripping the surrounding quotes changed the token.
 */
exact: boolean
}
/**
 * Parses a raw query string into a structured object.
 *
 * The text is lower-cased, split by `parseQuery`, and every resulting term is
 * turned into a `QueryToken`. Tokens that end up empty once their quotes are
 * removed (e.g. `""` or a lone `"`) are discarded, for words and exclusions
 * alike.
 */
export class Query {
  /** Terms the results must match */
  public words: QueryToken[] = []
  /** Terms that were prefixed with `-` in the query */
  public exclusions: QueryToken[] = []

  constructor(text: string) {
    const tokens = parseQuery(text.toLowerCase(), { tokenize: true })
    this.exclusions = this.toTokens(tokens.exclude.text)
    this.words = this.toTokens(tokens.text)
  }

  /**
   * @returns all the words of the query, joined with a single space
   */
  public getWordsStr(): string {
    return this.words.map(({ value }) => value).join(' ')
  }

  /**
   * Returns the terms that are encased in quotes
   * @returns the values of the words flagged as `exact`
   */
  public getExactTerms(): string[] {
    return this.words.filter(({ exact }) => exact).map(({ value }) => value)
  }

  /**
   * Converts raw parser terms into tokens, dropping the ones that are empty
   * once unquoted.
   */
  private toTokens(terms: string[]): QueryToken[] {
    // An arrow function keeps `this` bound, unlike passing the method itself
    return terms
      .map(term => this.formatToken(term))
      .filter(token => !!token.value)
  }

  /**
   * Builds a token from a raw term. The token is `exact` when removing the
   * surrounding quotes changed the term.
   */
  private formatToken(str: string): QueryToken {
    const stripped = stripSurroundingQuotes(str)
    return {
      value: stripped,
      exact: stripped !== str,
    }
  }
}
/*!
* search-query-parser.js
* Original: https://github.com/nepsilon/search-query-parser
* Modified by Simon Cambier
* Copyright(c) 2014-2019
* MIT Licensed
*/
/**
 * Options accepted by `parseQuery`.
 */
interface SearchParserOptions {
/** Whether to fill the result's `offsets`; `parseQuery` treats a missing value as `true` */
offsets?: boolean
/** Required flag; note that `parseQuery` itself never reads it */
tokenize: true
/** Keys whose `key:value` terms are stored under `key` instead of being kept as plain text */
keywords?: string[]
/** Keys whose `key:from-to` terms are stored as a `{ from, to }` object */
ranges?: string[]
/** When true, a keyword seen once with a single value is stored as a one-element array */
alwaysArray?: boolean
}
/**
 * Free-form bag used for the keyword and range entries, whose keys come from the query itself.
 */
interface ISearchParserDictionary {
[key: string]: any
}
/** Offset payload of a `keyword:value` term */
type SearchParserKeyWordOffset = {
keyword: string
value?: string
}
/** Offset payload of a plain-text term */
type SearchParserTextOffset = {
text: string
}
/**
 * A parsed term together with its position in the query string.
 * `parseQuery` sets `offsetStart` from the index of the regex match.
 */
type SearchParserOffset = (
| SearchParserKeyWordOffset
| SearchParserTextOffset
) & {
offsetStart: number
offsetEnd: number
}
/**
 * Value returned by `parseQuery`. Keyword and range entries are added as extra
 * keys through the dictionary index signature.
 */
interface SearchParserResult extends ISearchParserDictionary {
/** Plain-text terms, in query order */
text: string[]
/** Positions of the parsed terms; left empty when the `offsets` option is false */
offsets: SearchParserOffset[]
/** Terms that were prefixed with `-` */
exclude: { text: string[] }
}
/**
 * Resolves the backslash escapes of a raw query term: `\\` becomes `\`,
 * `\0` becomes a NUL character, `\x` becomes `x`, and a trailing lone
 * backslash is dropped.
 */
function unescapeQueryTerm(raw: string): string {
  return raw.replace(/\\(.?)/g, (_match: string, escaped: string) =>
    escaped === '0' ? '\u0000' : escaped,
  )
}

/**
 * Records `value` for `key` on `target`, following the accumulation rules of
 * search-query-parser:
 * - first occurrence: a string, or an array when `value` holds several
 *   comma-separated values (or when `alwaysArray` is set);
 * - later occurrences: the entry becomes (or stays) an array that grows.
 */
function addKeywordValue(
  target: ISearchParserDictionary,
  key: string,
  value: string,
  alwaysArray: boolean,
): void {
  const values = value.split(',')
  const existing = target[key]

  if (existing) {
    if (Array.isArray(existing)) {
      if (values.length > 1) {
        target[key] = existing.concat(values)
      }
      else {
        existing.push(value)
      }
    }
    else {
      // Seen exactly once before: keep the previous value and the raw new one
      target[key] = [existing, value]
    }
  }
  else if (values.length > 1) {
    target[key] = values
  }
  else {
    target[key] = alwaysArray ? [value] : value
  }
}

/**
 * Splits a search string into plain-text terms, excluded terms (prefixed with
 * `-`), and `keyword:value` / `range:from-to` entries for the keys registered
 * in `options`.
 *
 * Quotes are NOT removed from the returned terms, so callers can tell quoted
 * terms apart. Backslash escapes are resolved.
 *
 * @param string the raw query; a falsy value is treated as an empty query
 * @param options parser options; the object is never modified
 * @returns the parsed query. `text` and `exclude.text` are always arrays;
 * `offsets` is only filled when `options.offsets` is not `false`
 */
function parseQuery(
  string: string,
  options: SearchParserOptions,
): SearchParserResult {
  type TextTerm = { text: string; offsetStart: number; offsetEnd: number }
  type KeywordTerm = {
    keyword: string
    value: string
    offsetStart: number
    offsetEnd: number
  }

  // Resolve the options into locals instead of writing defaults back into
  // the caller's object
  const withOffsets = options?.offsets ?? true
  const keywords = options?.keywords ?? []
  const ranges = options?.ranges ?? []
  const alwaysArray = options?.alwaysArray ?? false

  // Our object to store the query object
  const query: SearchParserResult = {
    text: [],
    offsets: [],
    exclude: { text: [] },
  }
  const exclusion: ISearchParserDictionary & { text: string[] } = { text: [] }
  const terms: Array<TextTerm | KeywordTerm> = []

  // Get a list of search terms respecting single and double quotes.
  // Groups 1-2: key:'value' / key:"value"; groups 3-4: (-)"phrase" / (-)'phrase'.
  // (A trailing `\S+:\S+` alternative was removed: `\S+` always matched first.)
  const regex =
    /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|(-?"(?:[^"\\]|\\.)*")|(-?'(?:[^'\\]|\\.)*')|\S+/g

  let match: RegExpExecArray | null
  while ((match = regex.exec(string || '')) !== null) {
    const raw = match[0]
    const offsetStart = match.index
    // Offsets describe the raw query text, so measure before any unescaping
    const offsetEnd = match.index + raw.length

    // A quoted phrase is plain text even when it contains a `:`
    const isQuotedPhrase = match[3] !== undefined || match[4] !== undefined
    const sepIndex = isQuotedPhrase ? -1 : raw.indexOf(':')

    // Terms that contain a `:`
    if (sepIndex !== -1) {
      terms.push({
        keyword: raw.slice(0, sepIndex),
        value: unescapeQueryTerm(raw.slice(sepIndex + 1)),
        offsetStart,
        offsetEnd,
      })
      continue
    }

    // Other terms
    const isExcludedTerm = raw[0] === '-'
    const text = unescapeQueryTerm(isExcludedTerm ? raw.slice(1) : raw)
    if (isExcludedTerm) {
      exclusion.text.push(text)
    }
    else {
      terms.push({ text, offsetStart, offsetEnd })
    }
  }

  for (const term of terms) {
    // When just a simple term
    if ('text' in term) {
      // A term can unescape to nothing (e.g. a lone backslash): skip it
      if (term.text) {
        query.text.push(term.text)
        if (withOffsets) {
          query.offsets.push(term)
        }
      }
      continue
    }

    // We got an advanced search syntax. The key may be empty (`:foo`), in
    // which case the term falls through to the plain-text branch below.
    let key = term.keyword
    let isKeyword = false
    let isExclusion = false
    if (key[0] !== '-') {
      isKeyword = keywords.includes(key)
    }
    else {
      const bareKey = key.slice(1)
      isKeyword = keywords.includes(bareKey)
      if (isKeyword) {
        key = bareKey
        isExclusion = true
      }
    }
    const isRange = ranges.includes(key)

    // When the key matches a keyword
    if (isKeyword) {
      if (withOffsets) {
        query.offsets.push({
          keyword: key,
          value: term.value,
          // Skip the leading `-` of an excluded keyword
          offsetStart: isExclusion ? term.offsetStart + 1 : term.offsetStart,
          offsetEnd: term.offsetEnd,
        })
      }
      if (term.value.length) {
        addKeywordValue(
          isExclusion ? exclusion : query,
          key,
          term.value,
          alwaysArray,
        )
      }
    }
    // The key allows a range
    else if (isRange) {
      if (withOffsets) {
        query.offsets.push(term)
      }
      // Range are separated with a dash: keyword:XXXX-YYYY.
      // Anything else is stored whole as the lower bound.
      const rangeValues = term.value.split('-')
      query[key] =
        rangeValues.length === 2
          ? { from: rangeValues[0], to: rangeValues[1] }
          : { from: term.value }
    }
    // Unregistered key: we add it back as pure text
    else {
      const text = term.keyword + ':' + term.value
      query.text.push(text)
      if (withOffsets) {
        query.offsets.push({
          text: text,
          offsetStart: term.offsetStart,
          offsetEnd: term.offsetEnd,
        })
      }
    }
  }

  // Return forged query object
  query.exclude = exclusion
  return query
}

View File

@@ -6,7 +6,14 @@ import {
type ResultNote,
type SearchMatch,
} from './globals'
import { extractHeadingsFromCache, stringsToRegex, wait } from './utils'
import {
extractHeadingsFromCache,
splitQuotes,
stringsToRegex,
stripMarkdownCharacters,
wait,
} from './utils'
import { Query } from './query'
let minisearchInstance: MiniSearch<IndexedNote>
@@ -54,12 +61,12 @@ export async function initGlobalSearchIndex(): Promise<void> {
/**
* Searches the index for the given query,
* and returns an array of raw results
* @param query
* @param text
* @returns
*/
function search(query: string): SearchResult[] {
if (!query) return []
return minisearchInstance.search(query, {
async function search(query: Query): Promise<SearchResult[]> {
if (!query.getWordsStr()) return []
let results = minisearchInstance.search(query.getWordsStr(), {
prefix: true,
fuzzy: term => (term.length > 4 ? 0.2 : false),
combineWith: 'AND',
@@ -70,6 +77,29 @@ function search(query: string): SearchResult[] {
headings3: 1.1,
},
})
// If the search query contains quotes, filter out results that don't have the exact match
const exactTerms = query.getExactTerms()
if (exactTerms.length) {
results = results.filter(r => {
const content = stripMarkdownCharacters(
indexedNotes[r.id]?.content ?? '',
).toLowerCase()
return exactTerms.every(q => content.includes(q))
})
}
// If the search query contains exclude terms, filter out results that have them
const exclusions = query.exclusions
if (exclusions.length) {
results = results.filter(r => {
const content = stripMarkdownCharacters(
indexedNotes[r.id]?.content ?? '',
).toLowerCase()
return exclusions.every(q => !content.includes(q.value))
})
}
return results
}
/**
@@ -96,12 +126,13 @@ export function getMatches(text: string, reg: RegExp): SearchMatch[] {
* @param options
* @returns
*/
export function getSuggestions(
query: string,
export async function getSuggestions(
queryStr: string,
options?: Partial<{ singleFilePath: string | null }>,
): ResultNote[] {
): Promise<ResultNote[]> {
// Get the raw results
let results = search(query)
const query = new Query(queryStr)
let results = await search(query)
if (!results.length) return []
// Either keep the 50 first results,
@@ -121,7 +152,17 @@ export function getSuggestions(
if (!note) {
throw new Error(`Note "${result.id}" not indexed`)
}
const words = Object.keys(result.match)
// Clean search matches that match quoted expressions,
// and inject those expressions instead
let words = Object.keys(result.match)
const quoted = splitQuotes(query.getWordsStr())
for (const quote of quoted) {
for (const q of quote.toLowerCase()) {
words = words.filter(w => !w.toLowerCase().startsWith(q))
}
words.push(quote)
}
const matches = getMatches(note.content, stringsToRegex(words))
const resultNote: ResultNote = {
score: result.score,

View File

@@ -5,6 +5,7 @@ import {
highlightClass,
isSearchMatch,
regexLineSplit,
regexStripQuotes,
regexYaml,
} from './globals'
import type { SearchMatch } from './globals'
@@ -89,7 +90,7 @@ export function makeExcerpt(content: string, offset: number): string {
const pos = offset ?? -1
if (pos > -1) {
const from = Math.max(0, pos - excerptBefore)
const to = Math.min(content.length - 1, pos + excerptAfter)
const to = Math.min(content.length, pos + excerptAfter)
content =
(from > 0 ? '…' : '') +
content.slice(from, to).trim() +
@@ -97,3 +98,51 @@ export function makeExcerpt(content: string, offset: number): string {
}
return escapeHTML(content)
}
/**
 * Extracts the "expressions in double quotes" found in a string.
 * Unquoted words and empty quotes (`""`) are not returned.
 * @param str the text to scan
 * @returns the quoted expressions, without their quotes, in order of appearance
 */
export function splitQuotes(str: string): string[] {
  const quotedRegex = /"(.*?)"/g
  const expressions: string[] = []
  let found: RegExpExecArray | null
  while ((found = quotedRegex.exec(str)) !== null) {
    // Group 1 is the text between the two quotes
    const inner = found[1]
    if (inner) {
      expressions.push(inner)
    }
  }
  return expressions
}
/**
 * Removes a quote character (`"` or `'`) from the very start and from the
 * very end of a string, using `regexStripQuotes`.
 * @param str the text to unquote
 * @returns the text without its leading and trailing quote
 */
export function stripSurroundingQuotes(str: string): string {
  const unquoted = str.replace(regexStripQuotes, '')
  return unquoted
}
/**
 * Applies an async callback to every item of an array and waits for all the
 * results.
 * @param array the items to map
 * @param callbackfn async mapper, called with the same arguments as `Array.map`
 * @returns a promise of the mapped values, in the order of the input
 */
function mapAsync<T, U>(
  array: T[],
  callbackfn: (value: T, index: number, array: T[]) => Promise<U>,
): Promise<U[]> {
  const pending = array.map((item, position, source) =>
    callbackfn(item, position, source),
  )
  return Promise.all(pending)
}
/**
 * Filters an array with an async predicate.
 * All predicates are evaluated through `mapAsync` before any item is kept.
 * https://stackoverflow.com/a/53508547
 * @param array the items to filter
 * @param callbackfn async predicate, called with the same arguments as `Array.filter`
 * @returns a promise of the items whose predicate resolved to a truthy value
 */
export async function filterAsync<T>(
  array: T[],
  callbackfn: (value: T, index: number, array: T[]) => Promise<boolean>,
): Promise<T[]> {
  const verdicts = await mapAsync(array, callbackfn)
  const kept: T[] = []
  array.forEach((item, position) => {
    if (verdicts[position]) {
      kept.push(item)
    }
  })
  return kept
}
/**
 * Removes the `*` and `_` runs that wrap a piece of text (bold and italic
 * markdown), keeping only the wrapped text.
 * @param text the markdown text
 * @returns the text without those emphasis markers
 */
export function stripMarkdownCharacters(text: string): string {
  const emphasisRegex = /(\*|_)+(.+?)(\*|_)+/g
  // `$2` is the text found between the opening and closing markers
  return text.replace(emphasisRegex, '$2')
}