Feature/40 key value current folder (#218)

* #40 - Reworked Query

* #40 - added a "path:" option in the query field

* #40 - folder exclusion

* Cleaner code
This commit is contained in:
Simon Cambier
2023-04-02 13:00:52 +02:00
committed by GitHub
parent 60f56452dc
commit 56fc8157fb
7 changed files with 95 additions and 438 deletions

View File

@@ -44,7 +44,8 @@
"dexie": "^3.2.2",
"lodash-es": "4.17.21",
"minisearch": "6.0.0-beta.1",
"pure-md5": "^0.1.14"
"pure-md5": "^0.1.14",
"search-query-parser": "^1.6.0"
},
"pnpm": {
"overrides": {

6
pnpm-lock.yaml generated
View File

@@ -26,6 +26,7 @@ specifiers:
prettier: ^2.8.1
prettier-plugin-svelte: ^2.8.1
pure-md5: ^0.1.14
search-query-parser: ^1.6.0
svelte: ^3.54.0
svelte-check: ^2.10.2
svelte-jester: ^2.3.2
@@ -40,6 +41,7 @@ dependencies:
lodash-es: 4.17.21
minisearch: 6.0.0-beta.1
pure-md5: 0.1.14
search-query-parser: 1.6.0
devDependencies:
'@babel/preset-env': 7.20.2
@@ -4477,6 +4479,10 @@ packages:
xmlchars: 2.2.0
dev: true
/search-query-parser/1.6.0:
resolution: {integrity: sha512-bhf+phLlKF38nuniwLcVHWPArHGdzenlPhPi955CR3vm1QQifXIuPHwAffhjapojdVVzmv4hgIJ6NOX1d/w+Uw==}
dev: false
/semver/6.3.0:
resolution: {integrity: sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==}
hasBin: true

View File

@@ -9,7 +9,7 @@ describe('The Query class', () => {
const query = new Query(stringQuery)
// Assert
const segments = query.segments.map(s => s.value)
const segments = query.query.text
expect(segments).toHaveLength(5)
expect(segments).toContain('foo')
expect(segments).toContain('bar')
@@ -17,35 +17,18 @@ describe('The Query class', () => {
expect(segments).toContain('dolor')
expect(segments).toContain('sit amet')
const exclusions = query.exclusions.map(s => s.value)
const exclusions = query.query.exclude.text
expect(exclusions).toHaveLength(2)
expect(exclusions).toContain('baz')
expect(exclusions).toContain('quoted exclusion')
})
it('should mark quoted segments & exclusions as "exact"', () => {
// Act
const query = new Query(stringQuery)
// Assert
expect(query.segments.filter(s => s.exact)).toHaveLength(2)
expect(
query.segments.find(o => o.value === 'lorem ipsum')!.exact
).toBeTruthy()
expect(query.segments.find(o => o.value === 'sit amet')!.exact).toBeTruthy()
expect(query.exclusions.filter(s => s.exact)).toHaveLength(1)
expect(
query.exclusions.find(o => o.value === 'quoted exclusion')!.exact
).toBeTruthy()
})
it('should not exclude words when there is no space before', () => {
// Act
const query = new Query('foo bar-baz')
// Assert
expect(query.exclusions).toHaveLength(0)
expect(query.query.exclude.text).toHaveLength(0)
})
describe('.getExactTerms()', () => {

View File

@@ -15,7 +15,6 @@
getCtrlKeyLabel,
getExtension,
isFilePDF,
logDebug,
loopIndex,
} from 'src/tools/utils'
import {

View File

@@ -72,8 +72,8 @@ export class Omnisearch {
}
private minisearch: MiniSearch
private indexedDocuments: Map<string, number> = new Map()
private previousResults: SearchResult[] = []
private previousQuery: Query | null = null
// private previousResults: SearchResult[] = []
// private previousQuery: Query | null = null
constructor() {
this.minisearch = new MiniSearch(Omnisearch.options)
@@ -175,8 +175,8 @@ export class Omnisearch {
options: { prefixLength: number; singleFilePath?: string }
): Promise<SearchResult[]> {
if (query.isEmpty()) {
this.previousResults = []
this.previousQuery = null
// this.previousResults = []
// this.previousQuery = null
return []
}
@@ -210,6 +210,22 @@ export class Omnisearch {
})
}
// Filter query results that match the path
if (query.query.path) {
results = results.filter(r =>
query.query.path?.some(p =>
(r.id as string).toLowerCase().includes(p.toLowerCase())
)
)
}
if (query.query.exclude.path) {
results = results.filter(r =>
!query.query.exclude.path?.some(p =>
(r.id as string).toLowerCase().includes(p.toLowerCase())
)
)
}
// If the query does not return any result,
// retry but with a shorter prefix limit
if (!results.length) {
@@ -243,9 +259,7 @@ export class Omnisearch {
}
// Extract tags from the query
const tags = query.segments
.filter(s => s.value.startsWith('#'))
.map(s => s.value)
const tags = query.getTags()
// Put the results with tags on top
for (const tag of tags) {
@@ -280,14 +294,14 @@ export class Omnisearch {
}
// If the search query contains exclude terms, filter out results that have them
const exclusions = query.exclusions
const exclusions = query.query.exclude.text
if (exclusions.length) {
logDebug('Filtering with exclusions')
results = results.filter(r => {
const content = stripMarkdownCharacters(
documents.find(d => d.path === r.id)?.content ?? ''
).toLowerCase()
return exclusions.every(q => !content.includes(q.value))
return exclusions.every(q => !content.includes(q))
})
}
@@ -298,8 +312,8 @@ export class Omnisearch {
(result, index, arr) => arr.findIndex(t => t.id === result.id) === index
)
this.previousQuery = query
this.previousResults = results
// this.previousQuery = query
// this.previousResults = results
return results
}
@@ -375,16 +389,6 @@ export class Omnisearch {
} as IndexedDocument
}
// Remove '#' from tags, for highlighting
query.segments.forEach(s => {
s.value = s.value.replace(/^#/, '')
})
// Extract tags from the query
const tags = query.segments
.filter(s => s.value.startsWith('#'))
.map(s => s.value)
// Clean search matches that match quoted expressions,
// and inject those expressions instead
const foundWords = [
@@ -393,10 +397,10 @@ export class Omnisearch {
...Object.keys(result.match),
// Quoted expressions
...query.segments.filter(s => s.exact).map(s => s.value),
...query.getExactTerms(),
// Tags, starting with #
...tags,
...query.getTags(),
].filter(w => w.length > 1 || /\p{Emoji}/u.test(w))
logDebug('Matching tokens:', foundWords)

View File

@@ -1,83 +1,77 @@
import { settings } from '../settings'
import { removeDiacritics, stripSurroundingQuotes } from '../tools/utils'
import { parseQuery } from '../vendor/parse-query'
import { regexExtensions } from '../globals'
import { removeDiacritics } from '../tools/utils'
import { parse } from 'search-query-parser'
type QueryToken = {
/**
* The query token string value
*/
value: string
const keywords = ['ext', 'path'] as const
/**
* Was this token encased in quotes?
*/
exact: boolean
}
type Keywords = {
[K in typeof keywords[number]]?: string[]
} & { text: string[] }
/**
* This class is used to parse a query string into a structured object
*/
export class Query {
public segments: QueryToken[] = []
public exclusions: QueryToken[] = []
public extensions: string[] = []
query: Keywords & { exclude: Keywords }
/**
* @deprecated
*/
extensions: string[] = []
constructor(text = '') {
// Extract & remove extensions from the query
this.extensions = this.extractExtensions(text)
text = this.removeExtensions(text)
if (settings.ignoreDiacritics) text = removeDiacritics(text)
const tokens = parseQuery(text.toLowerCase(), { tokenize: true })
this.exclusions = tokens.exclude.text
.map(this.formatToken)
.filter(o => !!o.value)
this.segments = tokens.text.reduce<QueryToken[]>((prev, curr) => {
const formatted = this.formatToken(curr)
if (formatted.value) {
prev.push(formatted)
if (settings.ignoreDiacritics) {
text = removeDiacritics(text)
}
return prev
}, [])
const parsed = parse(text.toLowerCase(), {
tokenize: true,
keywords: keywords as unknown as string[],
}) as unknown as typeof this.query
// Default values
parsed.text = parsed.text ?? []
parsed.exclude = parsed.exclude ?? {}
parsed.exclude.text = parsed.exclude.text ?? []
if (!Array.isArray(parsed.exclude.text)) {
parsed.exclude.text = [parsed.exclude.text]
}
// Make sure that all fields are string[]
for (const k of keywords) {
const v = parsed[k]
if (v) {
parsed[k] = Array.isArray(v) ? v : [v]
}
const e = parsed.exclude[k]
if (e) {
parsed.exclude[k] = Array.isArray(e) ? e : [e]
}
}
this.query = parsed
this.extensions = this.query.ext ?? []
}
public isEmpty(): boolean {
return this.segments.length === 0
for (const k of keywords) {
if (this.query[k]?.length) {
return false
}
if (this.query.text.length) {
return false
}
}
return true
}
public segmentsToStr(): string {
return this.segments.map(({ value }) => value).join(' ')
return this.query.text.join(' ')
}
public getTags(): string[] {
return this.query.text.filter(o => o.startsWith('#'))
}
public getTagsWithoutHashtag(): string[] {
return this.getTags().map(o => o.replace(/^#/, ''))
}
/**
* Returns the terms that are encased in quotes
* @returns
*/
public getExactTerms(): string[] {
return this.segments.filter(({ exact }) => exact).map(({ value }) => value)
}
private formatToken(str: string): QueryToken {
const stripped = stripSurroundingQuotes(str)
return {
value: stripped,
exact: stripped !== str,
}
}
/**
* Extracts an array of extensions like ".png" from a string
*/
private extractExtensions(str: string): string[] {
const extensions = (str.match(regexExtensions) ?? []).map(o => o.trim())
if (extensions) {
return extensions.map(ext => ext.toLowerCase())
}
return []
}
private removeExtensions(str: string): string {
return str.replace(regexExtensions, '')
return this.query.text.filter(o => o.split(' ').length > 1)
}
}

View File

@@ -1,330 +0,0 @@
/*!
* search-query-parser.js
* Original: https://github.com/nepsilon/search-query-parser
* Modified by Simon Cambier
* Copyright(c) 2014-2019
* MIT Licensed
*/
import { warnDebug } from "../tools/utils";
/**
 * Options accepted by `parseQuery`.
 */
interface SearchParserOptions {
/** Whether term positions are recorded in the result's `offsets`; `parseQuery` treats an omitted value as `true`. */
offsets?: boolean
/** Only the literal `true` is accepted. NOTE(review): the `parseQuery` in this file never reads this flag. */
tokenize: true
/** Keys (the part before `:`) that are collected as keyword values; a `-` prefix on such a key files the value under `exclude`. */
keywords?: string[]
/** Keys whose value is parsed as a `from-to` range. */
ranges?: string[]
/** When set, a keyword seen for the first time with a single value is stored as a one-element array instead of a plain string. */
alwaysArray?: boolean
}
/**
 * Open-ended string-keyed bag. `parseQuery` stores keyword values
 * (a string or an array of strings) and range objects under dynamic keys.
 */
interface ISearchParserDictionary {
[key: string]: any
}
/**
 * Payload of an offset entry that describes a `key:value` term.
 */
type SearchParserKeyWordOffset = {
// The key, i.e. the part of the term before the first `:`
keyword: string
// The part of the term after the first `:`
value?: string
}
/**
 * Payload of an offset entry that describes a plain-text term.
 */
type SearchParserTextOffset = {
// The term as it was added to the result's `text` list
text: string
}
/**
 * One entry of the result's `offsets` list: either a keyword or a text
 * payload, plus the position of the term in the query string.
 */
type SearchParserOffset = (
| SearchParserKeyWordOffset
| SearchParserTextOffset
) & {
// Index in the query string at which the term starts
offsetStart: number
// End position of the term, as computed by `parseQuery`
offsetEnd: number
}
/**
 * Structured query returned by `parseQuery`. Besides the fixed fields
 * below, keyword and range entries are added under their own keys.
 */
interface SearchParserResult extends ISearchParserDictionary {
// Free-text terms, in the order they were processed
text: string[]
// Positions of the processed terms (stays empty when offsets are disabled)
offsets: SearchParserOffset[]
// Terms that were prefixed with `-`
exclude: { text: string[] }
}
/**
 * Parses a search string into free-text terms, excluded terms, keyword
 * values and ranges.
 *
 * Terms are separated by whitespace; single- or double-quoted runs are kept
 * together (the quotes themselves are NOT removed). A term containing `:` is
 * split at the first `:` into a key and a value:
 * - if the key is listed in `options.keywords`, the comma-separated values
 *   are collected under `result[key]`, or under `result.exclude[key]` when
 *   the key is prefixed with `-`;
 * - if the key is listed in `options.ranges`, the value is stored as
 *   `{ from, to }` (or `{ from }` when it is not exactly `a-b`);
 * - otherwise the whole `key:value` term is treated as plain text.
 * A plain term starting with `-` goes to `result.exclude.text`.
 *
 * The `options` object is never modified.
 *
 * @param string The raw query; a falsy value is treated as an empty query.
 * @param options Parser options; `offsets` defaults to `true` when omitted.
 * @returns The structured query. `offsets` stays empty when offsets are
 *          disabled.
 */
export function parseQuery(
  string: string,
  options: SearchParserOptions,
): SearchParserResult {
  // Read the options into locals instead of writing defaults back into the
  // caller's object.
  const opts: Partial<SearchParserOptions> = options || {}
  const recordOffsets =
    typeof opts.offsets === 'undefined' ? true : opts.offsets
  const keywords = opts.keywords || []
  const ranges = opts.ranges || []
  const alwaysArray = Boolean(opts.alwaysArray)
  const input = string || ''

  const exclusion: ISearchParserDictionary & { text: string[] } = { text: [] }
  const query: SearchParserResult = {
    text: [],
    offsets: [],
    exclude: exclusion,
  }

  // --- Pass 1: split the input into terms, respecting quotes ---
  const terms: PendingSearchTerm[] = []
  // Note: the trailing `\S+:\S+` alternative is shadowed by the `\S+` before
  // it; the pattern is kept as-is.
  const regex =
    /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|(-?"(?:[^"\\]|\\.)*")|(-?'(?:[^'\\]|\\.)*')|\S+|\S+:\S+/g
  const startTime = Date.now()
  let count = 0
  let match: RegExpExecArray | null
  while ((match = regex.exec(input)) !== null) {
    // Safety valve against pathological input: give up on the 100th match
    // or after 50 ms, keeping what was parsed so far.
    if (++count >= 100 || Date.now() - startTime > 50) {
      warnDebug('Stopped SearchParserResult at', count, 'results')
      break
    }

    const raw = match[0]
    const offsetStart = match.index
    // Offsets always span the ORIGINAL text, so they are computed before any
    // unescaping shortens the term.
    const offsetEnd = offsetStart + raw.length
    const sepIndex = raw.indexOf(':')

    if (sepIndex !== -1) {
      // `key:value` term; whether `key` is meaningful is decided in pass 2
      terms.push({
        keyword: raw.slice(0, sepIndex),
        value: unescapeBackslashes(raw.slice(sepIndex + 1)),
        offsetStart,
        offsetEnd,
      })
    } else if (raw[0] === '-') {
      // Excluded plain term: drop the leading dash
      exclusion.text.push(unescapeBackslashes(raw.slice(1)))
    } else {
      terms.push({ text: unescapeBackslashes(raw), offsetStart, offsetEnd })
    }
  }

  // --- Pass 2: classify each term ---
  for (const term of terms) {
    if ('text' in term) {
      // A term that unescaped to an empty string is dropped
      if (term.text) {
        query.text.push(term.text)
        if (recordOffsets) {
          query.offsets.push(term)
        }
      }
      continue
    }
    // A term with an empty key (e.g. `:foo`) is dropped
    if (!term.keyword) {
      continue
    }

    let key = term.keyword
    let isKeyword: boolean
    let isExclusion = false
    if (key[0] !== '-') {
      isKeyword = keywords.includes(key)
    } else {
      // `-key:value` excludes, but only when `key` is a registered keyword
      const stripped = key.slice(1)
      isKeyword = keywords.includes(stripped)
      if (isKeyword) {
        key = stripped
        isExclusion = true
      }
    }

    if (isKeyword) {
      if (recordOffsets) {
        query.offsets.push({
          keyword: key,
          value: term.value,
          // Skip the leading dash of an exclusion
          offsetStart: isExclusion ? term.offsetStart + 1 : term.offsetStart,
          offsetEnd: term.offsetEnd,
        })
      }
      // An empty value (`key:`) records nothing
      if (term.value.length) {
        addKeywordValues(
          isExclusion ? exclusion : query,
          key,
          term.value.split(','),
          alwaysArray,
        )
      }
    } else if (ranges.includes(key)) {
      if (recordOffsets) {
        query.offsets.push(term)
      }
      // Ranges are separated with a dash: keyword:XXXX-YYYY.
      // Anything else is kept whole as the lower bound.
      const bounds = term.value.split('-')
      query[key] =
        bounds.length === 2
          ? { from: bounds[0], to: bounds[1] }
          : { from: term.value }
    } else {
      // Unregistered key: the whole term is plain text
      const text = term.keyword + ':' + term.value
      query.text.push(text)
      if (recordOffsets) {
        query.offsets.push({
          text,
          offsetStart: term.offsetStart,
          offsetEnd: term.offsetEnd,
        })
      }
    }
  }

  return query
}

/** A term produced by the first pass of `parseQuery`, not yet classified. */
type PendingSearchTerm =
  | { text: string; offsetStart: number; offsetEnd: number }
  | { keyword: string; value: string; offsetStart: number; offsetEnd: number }

/** Strips backslashes while honouring escapes: `\\` gives `\`, `\0` gives NUL, `\x` gives `x`. */
function unescapeBackslashes(str: string): string {
  return str.replace(/\\(.?)/g, (_s, escaped: string) =>
    escaped === '0' ? '\u0000' : escaped,
  )
}

/**
 * Records `values` under `target[key]`. The first single value is stored as
 * a plain string (or a one-element array when `alwaysArray` is set); any
 * further values turn the entry into an array holding every value seen.
 */
function addKeywordValues(
  target: ISearchParserDictionary,
  key: string,
  values: string[],
  alwaysArray: boolean,
): void {
  const existing = target[key]
  if (existing) {
    target[key] = (Array.isArray(existing) ? existing : [existing]).concat(
      values,
    )
  } else if (values.length > 1 || alwaysArray) {
    target[key] = values
  } else {
    target[key] = values[0]
  }
}