Highlighting rework, should help with #304
@@ -190,6 +190,8 @@ export class Omnisearch {
         headings3: settings.weightH3,
         unmarkedTags: settings.weightUnmarkedTags,
       },
+      // The query is already tokenized, don't tokenize again
+      tokenize: text => [text],
     })
 
     logDebug('Found', results.length, 'results')
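The key addition here is the search-time tokenizer: the query handed to MiniSearch has already been tokenized by tokenizeForSearch(), so tokenize: text => [text] passes each term through verbatim instead of splitting it a second time. A minimal sketch of that kind of call, assuming the MiniSearch API this code appears to target, with made-up field names and weights rather than the plugin's actual setup:

import MiniSearch from 'minisearch'

// Minimal sketch, not the plugin's actual index setup.
const minisearch = new MiniSearch({
  fields: ['basename', 'content', 'headings3', 'unmarkedTags'],
})

const results = minisearch.search(
  // The query is already a list of tokens produced upstream.
  { combineWith: 'OR', queries: ['foo', 'bar'] },
  {
    boost: { headings3: 1.1, unmarkedTags: 1.1 }, // assumed weights
    // Identity tokenizer: each pre-tokenized term passes through unchanged.
    tokenize: text => [text],
  }
)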
@@ -404,7 +406,7 @@ export class Omnisearch {
 
       // Tags, starting with #
       ...query.getTags(),
-    ].filter(w => w.length > 1 || /\p{Emoji}/u.test(w))
+    ]
     logDebug('Matching tokens:', foundWords)
 
     logDebug('Getting matches locations...')
@@ -6,6 +6,7 @@ import {
   getChsSegmenter,
 } from 'src/globals'
 import { logDebug, splitCamelCase, splitHyphens } from 'src/tools/utils'
+const markdownLinkExtractor = require('markdown-link-extractor')
 
 function tokenizeWords(text: string): string[] {
   return text.split(BRACKETS_AND_SPACE)
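The new markdown-link-extractor dependency is what feeds the URL handling below: it takes a note's raw markdown and returns the link targets it finds. A rough usage sketch, assuming the 2.x API that returns a plain string array (which is what the string[] typing in this diff suggests; later major versions wrap the result in an object):

const markdownLinkExtractor = require('markdown-link-extractor')

// Pull every link target out of a piece of markdown so full URLs can later
// be indexed and searched as single tokens.
const urls: string[] = markdownLinkExtractor(
  'See [the docs](https://example.com/docs) and [home](https://example.org/).'
)
// urls should contain 'https://example.com/docs' and 'https://example.org/'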
@@ -23,6 +24,7 @@ function tokenizeTokens(text: string): string[] {
  */
 export function tokenizeForIndexing(text: string): string[] {
   const words = tokenizeWords(text)
+  const urls: string[] = markdownLinkExtractor(text)
 
   let tokens = tokenizeTokens(text)
 
@@ -35,6 +37,11 @@ export function tokenizeForIndexing(text: string): string[] {
   // Add whole words (aka "not tokens")
   tokens = [...tokens, ...words]
 
+  // Add urls
+  if (urls.length) {
+    tokens = [...tokens, ...urls]
+  }
+
   const chsSegmenter = getChsSegmenter()
   if (chsSegmenter) {
     const chs = tokens.flatMap(word =>
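Appending the extracted URLs after the word and token splits presumably keeps each link searchable as one intact term, since a punctuation-based token split would otherwise shred it into fragments. A small sketch of that idea, using a stand-in regex rather than the plugin's actual token pattern:

// Stand-in split, not the real tokenizeTokens(): punctuation shreds a URL...
const url = 'https://example.com/some-page'
const shredded = url.split(/[^\p{L}\p{N}]+/u).filter(Boolean)
// shredded => ['https', 'example', 'com', 'some', 'page']

// ...so the whole URL is also kept as a token of its own, as the hunk above does.
const tokens = [...shredded, url]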
@@ -56,7 +63,12 @@ export function tokenizeForSearch(text: string): QueryCombination {
  * @returns
  */
 export function tokenizeForSearch(text: string): QueryCombination {
-  const tokens = tokenizeTokens(text)
+
+  // Extract urls and remove them from the query
+  const urls: string[] = markdownLinkExtractor(text)
+  text = urls.reduce((acc, url) => acc.replace(url, ''), text)
+
+  const tokens = [...tokenizeTokens(text), ...urls].filter(Boolean)
 
   let chs: string[] = []
   const chsSegmenter = getChsSegmenter()
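On the search side the same extractor runs over the query string: each URL is erased from the text before the remaining words are tokenized, and the URLs rejoin the token list whole, so a pasted link is matched as one term instead of a pile of fragments. A small sketch of that strip-and-keep step on an assumed query:

// Assumed query string and extractor output, for illustration only.
const query = 'release notes https://example.com/changelog'
const urls = ['https://example.com/changelog']

// Same reduce as above: erase each URL from the query text...
const stripped = urls.reduce((acc, url) => acc.replace(url, ''), query)
// stripped => 'release notes '

// ...then tokenize what is left and add the URLs back as whole tokens.
const tokens = [...stripped.split(/\s+/), ...urls].filter(Boolean)
// tokens => ['release', 'notes', 'https://example.com/changelog']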
@@ -70,7 +82,7 @@ export function tokenizeForSearch(text: string): QueryCombination {
     combineWith: 'OR',
     queries: [
       { combineWith: 'AND', queries: tokens },
-      { combineWith: 'AND', queries: tokenizeWords(text) },
+      { combineWith: 'AND', queries: tokenizeWords(text).filter(Boolean) },
       { combineWith: 'AND', queries: tokens.flatMap(splitHyphens) },
       { combineWith: 'AND', queries: tokens.flatMap(splitCamelCase) },
       { combineWith: 'AND', queries: chs },
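The final query is an OR over several AND groups, one per tokenization strategy: raw tokens (now including URLs), whole words, hyphen splits, camelCase splits, and the Chinese segmenter output; a note matches as soon as any single strategy matches all of its terms. The added .filter(Boolean) on tokenizeWords(text) is presumably needed because stripping URLs out of the query text can leave empty strings behind. A sketch of the shape this builds, with purely illustrative token lists:

// Illustrative QueryCombination for a query like 'some-token'; the actual
// lists come from the tokenizers above.
const combination = {
  combineWith: 'OR',
  queries: [
    { combineWith: 'AND', queries: ['some-token'] },    // raw tokens (and urls)
    { combineWith: 'AND', queries: ['some-token'] },    // whole words, '' filtered out
    { combineWith: 'AND', queries: ['some', 'token'] }, // hyphen split
    { combineWith: 'AND', queries: ['some-token'] },    // camelCase split (unchanged here)
    { combineWith: 'AND', queries: [] },                // Chinese segmenter output, if enabled
  ],
}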