Merge branch 'develop'

2023-03-18 11:11:21 +01:00
parent bb8fd10d1b 23640f4b0b
commit de1b453801
9 changed files with 144 additions and 52 deletions
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "scambier.obsidian-search",
-  "version": "1.12.3",
+  "version": "1.13.0-beta.2",
  "description": "A search engine for Obsidian",
  "main": "dist/main.js",
  "scripts": {
--- a/src/cache-manager.ts
+++ b/src/cache-manager.ts
@@ -11,6 +11,7 @@ import {
  getTagsFromMetadata,
  isFileCanvas,
  isFilePlaintext,
  logDebug,
  makeMD5,
  removeDiacritics,
 } from './tools/utils'
@@ -150,6 +151,7 @@ class CacheManager {
    if (this.documents.has(path)) {
      return this.documents.get(path)!
    }
    logDebug('Generating IndexedDocument from', path)
    await this.addToLiveCache(path)
    return this.documents.get(path)!
  }
--- a/src/components/ModalVault.svelte
+++ b/src/components/ModalVault.svelte
@@ -15,6 +15,7 @@
    getCtrlKeyLabel,
    getExtension,
    isFilePDF,
    logDebug,
    loopIndex,
  } from 'src/tools/utils'
  import {
@@ -27,7 +28,6 @@
  import * as NotesIndex from '../notes-index'
  import { cacheManager } from '../cache-manager'
  import { searchEngine } from 'src/search/omnisearch'
  import CancelablePromise, { cancelable } from 'cancelable-promise'
  export let modal: OmnisearchVaultModal
  export let previousQuery: string | undefined
@@ -40,28 +40,13 @@
  let searching = true
  let refInput: InputSearch | undefined
  let pWaitingResults: CancelablePromise | null = null
  $: selectedNote = resultNotes[selectedIndex]
  $: searchQuery = searchQuery ?? previousQuery
  $: if (searchQuery) {
    if (pWaitingResults) {
      pWaitingResults.cancel()
      pWaitingResults = null
    }
    searching = true
-    pWaitingResults = cancelable(
+    updateResults().then(() => {
      new Promise((resolve, reject) => {
        updateResults()
          .then(() => {
      searching = false
            resolve(null)
    })
          .catch(e => {
            reject(e)
          })
      })
    )
  } else {
    searching = false
    resultNotes = []
@@ -130,9 +115,7 @@
  async function updateResults() {
    query = new Query(searchQuery)
-    resultNotes = (await searchEngine.getSuggestions(query)).sort(
+    resultNotes = await searchEngine.getSuggestions(query)
      (a, b) => b.score - a.score
    )
    selectedIndex = 0
    await scrollIntoView()
  }
--- a/src/globals.ts
+++ b/src/globals.ts
@@ -14,7 +14,7 @@ export const excerptBefore = 100
 export const excerptAfter = 300
 export const highlightClass = `suggestion-highlight omnisearch-highlight ${
-  settings.hightlight ? 'omnisearch-default-highlight' : ''
+  settings.highlight ? 'omnisearch-default-highlight' : ''
 }`
 export const eventBus = new EventBus()
--- a/src/main.ts
+++ b/src/main.ts
@@ -18,7 +18,7 @@ import {
  isCacheEnabled,
 } from './globals'
 import api, { notifyOnIndexed } from './tools/api'
-import { isFileIndexable } from './tools/utils'
+import { isFileIndexable, logDebug } from './tools/utils'
 import { database, OmnisearchCache } from './database'
 import * as NotesIndex from './notes-index'
 import { searchEngine } from './search/omnisearch'
@@ -69,6 +69,7 @@ export default class OmnisearchPlugin extends Plugin {
      this.registerEvent(
        this.app.vault.on('create', file => {
          if (isFileIndexable(file.path)) {
            logDebug('Indexing new file', file.path)
            // await cacheManager.addToLiveCache(file.path)
            searchEngine.addFromPaths([file.path])
          }
@@ -76,6 +77,7 @@ export default class OmnisearchPlugin extends Plugin {
      )
      this.registerEvent(
        this.app.vault.on('delete', file => {
          logDebug('Removing file', file.path)
          cacheManager.removeFromLiveCache(file.path)
          searchEngine.removeFromPaths([file.path])
        })
@@ -83,6 +85,7 @@ export default class OmnisearchPlugin extends Plugin {
      this.registerEvent(
        this.app.vault.on('modify', async file => {
          if (isFileIndexable(file.path)) {
            logDebug('Updating file', file.path)
            await cacheManager.addToLiveCache(file.path)
            NotesIndex.markNoteForReindex(file)
          }
@@ -91,6 +94,7 @@ export default class OmnisearchPlugin extends Plugin {
      this.registerEvent(
        this.app.vault.on('rename', async (file, oldPath) => {
          if (isFileIndexable(file.path)) {
            logDebug('Renaming file', file.path)
            cacheManager.removeFromLiveCache(oldPath)
            cacheManager.addToLiveCache(file.path)
            searchEngine.removeFromPaths([oldPath])
--- a/src/search/omnisearch.ts
+++ b/src/search/omnisearch.ts
@@ -9,9 +9,12 @@ import { chsRegex, getChsSegmenter, SPACE_OR_PUNCTUATION } from '../globals'
 import { settings } from '../settings'
 import {
  chunkArray,
  logDebug,
  removeDiacritics,
  splitCamelCase,
  stringsToRegex,
  stripMarkdownCharacters,
  warnDebug,
 } from '../tools/utils'
 import { Notice } from 'obsidian'
 import type { Query } from './query'
@@ -25,7 +28,11 @@ const tokenize = (text: string): string[] => {
    return tokens.flatMap(word =>
      chsRegex.test(word) ? chsSegmenter.cut(word) : [word]
    )
-  } else return tokens
+  } else {
    if (settings.splitCamelCase)
      return [...tokens, ...tokens.flatMap(splitCamelCase)]
    return tokens
  }
 }
 export class Omnisearch {
@@ -117,11 +124,13 @@ export class Omnisearch {
   * @param paths
   */
  public async addFromPaths(paths: string[]): Promise<void> {
    logDebug('Adding files', paths)
    let documents = (
      await Promise.all(
        paths.map(async path => await cacheManager.getDocument(path))
      )
    ).filter(d => !!d?.path)
    logDebug('Sorting documents to first index markdown')
    // Index markdown files first
    documents = sortBy(documents, d => (d.path.endsWith('.md') ? 0 : 1))
@@ -133,6 +142,7 @@ export class Omnisearch {
    // Split the documents in smaller chunks to add them to minisearch
    const chunkedDocs = chunkArray(documents, 500)
    for (const docs of chunkedDocs) {
      logDebug('Indexing into search engine', docs)
      // Update the list of indexed docs
      docs.forEach(doc => this.indexedDocuments.set(doc.path, doc.mtime))
@@ -170,6 +180,8 @@ export class Omnisearch {
      return []
    }
    logDebug('Starting search for', query)
    let results = this.minisearch.search(query.segmentsToStr(), {
      prefix: term => term.length >= options.prefixLength,
      // length <= 3: no fuzziness
@@ -187,6 +199,8 @@ export class Omnisearch {
      },
    })
    logDebug('Found', results.length, 'results')
    // Filter query results to only keep files that match query.extensions (if any)
    if (query.extensions.length) {
      results = results.filter(r => {
@@ -242,7 +256,10 @@ export class Omnisearch {
      }
    }
-    results = results.slice(0, 50)
+    logDebug('Sorting and limiting results')
    // Sort results and keep the 50 best
    results = results.sort((a, b) => b.score - a.score).slice(0, 50)
    const documents = await Promise.all(
      results.map(async result => await cacheManager.getDocument(result.id))
@@ -251,6 +268,7 @@ export class Omnisearch {
    // If the search query contains quotes, filter out results that don't have the exact match
    const exactTerms = query.getExactTerms()
    if (exactTerms.length) {
      logDebug('Filtering with quoted terms')
      results = results.filter(r => {
        const document = documents.find(d => d.path === r.id)
        const title = document?.path.toLowerCase() ?? ''
@@ -264,6 +282,7 @@ export class Omnisearch {
    // If the search query contains exclude terms, filter out results that have them
    const exclusions = query.exclusions
    if (exclusions.length) {
      logDebug('Filtering with exclusions')
      results = results.filter(r => {
        const content = stripMarkdownCharacters(
          documents.find(d => d.path === r.id)?.content ?? ''
@@ -271,6 +290,8 @@ export class Omnisearch {
        return exclusions.every(q => !content.includes(q.value))
      })
    }
    logDebug('Deduping')
    // FIXME:
    // Dedupe results - crutch for https://github.com/scambier/obsidian-omnisearch/issues/129
    results = results.filter(
@@ -284,11 +305,16 @@ export class Omnisearch {
  }
  public getMatches(text: string, reg: RegExp, query: Query): SearchMatch[] {
    const startTime = new Date().getTime()
    let match: RegExpExecArray | null = null
    const matches: SearchMatch[] = []
    let count = 0
    while ((match = reg.exec(text)) !== null) {
-      if (++count >= 100) break // Avoid infinite loops, stop looking after 100 matches
+      // Avoid infinite loops, stop looking after 100 matches or if we're taking too much time
      if (++count >= 100 || new Date().getTime() - startTime > 50) {
        warnDebug('Stopped getMatches at', count, 'results')
        break
      }
      const m = match[0]
      if (m) matches.push({ match: m, offset: match.index })
    }
@@ -331,17 +357,13 @@ export class Omnisearch {
      })
    }
    // Extract tags from the query
    const tags = query.segments
      .filter(s => s.value.startsWith('#'))
      .map(s => s.value)
    const documents = await Promise.all(
      results.map(async result => await cacheManager.getDocument(result.id))
    )
    // Map the raw results to get usable suggestions
    const resultNotes = results.map(result => {
      logDebug('Locating matches for', result.id)
      let note = documents.find(d => d.path === result.id)
      if (!note) {
        // throw new Error(`Omnisearch - Note "${result.id}" not indexed`)
@@ -357,6 +379,12 @@ export class Omnisearch {
      query.segments.forEach(s => {
        s.value = s.value.replace(/^#/, '')
      })
      // Extract tags from the query
      const tags = query.segments
        .filter(s => s.value.startsWith('#'))
        .map(s => s.value)
      // Clean search matches that match quoted expressions,
      // and inject those expressions instead
      const foundWords = [
@@ -370,13 +398,15 @@ export class Omnisearch {
        // Tags, starting with #
        ...tags,
      ].filter(w => w.length > 1 || /\p{Emoji}/u.test(w))
      logDebug('Matching tokens:', foundWords)
-      // console.log(foundWords)
+      logDebug('Getting matches locations...')
      const matches = this.getMatches(
        note.content,
        stringsToRegex(foundWords),
        query
      )
      logDebug('Matches:', matches)
      const resultNote: ResultNote = {
        score: result.score,
        foundWords,
--- a/src/settings.ts
+++ b/src/settings.ts
@@ -45,7 +45,9 @@ export interface OmnisearchSettings extends WeightingSettings {
  welcomeMessage: string
  /** If a query returns 0 results, try again with more relaxed conditions */
  simpleSearch: boolean
-  hightlight: boolean
+  highlight: boolean
  splitCamelCase: boolean
  verboseLogging: boolean
 }
 /**
@@ -204,6 +206,25 @@ export class SettingsTab extends PluginSettingTab {
        })
      )
    // Split CamelCaseWords
    const camelCaseDesc = new DocumentFragment()
    camelCaseDesc.createSpan({}, span => {
      span.innerHTML = `Enable this if you want to be able to search for CamelCaseWords as separate words.<br/>        
        ⚠️ <span style="color: var(--text-accent)">Changing this setting will clear the cache.</span><br>
        <strong style="color: var(--text-accent)">Needs a restart to fully take effect.</strong>
        `
    })
    new Setting(containerEl)
      .setName('Split CamelCaseWords')
      .setDesc(camelCaseDesc)
      .addToggle(toggle =>
        toggle.setValue(settings.splitCamelCase).onChange(async v => {
          await database.clearCache()
          settings.splitCamelCase = v
          await saveSettings(this.plugin)
        })
      )
    // Simpler search
    new Setting(containerEl)
      .setName('Simpler search')
@@ -301,8 +322,8 @@ export class SettingsTab extends PluginSettingTab {
        'Will highlight matching results when enabled. See README for more customization options.'
      )
      .addToggle(toggle =>
-        toggle.setValue(settings.hightlight).onChange(async v => {
+        toggle.setValue(settings.highlight).onChange(async v => {
-          settings.hightlight = v
+          settings.highlight = v
          await saveSettings(this.plugin)
        })
      )
@@ -337,6 +358,22 @@ export class SettingsTab extends PluginSettingTab {
    //#endregion Results Weighting
    //#region Debugging
    new Setting(containerEl).setName('Debugging').setHeading()
    new Setting(containerEl)
      .setName('Enable verbose logging')
      .setDesc('Adds a LOT of logs for debugging purposes. Don\'t forget to disable it.')
      .addToggle(toggle =>
        toggle.setValue(settings.verboseLogging).onChange(async v => {
          settings.verboseLogging = v
          await saveSettings(this.plugin)
        })
      )
    //#endregion Debugging
    //#region Danger Zone
    if (isCacheEnabled()) {
      new Setting(containerEl).setName('Danger Zone').setHeading()
@@ -379,12 +416,13 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
  indexedFileTypes: [] as string[],
  PDFIndexing: false,
  imagesIndexing: false,
  splitCamelCase: false,
  ribbonIcon: true,
  showExcerpt: true,
  renderLineReturnInExcerpts: true,
  showCreateButton: false,
-  hightlight: true,
+  highlight: true,
  showPreviousQueryResults: true,
  simpleSearch: false,
@@ -395,6 +433,7 @@ export const DEFAULT_SETTINGS: OmnisearchSettings = {
  weightH3: 1.1,
  welcomeMessage: '',
  verboseLogging: false,
 } as const
 export let settings = Object.assign({}, DEFAULT_SETTINGS) as OmnisearchSettings
--- a/src/tools/utils.ts
+++ b/src/tools/utils.ts
@@ -91,14 +91,17 @@ export function getAllIndices(text: string, regex: RegExp): SearchMatch[] {
 */
 export function stringsToRegex(strings: string[]): RegExp {
  if (!strings.length) return /^$/g
  // Default word split is not applied if the user uses the cm-chs-patch plugin
  const joined =
    '(' +
-    (getChsSegmenter() ? '' : `^|${SPACE_OR_PUNCTUATION.source}`) +
+    // Default word split is not applied if the user uses the cm-chs-patch plugin
    (getChsSegmenter()
      ? ''
      : // Split on start of line, spaces, punctuation, or capital letters (for camelCase)
      settings.splitCamelCase
      ? `^|${SPACE_OR_PUNCTUATION.source}|[A-Z]`
      : `^|${SPACE_OR_PUNCTUATION.source}`) +
    ')' +
-    '(' +
+    `(${strings.map(s => escapeRegex(s)).join('|')})`
    strings.map(s => escapeRegex(s)).join('|') +
    ')'
  const reg = new RegExp(`${joined}`, 'giu')
  return reg
@@ -313,3 +316,27 @@ export function chunkArray<T>(arr: T[], len: number): T[][] {
  return chunks
 }
 /**
 * Splits a camelCase / PascalCase word into its parts, e.g.
 * 'fooBarBAZLorem' becomes ['foo', 'Bar', 'BAZ', 'Lorem'].
 *
 * A boundary is inserted between a lowercase letter and the uppercase letter
 * that follows it, and between the last letter of an uppercase run (acronym)
 * and the capitalized word that follows it.
 * Because the parts are obtained by splitting on spaces, text that already
 * contains spaces is split on them too, and text without any boundary
 * (including the empty string) is returned as a single-element array.
 * @param text the word to split
 * @returns the parts of the word, in their original order
 */
 export function splitCamelCase(text: string): string[] {
  return text
    // 1st alternative: lower -> Upper ("foo|Bar")
    // 2nd alternative: end of an acronym -> Capitalized word ("BAZ|Lorem")
    .replace(/([a-z](?=[A-Z])|[A-Z](?=[A-Z][a-z]))/g, '$1 ')
    .split(' ')
 }
 /**
 * Debug-level logging: hands `args` to `printDebug` with `console.log` as the
 * output function. `printDebug` only prints while `settings.verboseLogging`
 * is enabled.
 * @param args values to log
 */
 export function logDebug(...args: any[]): void {
  const output = console.log
  printDebug(output, ...args)
 }
 /**
 * Warning-level debug logging: hands `args` to `printDebug` with
 * `console.warn` as the output function. `printDebug` only prints while
 * `settings.verboseLogging` is enabled.
 * @param args values to log
 */
 export function warnDebug(...args: any[]): void {
  const output = console.warn
  printDebug(output, ...args)
 }
 /**
 * Shared implementation behind the debug loggers: calls `fn` with the
 * 'Omnisearch -' prefix, a timestamp and then `args`, but only while
 * `settings.verboseLogging` is enabled. Does nothing otherwise.
 * @param fn output function that receives the prefix, the timestamp and `args`
 * @param args values to log
 */
 function printDebug(fn: (...args: any[]) => any, ...args: any[]): void {
  if (!settings.verboseLogging) return
  const now = new Date()
  // minutes:seconds:milliseconds of the local time, not zero-padded
  const stamp = [
    now.getMinutes(),
    now.getSeconds(),
    now.getMilliseconds(),
  ].join(':')
  fn('Omnisearch -', `${stamp} -`, ...args)
 }
--- a/src/vendor/parse-query.ts
+++ b/src/vendor/parse-query.ts
@@ -6,6 +6,8 @@
 * MIT Licensed
 */
 import { warnDebug } from "../tools/utils";
 interface SearchParserOptions {
  offsets?: boolean
  tokenize: true
@@ -43,7 +45,7 @@ interface SearchParserResult extends ISearchParserDictionary {
 export function parseQuery(
  string: string,
-  options: SearchParserOptions
+  options: SearchParserOptions,
 ): SearchParserResult {
  // Set a default options object when none is provided
  if (!options) {
@@ -74,9 +76,14 @@ export function parseQuery(
  const regex =
    /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|(-?"(?:[^"\\]|\\.)*")|(-?'(?:[^'\\]|\\.)*')|\S+|\S+:\S+/g
  let match
-  let count = 0 // TODO: FIXME: this is a hack to avoid infinite loops
+  let count = 0
  const startTime = new Date().getTime()
  while ((match = regex.exec(string)) !== null) {
-    if (++count >= 100) break
+    if (++count >= 100 || new Date().getTime() - startTime > 50) {
      warnDebug('Stopped SearchParserResult at', count, 'results')
      break
    }
    let term = match[0]
    const sepIndex = term.indexOf(':')