Merge branch 'develop'

2024-04-07 14:39:17 +02:00
parent 76a1ce2c0c 7d44a40c4c
commit 119d6d2797
14 changed files with 966 additions and 748 deletions
@@ -48,7 +48,7 @@ You can check the [CHANGELOG](./CHANGELOG.md) for more information on the differ
 - Resistance to typos
 - Switch between Vault and In-file search to quickly skim multiple results in a single note
 - Supports `"expressions in quotes"` and `-exclusions`
- Filters file types with '.jpg' or '.md'
+- Filters file types with `.jpg` or `.md`
 - Directly Insert a `[[link]]` from the search results
 - Supports Vim navigation keys

@@ -56,6 +56,14 @@ You can check the [CHANGELOG](./CHANGELOG.md) for more information on the differ
 on [this additional plugin](https://github.com/aidenlx/cm-chs-patch). Please read its documentation for more
 information.

+## Projects that use Omnisearch
+
+_Submit a PR to add your own project!_
+
+- [Omnisearch Companion](https://github.com/ALegendsTale/omnisearch-companion), an extension for your browser ([Firefox](https://addons.mozilla.org/en-US/firefox/addon/omnisearch-companion/), [Chrome](https://chromewebstore.google.com/detail/omnisearch-companion/kcjcnnlpfbilodfnnkpioijobpjhokkd))
+- [Actions for Obsidian](https://actions.work/actions-for-obsidian)
+- [Userscripts](https://publish.obsidian.md/omnisearch/Inject+Omnisearch+results+into+your+search+engine) to inject Omnisearch into your favorite web search engine
+
 ## LICENSE

 Omnisearch is licensed under [GPL-3](https://tldrlegal.com/license/gnu-general-public-license-v3-(gpl-3)).
@@ -1,6 +1,6 @@
 {
  "name": "scambier.obsidian-search",
-  "version": "1.21.1",
+  "version": "1.22.0-beta.3",
  "description": "A search engine for Obsidian",
  "main": "dist/main.js",
  "scripts": {
@@ -14,13 +14,13 @@
  "author": "Simon Cambier",
  "license": "GPL-3",
  "devDependencies": {
-    "@babel/preset-env": "^7.23.8",
-    "@babel/preset-typescript": "^7.23.3",
+    "@babel/preset-env": "^7.24.3",
+    "@babel/preset-typescript": "^7.24.1",
    "@testing-library/jest-dom": "^5.17.0",
    "@tsconfig/svelte": "^3.0.0",
    "@types/jest": "^27.5.2",
    "@types/lodash-es": "^4.17.12",
-    "@types/node": "^16.18.74",
+    "@types/node": "^16.18.91",
    "@types/pako": "^2.0.3",
    "babel-jest": "^27.5.1",
    "builtin-modules": "^3.3.0",
@@ -37,12 +37,13 @@
    "svelte-preprocess": "^4.10.7",
    "tslib": "2.3.1",
    "typescript": "^4.9.5",
-    "vite": "^3.2.8"
+    "vite": "^3.2.10"
  },
  "dependencies": {
    "cancelable-promise": "^4.3.1",
-    "dexie": "^3.2.4",
+    "dexie": "^3.2.7",
    "lodash-es": "4.17.21",
+    "markdown-link-extractor": "^4.0.2",
    "minisearch": "^6.3.0",
    "pure-md5": "^0.1.14",
    "search-query-parser": "^1.6.0"
@@ -47,6 +47,7 @@
  let openInCurrentPaneKey: string
  let createInNewPaneKey: string
  let createInCurrentPaneKey: string
+  let openInNewLeafKey: string = getCtrlKeyLabel() + ' alt ↵'

  $: selectedNote = resultNotes[selectedIndex]
  $: searchQuery = searchQuery ?? previousQuery
@@ -101,6 +102,7 @@
    eventBus.on('vault', Action.ArrowDown, () => moveIndex(1))
    eventBus.on('vault', Action.PrevSearchHistory, prevSearchHistory)
    eventBus.on('vault', Action.NextSearchHistory, nextSearchHistory)
+    eventBus.on('vault', Action.OpenInNewLeaf, openNoteInNewLeaf)
    await NotesIndex.refreshIndex()
    await updateResultsDebounced()
  })
@@ -178,16 +180,26 @@
    modal.close()
  }

+  function openNoteInNewLeaf(): void {
+    if (!selectedNote) return
+    openSearchResult(selectedNote, true, true)
+    modal.close()
+  }
+
  function saveCurrentQuery() {
    if (searchQuery) {
      cacheManager.addToSearchHistory(searchQuery)
    }
  }

-  function openSearchResult(note: ResultNote, newPane = false) {
+  function openSearchResult(
+    note: ResultNote,
+    newPane = false,
+    newLeaf = false
+  ) {
    saveCurrentQuery()
    const offset = note.matches?.[0]?.offset ?? 0
-    openNote(note, offset, newPane)
+    openNote(note, offset, newPane, newLeaf)
  }

  async function onClickCreateNote(_e: MouseEvent) {
@@ -354,6 +366,11 @@
    <span>to open in a new pane</span>
  </div>

+  <div class="prompt-instruction">
+    <span class="prompt-instruction-command">{openInNewLeafKey}</span>
+    <span>to open in a new split</span>
+  </div>
+
  <div class="prompt-instruction">
    <span class="prompt-instruction-command">alt o</span>
    <span>to open in the background</span>
@@ -373,7 +390,7 @@
    <span>to insert a link</span>
  </div>
  <div class="prompt-instruction">
-    <span class="prompt-instruction-command">ctrl h</span>
+    <span class="prompt-instruction-command">ctrl g</span>
    <span>to toggle excerpts</span>
  </div>
  <div class="prompt-instruction">
@@ -1,8 +1,5 @@
 <script lang="ts">
-  import {
-    makeExcerpt,
-    highlightText,
-  } from 'src/tools/text-processing'
+  import { makeExcerpt, highlightText } from 'src/tools/text-processing'
  import type { ResultNote } from '../globals'
  import ResultItemContainer from './ResultItemContainer.svelte'
  import { cloneDeep } from 'lodash-es'
@@ -13,10 +10,6 @@
  export let selected = false

  $: cleanedContent = makeExcerpt(note?.content ?? '', offset)
-  $: matchesExcerpt = cloneDeep(note.matches).map(m => {
-    m.offset = m.offset - cleanedContent.offset
-    return m
-  })
 </script>

 <ResultItemContainer
@@ -26,6 +19,6 @@
  on:click
  on:auxclick>
  <div class="omnisearch-result__body">
-    {@html highlightText(cleanedContent.content, matchesExcerpt)}
+    {@html highlightText(cleanedContent, note.matches)}
  </div>
 </ResultItemContainer>
@@ -40,10 +40,6 @@
  $: reg = stringsToRegex(note.foundWords)
  $: matchesTitle = getMatches(title, reg)
  $: matchesNotePath = getMatches(notePath, reg)
-  $: matchesExcerpt = cloneDeep(note.matches).map(m => {
-    m.offset = m.offset - cleanedContent.offset
-    return m
-  })
  $: cleanedContent = makeExcerpt(note.content, note.matches[0]?.offset ?? -1)
  $: glyph = false //cacheManager.getLiveDocument(note.path)?.doesNotExist
  $: {
@@ -102,7 +98,7 @@
    <div style="display: flex; flex-direction: row;">
      {#if $showExcerpt}
        <div class="omnisearch-result__body">
-          {@html highlightText(cleanedContent.content, matchesExcerpt)}
+          {@html highlightText(cleanedContent, note.matches)}
        </div>
      {/if}

@@ -68,6 +68,7 @@ abstract class OmnisearchModal extends Modal {
    let openInNewPaneKey: Modifier[]
    let createInCurrentPaneKey: Modifier[]
    let createInNewPaneKey: Modifier[]
+    let openInNewLeafKey: Modifier[] = ['Mod', 'Alt']
    if (settings.openInNewPane) {
      openInCurrentPaneKey = ['Mod']
      openInNewPaneKey = []
@@ -86,6 +87,12 @@ abstract class OmnisearchModal extends Modal {
      eventBus.emit(Action.OpenInNewPane)
    })

+    // Open in a new leaf
+    this.scope.register(openInNewLeafKey, 'Enter', e => {
+      e.preventDefault()
+      eventBus.emit(Action.OpenInNewLeaf)
+    })
+
    // Insert link
    this.scope.register(['Alt'], 'Enter', e => {
      e.preventDefault()
@@ -136,7 +143,7 @@ abstract class OmnisearchModal extends Modal {
    })

    // Context
-    this.scope.register(['Ctrl'], 'H', _e => {
+    this.scope.register(['Ctrl'], 'G', _e => {
      eventBus.emit(EventNames.ToggleExcerpts)
    })
  }
@@ -43,6 +43,7 @@ export const enum Action {
  ArrowDown = 'arrow-down',
  PrevSearchHistory = 'prev-search-history',
  NextSearchHistory = 'next-search-history',
+  OpenInNewLeaf = 'open-in-new-leaf',
 }

 export type DocumentRef = { path: string; mtime: number }
@@ -190,6 +190,8 @@ export class Omnisearch {
        headings3: settings.weightH3,
        unmarkedTags: settings.weightUnmarkedTags,
      },
+      // The query is already tokenized, don't tokenize again
+      tokenize: text => [text],
    })

    logDebug('Found', results.length, 'results')
@@ -252,6 +254,40 @@ export class Omnisearch {
      })
    }

+    logDebug(
+      'searching with downranked folders',
+      settings.downrankedFoldersFilters
+    )
+    // downrank files that are in folders listed in the downrankedFoldersFilters
+    if (settings.downrankedFoldersFilters.length > 0) {
+      results.forEach(result => {
+        const path = result.id
+        let downrankingFolder = false
+        settings.downrankedFoldersFilters.forEach(filter => {
+          if (path.startsWith(filter)) {
+            // a bare prefix is not enough (e.g. "source" vs. the folder "sources"),
+            // i.e. the filter needs to match a whole folder name
+            if (path === filter || path.startsWith(filter + '/')) {
+              logDebug('searching with downranked folders in path: ', path)
+              downrankingFolder = true
+            }
+          }
+        })
+        if (downrankingFolder) {
+          result.score /= 10
+        }
+        const pathParts = path.split('/')
+        const pathPartsLength = pathParts.length
+        for (let i = 0; i < pathPartsLength; i++) {
+          const pathPart = pathParts[i]
+          if (settings.downrankedFoldersFilters.includes(pathPart)) {
+            result.score /= 10
+            break
+          }
+        }
+      })
+    }
+
    // Extract tags from the query
    const tags = query.getTags()

@@ -370,7 +406,7 @@ export class Omnisearch {

        // Tags, starting with #
        ...query.getTags(),
-      ].filter(w => w.length > 1 || /\p{Emoji}/u.test(w))
+      ]
      logDebug('Matching tokens:', foundWords)

      logDebug('Getting matches locations...')
@@ -6,6 +6,7 @@ import {
  getChsSegmenter,
 } from 'src/globals'
 import { logDebug, splitCamelCase, splitHyphens } from 'src/tools/utils'
+const markdownLinkExtractor = require('markdown-link-extractor')

 function tokenizeWords(text: string): string[] {
  return text.split(BRACKETS_AND_SPACE)
@@ -23,6 +24,7 @@ function tokenizeTokens(text: string): string[] {
 */
 export function tokenizeForIndexing(text: string): string[] {
  const words = tokenizeWords(text)
+  const urls: string[] = markdownLinkExtractor(text)

  let tokens = tokenizeTokens(text)

@@ -35,6 +37,11 @@ export function tokenizeForIndexing(text: string): string[] {
  // Add whole words (aka "not tokens")
  tokens = [...tokens, ...words]

+  // Add urls
+  if (urls.length) {
+    tokens = [...tokens, ...urls]
+  }
+
  const chsSegmenter = getChsSegmenter()
  if (chsSegmenter) {
    const chs = tokens.flatMap(word =>
@@ -56,7 +63,12 @@ export function tokenizeForIndexing(text: string): string[] {
 * @returns
 */
 export function tokenizeForSearch(text: string): QueryCombination {
-  const tokens = tokenizeTokens(text)
+
+  // Extract urls and remove them from the query
+  const urls: string[] = markdownLinkExtractor(text)
+  text = urls.reduce((acc, url) => acc.replace(url, ''), text)
+
+  const tokens = [...tokenizeTokens(text), ...urls].filter(Boolean)

  let chs: string[] = []
  const chsSegmenter = getChsSegmenter()
@@ -70,7 +82,7 @@ export function tokenizeForSearch(text: string): QueryCombination {
    combineWith: 'OR',
    queries: [
      { combineWith: 'AND', queries: tokens },
-      { combineWith: 'AND', queries: tokenizeWords(text) },
+      { combineWith: 'AND', queries: tokenizeWords(text).filter(Boolean) },
      { combineWith: 'AND', queries: tokens.flatMap(splitHyphens) },
      { combineWith: 'AND', queries: tokens.flatMap(splitCamelCase) },
      { combineWith: 'AND', queries: chs },
@@ -29,6 +29,8 @@ export interface OmnisearchSettings extends WeightingSettings {
  useCache: boolean
  /** Respect the "excluded files" Obsidian setting by downranking results ignored files */
  hideExcluded: boolean
+  /** Downrank search results for files located in the given folders */
+  downrankedFoldersFilters: string[]
  /** Ignore diacritics when indexing files */
  ignoreDiacritics: boolean
  /** Extensions of plain text files to index, in addition to .md */
@@ -268,6 +270,24 @@ export class SettingsTab extends PluginSettingTab {
        })
      )

+    // Downranked folders
+    new Setting(containerEl)
+      .setName('Folders to downrank in search results')
+      .setDesc(
+        `Folders to downrank in search results. Files in these folders will be downranked in results.  They will still be indexed for tags, unlike excluded files.  Folders should be comma delimited.`
+      )
+      .addText(component => {
+        component
+          .setValue(settings.downrankedFoldersFilters.join(','))
+          .setPlaceholder('Example: src,p2/dir')
+          .onChange(async v => {
+            let folders = v.split(',')
+            folders = folders.map(f => f.trim())
+            settings.downrankedFoldersFilters = folders
+            await saveSettings(this.plugin)
+          })
+      })
+
    // Split CamelCaseWords
    const camelCaseDesc = new DocumentFragment()
    camelCaseDesc.createSpan({}, span => {
@@ -621,6 +641,7 @@ export class SettingsTab extends PluginSettingTab {
 export const DEFAULT_SETTINGS: OmnisearchSettings = {
  useCache: true,
  hideExcluded: false,
+  downrankedFoldersFilters: [] as string[],
  ignoreDiacritics: true,
  indexedFileTypes: [] as string[],
  PDFIndexing: false,
@@ -44,7 +44,7 @@ function mapResults(results: ResultNote[]): ResultNoteApi[] {
          offset: match.offset,
        }
      }),
-      excerpt: excerpt.content,
+      excerpt: excerpt,
    }

    return res
@@ -5,7 +5,8 @@ import { stringsToRegex } from './text-processing'
 export async function openNote(
  item: ResultNote,
  offset = 0,
-  newPane = false
+  newPane = false,
+  newLeaf = false
 ): Promise<void> {
  // Check if the note is already open,
  // to avoid opening it twice if the first one is pinned
@@ -25,7 +26,7 @@ export async function openNote(

  if (!alreadyOpenAndPinned) {
    // Open the note normally
-    await app.workspace.openLinkText(item.path, '', newPane)
+    await app.workspace.openLinkText(item.path, '', newLeaf ? 'split' : newPane)
  }

  const view = app.workspace.getActiveViewOfType(MarkdownView)
@@ -6,7 +6,6 @@ import {
  regexStripQuotes,
  excerptAfter,
  excerptBefore,
-  SEPARATORS,
 } from 'src/globals'
 import { settings } from 'src/settings'
 import { removeDiacritics, warnDebug } from './utils'
@@ -26,17 +25,17 @@ export function highlightText(text: string, matches: SearchMatch[]): string {
  }
  try {
    // Text to highlight
-    const src = new RegExp(
+    const smartMatches = new RegExp(
      matches
        .map(
          // This regex will match the word (with \b word boundary)
          // \b doesn't detect non-alphabetical character's word boundary, so we need to escape it
-          matchItem =>
-            `\\b${escapeRegExp(matchItem.match)}\\b${
-              !/[a-zA-Z]/.test(matchItem.match)
-                ? `|${escapeRegExp(matchItem.match)}`
-                : ''
+          matchItem => {
+            const escaped = escapeRegExp(matchItem.match)
+            return `\\b${escaped}\\b${
+              !/[a-zA-Z]/.test(matchItem.match) ? `|${escaped}` : ''
            }`
+          }
        )
        .join('|'),
      'giu'
@@ -61,7 +60,17 @@ export function highlightText(text: string, matches: SearchMatch[]): string {
    }

    // Effectively highlight the text
-    return text.replace(src, replacer)
+    let newText = text.replace(smartMatches, replacer)
+
+    // If the text didn't change (= nothing to highlight), re-run the regex but just replace the matches without the word boundary
+    if (newText === text) {
+      const dumbMatches = new RegExp(
+        matches.map(matchItem => escapeRegExp(matchItem.match)).join('|'),
+        'giu'
+      )
+      newText = text.replace(dumbMatches, replacer)
+    }
+    return newText
  } catch (e) {
    console.error('Omnisearch - Error in highlightText()', e)
    return text
@@ -87,7 +96,8 @@ export function removeFrontMatter(text: string): string {
 }

 /**
- * Used to find excerpts in a note body, or select which words to highlight
+ * Builds a single regex matching any of the given strings, trying a whole-word (\b word boundary) match before a plain match.
+ * Used to find excerpts in a note body, or select which words to highlight.
 */
 export function stringsToRegex(strings: string[]): RegExp {
  if (!strings.length) return /^$/g
@@ -95,19 +105,26 @@ export function stringsToRegex(strings: string[]): RegExp {
  // sort strings by decreasing length, so that longer strings are matched first
  strings.sort((a, b) => b.length - a.length)

-  const joined =`(${strings.map(s => escapeRegExp(s)).join('|')})`
+  const joined = `(${strings
+    .map(s => `\\b${escapeRegExp(s)}\\b|${escapeRegExp(s)}`)
+    .join('|')})`

  return new RegExp(`${joined}`, 'gui')
 }

+/**
+ * Returns an array of matches in the text, using the provided regex
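+ * @param text The text to search in
+ * @param reg The regex used to find the matches
+ * @param query Optional query; if its best excerpt string is found as-is at an existing match offset, that match is put first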
+ */
 export function getMatches(
  text: string,
  reg: RegExp,
  query?: Query
 ): SearchMatch[] {
-  const separatorRegExp = new RegExp(SEPARATORS, 'gu')
  const originalText = text
-  text = text.toLowerCase().replace(separatorRegExp, ' ')
+  // text = text.toLowerCase().replace(new RegExp(SEPARATORS, 'gu'), ' ')
  if (settings.ignoreDiacritics) {
    text = removeDiacritics(text)
  }
@@ -132,24 +149,22 @@ export function getMatches(
  }

  // If the query is more than 1 token and can be found "as is" in the text, put this match first
-  if (query && (query.query.text.length > 1 || query.getExactTerms().length > 0)) {
+  if (
+    query &&
+    (query.query.text.length > 1 || query.getExactTerms().length > 0)
+  ) {
    const best = text.indexOf(query.getBestStringForExcerpt())
    if (best > -1 && matches.find(m => m.offset === best)) {
-      matches = matches.filter(m => m.offset !== best)
      matches.unshift({
        offset: best,
        match: query.getBestStringForExcerpt(),
      })
    }
  }
-
  return matches
 }

-export function makeExcerpt(
-  content: string,
-  offset: number
-): { content: string; offset: number } {
+export function makeExcerpt(content: string, offset: number): string {
  try {
    const pos = offset ?? -1
    const from = Math.max(0, pos - excerptBefore)
@@ -183,14 +198,14 @@ export function makeExcerpt(
      content = content.trim().replaceAll('\n', '<br>')
    }

-    return { content: content, offset: pos }
+    return content
  } catch (e) {
    new Notice(
      'Omnisearch - Error while creating excerpt, see developer console'
    )
    console.error(`Omnisearch - Error while creating excerpt`)
    console.error(e)
-    return { content: '', offset: -1 }
+    return ''
  }
 }