# pelican-obsidian/pelican/plugins/obsidian/obsidian.py

from pathlib import Path

from itertools import chain
import os
import re
from pelican import signals
from pelican.readers import MarkdownReader
from pelican.utils import pelican_open

from markdown import Markdown

# Article name (file name without extension) -> directory of the article
# relative to the generator's base path, always ending in '/'.
# Filled by populate_files_and_articles().
ARTICLE_PATHS = {}
# Attachment file name (including extension) -> directory of the file relative
# to the generator's base path, always ending in '/'.
# Filled by populate_files_and_articles().
FILE_PATHS = {}

# Obsidian wiki-link syntax: [[filename]] or [[filename|linkname]].
# The earlier greedy pattern is kept for reference; it broke when a single
# line contained more than one link.
#link = r'\[\[\s*(?P<filename>[^|\]]+)(\|\s*(?P<linkname>.+))?\]\]'   # greedy, broken multiple same line
link = r'\[\[\s*(?P<filename>[^|\]]+?)(\|\s*(?P<linkname>.+?))?\]\]'  # non-greedy
# Embedded files use the same syntax prefixed with '!', e.g. ![[file.jpg]].
file_re = re.compile(r'!' + link)
# Plain links; this also matches the [[...]] part of an embed, so file_re has
# to be applied to the text before link_re.
link_re = re.compile(link)


"""
# Test cases
be sure to check the [[The Useless Room|electronics room]] and [[Incubation Room]]
[[my link]]
[[ my work ]]
[[ my work | is finished ]]

![[ a file.jpg ]]
![[file.jpg]]
"""


def get_file_and_linkname(match):
    """Extract the link target and its display text from a wiki-link match.

    Args:
        match: A regex match object exposing the named groups ``filename``
            and ``linkname`` (``linkname`` is ``None`` when the link has no
            ``|alias`` part).

    Returns:
        A ``(filename, linkname)`` tuple of whitespace-stripped strings.
        ``linkname`` falls back to ``filename`` when the alias is missing or
        consists only of whitespace.
    """
    group = match.groupdict()
    filename = group['filename'].strip()
    # Strip before testing for emptiness: an alias such as "[[note| ]]" is
    # truthy as captured but would otherwise produce an empty link text.
    linkname = (group['linkname'] or '').strip() or filename
    return filename, linkname


class ObsidianMarkdownReader(MarkdownReader):
    """
    Markdown reader that rewrites Obsidian wiki-links and embeds into the
    link syntax pelican understands before the text is converted.
    """

    def __init__(self, *args, **kwargs):
        # No state of its own; construction is delegated to MarkdownReader.
        super().__init__(*args, **kwargs)

    def replace_obsidian_links(self, text):
        """Return *text* with every ``![[...]]`` and ``[[...]]`` rewritten.

        Known articles become ``{filename}`` links and known files become
        ``{static}`` images. Links to unknown articles are reduced to their
        display text and embeds of unknown files are removed.
        """

        def article_link(match):
            target, label = get_file_and_linkname(match)
            folder = ARTICLE_PATHS.get(target)
            if not folder:
                # Unknown article: keep the visible text, drop the link.
                return label
            return f'[{label}]({{filename}}{folder}{target}.md)'

        def embedded_file(match):
            target, label = get_file_and_linkname(match)
            folder = FILE_PATHS.get(target)
            if not folder:
                # don't show it at all since it will be broken
                return ''
            return f'![{label}]({{static}}{folder}{target})'

        # Embeds go first: the plain link pattern would otherwise consume
        # the [[...]] part of an embed and leave a stray '!'.
        without_embeds = file_re.sub(embedded_file, text)
        return link_re.sub(article_link, without_embeds)

    def read(self, source_path):
        """Parse content and metadata of a markdown file.

        The Obsidian links in the file are rewritten before conversion.
        Returns a ``(content, metadata)`` tuple; ``metadata`` is an empty
        dict when the Markdown instance exposes no ``Meta`` attribute.
        """
        self._source_path = source_path
        self._md = Markdown(**self.settings['MARKDOWN'])

        with pelican_open(source_path) as raw:
            rewritten = self.replace_obsidian_links(raw)
            content = self._md.convert(rewritten)

        if not hasattr(self._md, 'Meta'):
            return content, {}
        return content, self._parse_metadata(self._md.Meta)


def _relative_dir(path, base_path):
    """Return the directory of *path* relative to *base_path* as '/sub/dir/' ('/' for the base itself)."""
    relative = path.parent.relative_to(base_path).as_posix()
    # relative_to() yields Path('.') when the file sits directly in base_path.
    return '/' if relative == '.' else '/{}/'.format(relative)


def populate_files_and_articles(article_generator):
    """Index every article and attachment found below the generator's path.

    Fills the module-level ``ARTICLE_PATHS`` (article name without extension
    -> directory) and ``FILE_PATHS`` (file name with extension -> directory).
    Directories are expressed relative to ``article_generator.path``, use
    forward slashes, and start and end with '/'; entries located directly in
    the base directory map to '/'.

    When several entries share a name, the one visited last wins. Existing
    entries in both dicts are kept; they are only overwritten by name.

    Args:
        article_generator: Object whose ``path`` attribute is the directory
            to scan recursively.
    """
    base_path = Path(article_generator.path)

    # Only the path prefix is stripped (relative_to), so a base path whose
    # text recurs further down the tree, or a relative base path, no longer
    # corrupts the stored directory as a plain str.replace() did.
    for article in base_path.glob('**/*.md'):
        ARTICLE_PATHS[article.stem] = _relative_dir(article, base_path)

    for ext in ('png', 'jpg', 'svg', 'apkg', 'gif'):
        for _file in base_path.glob('**/*.{}'.format(ext)):
            FILE_PATHS[_file.name] = _relative_dir(_file, base_path)


def modify_reader(article_generator):
    """Refresh the path indexes and install the Obsidian reader for 'md' files."""
    # The indexes are rebuilt first so the reader resolves links against the
    # files currently on disk.
    populate_files_and_articles(article_generator)
    obsidian_reader = ObsidianMarkdownReader(article_generator.settings)
    article_generator.readers.readers['md'] = obsidian_reader


def modify_metadata(article_generator, metadata):
    """
    Strip '#' from tag names so tags can be written the Obsidian way.

    Only tags whose name contains '#' are reassigned; the others are left
    untouched. ``article_generator`` is not used.
    """
    hashed_tags = (t for t in metadata.get('tags', []) if '#' in t.name)
    for hashed in hashed_tags:
        hashed.name = hashed.name.replace('#', '')


def register():
    """Connect the plugin's handlers to the pelican article generator signals."""
    hooks = (
        (signals.article_generator_context, modify_metadata),
        (signals.article_generator_init, modify_reader),
    )
    for signal, handler in hooks:
        signal.connect(handler)
Updated code that actually works for links 2021-07-03 17:26:57 +00:00			`from pathlib import Path`

			`from itertools import chain`
			`import os`
			`import re`
Initial commit 2021-07-03 10:59:04 +00:00			`from pelican import signals`
Updated code that actually works for links 2021-07-03 17:26:57 +00:00			`from pelican.readers import MarkdownReader`
			`from pelican.utils import pelican_open`
Initial commit 2021-07-03 10:59:04 +00:00
Updated code that actually works for links 2021-07-03 17:26:57 +00:00			`from markdown import Markdown`
Use a name when installing with -e 2021-07-03 11:40:42 +00:00
Make naming a bit more clear 2022-04-22 21:33:13 +00:00			`ARTICLE_PATHS = {}`
			`FILE_PATHS = {}`
Updated code that actually works for links 2021-07-03 17:26:57 +00:00
Allow multiple links in one line 2024-11-11 17:28:15 +00:00			`#link = r'\[\[\s(?P<filename>[^\|\]]+)(\\|\s(?P<linkname>.+))?\]\]' # greedy, broken multiple same line`
			`link = r'\[\[\s(?P<filename>[^\|\]]+?)(\\|\s(?P<linkname>.+?))?\]\]' # non-greedy`
Updated code that actually works for links 2021-07-03 17:26:57 +00:00			`file_re = re.compile(r'!' + link)`
			`link_re = re.compile(link)`


			`"""`
			`# Test cases`
Allow multiple links in one line 2024-11-11 17:28:15 +00:00			`be sure to check the [[The Useless Room\|electronics room]] and [[Incubation Room]]`
Updated code that actually works for links 2021-07-03 17:26:57 +00:00			`[[my link]]`
			`[[ my work ]]`
			`[[ my work \| is finished ]]`
Initial commit 2021-07-03 10:59:04 +00:00
Updated code that actually works for links 2021-07-03 17:26:57 +00:00			`![[ a file.jpg ]]`
			`![[file.jpg]]`
			`"""`
Initial commit 2021-07-03 10:59:04 +00:00
Updated code that actually works for links 2021-07-03 17:26:57 +00:00
Support images as well 2021-07-03 18:41:06 +00:00			`def get_file_and_linkname(match):`
			`group = match.groupdict()`
			`filename = group['filename'].strip()`
			`linkname = group['linkname'] if group['linkname'] else filename`
			`linkname = linkname.strip()`
			`return filename, linkname`


Updated code that actually works for links 2021-07-03 17:26:57 +00:00			`class ObsidianMarkdownReader(MarkdownReader):`
Initial commit 2021-07-03 10:59:04 +00:00			`"""`
			`Change the format of various links to the accepted case of pelican.`
			`"""`
Updated code that actually works for links 2021-07-03 17:26:57 +00:00
			`def __init__(self, args, *kwargs):`
			`super().__init__(args, *kwargs)`

			`def replace_obsidian_links(self, text):`
Support images as well 2021-07-03 18:41:06 +00:00			`def link_replacement(match):`
			`filename, linkname = get_file_and_linkname(match)`
Make naming a bit more clear 2022-04-22 21:33:13 +00:00			`path = ARTICLE_PATHS.get(filename)`
Only link posts that actually exists 2021-07-03 18:11:08 +00:00			`if path:`
Include "/" in empty paths When articles were in the base directory, the string replacement: `str(full_path).replace(str(base_path) + '/'` would not match, causing the whole absolute path to be included which would break the link. This should now work if the article is in the base directory or in any subfolder. 2022-04-22 21:34:27 +00:00			`link_structure = '[{linkname}]({{filename}}{path}{filename}.md)'.format(`
Only link posts that actually exists 2021-07-03 18:11:08 +00:00			`linkname=linkname, path=path, filename=filename`
			`)`
			`else:`
			`link_structure = '{linkname}'.format(linkname=linkname)`
Updated code that actually works for links 2021-07-03 17:26:57 +00:00			`return link_structure`

Support images as well 2021-07-03 18:41:06 +00:00			`def file_replacement(match):`
			`filename, linkname = get_file_and_linkname(match)`
Make naming a bit more clear 2022-04-22 21:33:13 +00:00			`path = FILE_PATHS.get(filename)`
Support images as well 2021-07-03 18:41:06 +00:00			`if path:`
Include "/" in empty paths When articles were in the base directory, the string replacement: `str(full_path).replace(str(base_path) + '/'` would not match, causing the whole absolute path to be included which would break the link. This should now work if the article is in the base directory or in any subfolder. 2022-04-22 21:34:27 +00:00			`link_structure = '![{linkname}]({{static}}{path}{filename})'.format(`
Support images as well 2021-07-03 18:41:06 +00:00			`linkname=linkname, path=path, filename=filename`
			`)`
			`else:`
			`# don't show it at all since it will be broken`
			`link_structure = ''`
			`return link_structure`

			`text = file_re.sub(file_replacement, text)`
			`text = link_re.sub(link_replacement, text)`
Updated code that actually works for links 2021-07-03 17:26:57 +00:00			`return text`

			`def read(self, source_path):`
			`"""Parse content and metadata of markdown files`

			`It also changes the links to the acceptable format for pelican`
			`"""`

			`self._source_path = source_path`
			`self._md = Markdown(**self.settings['MARKDOWN'])`

			`with pelican_open(source_path) as text:`
			`text = self.replace_obsidian_links(text)`
			`content = self._md.convert(text)`

			`if hasattr(self._md, 'Meta'):`
			`metadata = self._parse_metadata(self._md.Meta)`
			`else:`
			`metadata = {}`
			`return content, metadata`


			`def populate_files_and_articles(article_generator):`
Make naming a bit more clear 2022-04-22 21:33:13 +00:00			`global ARTICLE_PATHS`
			`global FILE_PATHS`
Updated code that actually works for links 2021-07-03 17:26:57 +00:00
			`base_path = Path(article_generator.path)`
Regenerate paths on each change This is so we can detect new files without having to restart the Pelican server. Computers are fast enough anyway. 2022-04-22 21:31:08 +00:00			`articles = base_path.glob('*/.md')`
			`for article in articles:`
			`full_path, filename_w_ext = os.path.split(article)`
			`filename, ext = os.path.splitext(filename_w_ext)`
Include "/" in empty paths When articles were in the base directory, the string replacement: `str(full_path).replace(str(base_path) + '/'` would not match, causing the whole absolute path to be included which would break the link. This should now work if the article is in the base directory or in any subfolder. 2022-04-22 21:34:27 +00:00			`path = str(full_path).replace(str(base_path), '') + '/'`
Make naming a bit more clear 2022-04-22 21:33:13 +00:00			`ARTICLE_PATHS[filename] = path`
Updated code that actually works for links 2021-07-03 17:26:57 +00:00
Support images as well 2021-07-03 18:41:06 +00:00			`globs = [base_path.glob('*/.{}'.format(ext)) for ext in ['png', 'jpg', 'svg', 'apkg', 'gif']]`
			`files = chain(*globs)`

Regenerate paths on each change This is so we can detect new files without having to restart the Pelican server. Computers are fast enough anyway. 2022-04-22 21:31:08 +00:00			`for _file in files:`
			`full_path, filename_w_ext = os.path.split(_file)`
Include "/" in empty paths When articles were in the base directory, the string replacement: `str(full_path).replace(str(base_path) + '/'` would not match, causing the whole absolute path to be included which would break the link. This should now work if the article is in the base directory or in any subfolder. 2022-04-22 21:34:27 +00:00			`path = str(full_path).replace(str(base_path), '') + '/'`
Make naming a bit more clear 2022-04-22 21:33:13 +00:00			`FILE_PATHS[filename_w_ext] = path`
Updated code that actually works for links 2021-07-03 17:26:57 +00:00

			`def modify_reader(article_generator):`
			`populate_files_and_articles(article_generator)`
			`article_generator.readers.readers['md'] = ObsidianMarkdownReader(article_generator.settings)`
Initial commit 2021-07-03 10:59:04 +00:00

Remove # in tag names 2021-07-03 11:41:04 +00:00			`def modify_metadata(article_generator, metadata):`
Initial commit 2021-07-03 10:59:04 +00:00			`"""`
Remove # in tag names 2021-07-03 11:41:04 +00:00			`Modify the tags so we can define the tags as we are used to in obsidian.`
Initial commit 2021-07-03 10:59:04 +00:00			`"""`
Only link posts that actually exists 2021-07-03 18:11:08 +00:00			`for tag in metadata.get('tags', []):`
Remove # in tag names 2021-07-03 11:41:04 +00:00			`if '#' in tag.name:`
			`tag.name = tag.name.replace('#', '')`
Initial commit 2021-07-03 10:59:04 +00:00

			`def register():`
			`signals.article_generator_context.connect(modify_metadata)`
Updated code that actually works for links 2021-07-03 17:26:57 +00:00			`signals.article_generator_init.connect(modify_reader)`