# personal-site/swap_guids.py

import logging
import pprint
import re
import uuid

from pelican import signals

log = logging.getLogger(__name__)

# Line patterns copied from the Python-Markdown "meta" extension so the
# header/body boundary is found the same way that extension finds it.
# NOTE(review): assumes article sources are Markdown files whose metadata is
# parsed by that extension -- confirm for any non-Markdown content.
_META_BEGIN_RE = re.compile(r'^-{3}(\s.*)?')
_META_RE = re.compile(r'^[ ]{0,3}(?P<key>[A-Za-z0-9_-]+):\s*(?P<value>.*)')
_META_MORE_RE = re.compile(r'^[ ]{4,}(?P<value>.*)')
_META_END_RE = re.compile(r'^(-{3}|\.{3})(\s.*)?')


def _body_start_index(lines):
    """Return the index of the first line after the metadata header.

    ``lines`` is the source split into lines (line endings may be kept).
    Returns 0 when the text does not start with a metadata header.
    """
    idx = 0
    if lines and _META_BEGIN_RE.match(lines[0]):
        idx = 1  # optional opening '---' of a YAML-style header

    seen_key = False
    while idx < len(lines):
        line = lines[idx]
        if not line.strip() or _META_END_RE.match(line):
            # A blank line or a closing '---' / '...' ends the header and is
            # itself consumed.
            return idx + 1
        if _META_RE.match(line):
            seen_key = True
        elif not (seen_key and _META_MORE_RE.match(line)):
            # Neither "Key: value" nor an indented continuation of a previous
            # key: this line already belongs to the body.
            return idx
        idx += 1
    return idx


def _append_to_first_paragraph(source_text, suffix):
    """Return ``source_text`` with ``suffix`` appended to its first body paragraph.

    Everything else in the text (metadata header, remaining paragraphs, line
    endings, a leading BOM) is passed through untouched. If there is no body
    text, ``suffix`` becomes the body, separated from any header by a blank line.
    """
    # A leading BOM would stop the first header line from matching the
    # metadata pattern, so set it aside while parsing and restore it at the end.
    bom = '\ufeff' if source_text.startswith('\ufeff') else ''
    text = source_text[len(bom):]

    lines = text.splitlines(keepends=True)
    first = _body_start_index(lines)
    # Skip any extra blank lines between the header and the first paragraph.
    while first < len(lines) and not lines[first].strip():
        first += 1

    if first == len(lines):
        head = text.rstrip()
        if not head:
            return bom + suffix
        newline = '\r\n' if '\r\n' in text else '\n'
        return bom + head + newline * 2 + suffix

    # The paragraph runs until the next blank line (or the end of the file).
    last = first
    while last + 1 < len(lines) and lines[last + 1].strip():
        last += 1

    # Drop trailing spaces/tabs before the suffix but keep the line terminator.
    stripped = lines[last].rstrip()
    line_ending = lines[last][len(stripped):].lstrip(' \t')
    lines[last] = stripped + suffix + line_ending
    return bom + ''.join(lines)


def _embed_new_guid(article):
    """Generate a guid, embed it in the article's source file, and return it.

    The file at ``article.source_path`` is read as UTF-8, the text
    `` Guid: <hex>`` is appended to the first body paragraph, and the file is
    written back. The raw file text is edited directly; it is not rebuilt
    from ``article.metadata`` / ``article._content``, because those hold
    processed values rather than what the author wrote.

    NOTE(review): the guid is embedded in body prose, not added as a
    ``Guid:`` metadata line, so presumably a later build will not see it as
    ``article.guid`` and will generate another one -- confirm the intended
    format.

    Raises:
        OSError, UnicodeError: if the source file cannot be read or written.
    """
    new_guid_str = uuid.uuid4().hex
    # The string to embed in the paragraph. Note the leading space.
    guid_text_to_embed = f" Guid: {new_guid_str}"
    source_path = article.source_path

    # newline='' on both read and write keeps the file's own line endings.
    try:
        with open(source_path, encoding='utf-8', newline='') as f:
            original_text = f.read()
    except (OSError, UnicodeError) as e:
        log.error(f"Failed to read source file '{source_path}': {e}")
        raise

    # Build the complete new text before opening the file for writing, so a
    # failure while editing can never leave a truncated file behind.
    full_new_content = _append_to_first_paragraph(original_text, guid_text_to_embed)

    try:
        with open(source_path, 'w', encoding='utf-8', newline='') as f:
            f.write(full_new_content)
    except (OSError, UnicodeError) as e:
        log.error(f"Failed to write updated content to '{source_path}': {e}")
        raise  # Re-raise the exception to halt processing if file write fails

    log.info(f"Successfully wrote updated content with embedded Guid to '{source_path}'.")
    return new_guid_str


def modify_feed(context, feed):
    """Set each feed item's ``unique_id`` to the guid of the matching article.

    Articles are matched to feed items by title. An article without a
    (truthy) ``guid`` attribute gets a freshly generated one, which is also
    embedded into its source file and stored on the article object so later
    calls in the same run reuse it.

    Args:
        context: mapping whose ``'articles'`` entry is an iterable of article
            objects exposing ``title``, ``source_path`` and optionally ``guid``.
        feed: object whose ``items`` is an iterable of dict-like feed entries
            with a ``'title'`` key; each entry gets ``'unique_id'`` assigned.

    Raises:
        Exception: if two articles share a title, or a feed item's title
            matches no article.
        OSError, UnicodeError: if a source file cannot be read or written
            while embedding a new guid.
    """
    articles = {}
    for article in context['articles']:
        if article.title in articles:
            raise Exception(f"Duplicate article title found: {article.title}")
        articles[article.title] = article

    for item in feed.items:
        item_title = item['title']
        article = articles.get(item_title)
        if article is None:
            raise Exception(f"Article not found for title: {item_title}")

        if not getattr(article, 'guid', None):
            log.info(f"Article '{article.title}' ({article.source_path}) is missing a guid. Generating and embedding one.")
            new_guid_str = _embed_new_guid(article)

            # Set article.guid for the current Pelican run, so it's used for the feed item
            article.guid = new_guid_str
            log.debug(f"Set in-memory article.guid = '{new_guid_str}' for '{article.title}'.")

        item['unique_id'] = article.guid


def register():
    """Connect ``modify_feed`` as a receiver of the ``feed_generated`` signal."""
    feed_signal = signals.feed_generated
    feed_signal.connect(modify_feed)