import logging
import pprint
import uuid

from pelican import signals

log = logging.getLogger(__name__)


def _index_articles_by_title(articles):
    """Return a ``{title: article}`` mapping, raising on duplicate titles.

    Feed items are matched back to articles by title only, so two articles
    sharing a title would make that lookup ambiguous.
    """
    by_title = {}
    for article in articles:
        if article.title in by_title:
            raise Exception(f"Duplicate article title found: {article.title}")
        by_title[article.title] = article
    return by_title


def _embed_guid_in_body(body, guid_text):
    """Return *body* with *guid_text* appended to its first paragraph.

    Paragraphs are taken to be separated by a blank line (``\\n\\n``).
    Everything outside the first paragraph is left untouched:

    * leading newlines are kept in front, so the Guid is attached to the
      first real paragraph rather than to an empty one;
    * the remainder after the first blank line is re-attached verbatim;
    * if the body consists of a single paragraph and ended with a newline,
      a final newline is kept so the file does not lose it.
    """
    text = body.lstrip('\n')
    leading_newlines = body[:len(body) - len(text)]
    first_paragraph, separator, rest = text.partition('\n\n')

    # Trailing whitespace of the paragraph is dropped so the Guid text (which
    # carries its own leading space) follows the last visible character.
    new_body = leading_newlines + first_paragraph.rstrip() + guid_text
    if rest:
        new_body += separator + rest
    elif body.endswith('\n'):
        new_body += '\n'
    return new_body


def _read_source(source_path):
    """Read *source_path* as UTF-8 text with universal newlines.

    Returns ``(text, newline)``. ``text`` has every line ending normalised to
    ``\\n``. ``newline`` is the single line-ending style found in the file
    (``'\\n'``, ``'\\r\\n'`` or ``'\\r'``), or ``None`` when the file had no
    line endings or mixed ones, in which case the platform default applies
    on write.
    """
    try:
        with open(source_path, 'r', encoding='utf-8') as f:
            text = f.read()
            seen_newlines = getattr(f, 'newlines', None)
    except Exception as e:
        log.error("Failed to read original content from '%s': %s", source_path, e)
        raise
    newline = seen_newlines if isinstance(seen_newlines, str) else None
    return text, newline


def _embed_guid_in_source(article, guid_text):
    """Rewrite the article's source file with *guid_text* in its first paragraph.

    The file is expected to be ``<metadata block><article._content>``; the
    metadata block is copied through unchanged and only the body is edited.

    Raises:
        Exception: if the article has no ``_content`` attribute, or if
            ``_content`` is not a suffix of the file text.
        Any exception raised while reading or writing the file is logged and
        re-raised.
    """
    source_path = article.source_path

    if not hasattr(article, '_content'):
        log.error(
            "Article '%s' does not have '_content' attribute. "
            "Cannot embed Guid into source file.",
            article.title,
        )
        raise Exception(
            f"Cannot find raw content for article '{article.title}' to embed Guid."
        )
    body = article._content

    file_text, newline = _read_source(source_path)

    # The metadata/body boundary is derived by length, which is only valid if
    # the parsed body really is the tail of the file.
    if not file_text.endswith(body):
        log.error(
            "Content mismatch for '%s' in '%s'. "
            "The article's parsed content (article._content) does not match the "
            "ending of the raw file (read with universal newlines). This is unexpected "
            "and may indicate issues with file parsing or concurrent modifications.",
            article.title,
            source_path,
        )
        log.debug("Tail of original file content: %r", file_text[-200:])
        log.debug("Tail of article._content: %r", body[-200:])
        raise Exception(
            f"Content boundary determination error for article '{article.title}'."
        )

    metadata_part = file_text[:len(file_text) - len(body)]
    full_new_content = metadata_part + _embed_guid_in_body(body, guid_text)

    try:
        # Reuse the line-ending style detected on read so the rewrite does not
        # convert every line ending in the file.
        with open(source_path, 'w', encoding='utf-8', newline=newline) as f:
            f.write(full_new_content)
        log.info(
            "Successfully wrote updated content with embedded Guid to '%s'.",
            source_path,
        )
    except Exception as e:
        log.error("Failed to write updated content to '%s': %s", source_path, e)
        raise  # Halt processing if the file write fails.


def modify_feed(context, feed):
    """Set ``unique_id`` on every feed item from its article's guid.

    Each item in ``feed.items`` is matched to an article in
    ``context['articles']`` by title. If the article has no (or an empty)
    ``guid`` attribute, a new ``uuid4`` hex string is generated, embedded as
    `` Guid: <hex>`` at the end of the first paragraph of the article's source
    file, and stored on the in-memory article for the current run.

    Raises:
        Exception: on duplicate article titles, on a feed item whose title
            matches no article, or when the Guid cannot be embedded in the
            source file.
    """
    articles = _index_articles_by_title(context['articles'])

    for item in feed.items:
        item_title = item['title']
        article = articles.get(item_title)
        if article is None:
            raise Exception(f"Article not found for title: {item_title}")

        if not getattr(article, 'guid', None):
            log.info(
                "Article '%s' (%s) is missing a guid. Generating and embedding one.",
                article.title,
                article.source_path,
            )
            new_guid_str = uuid.uuid4().hex
            # Note the leading space: it separates the Guid from the paragraph text.
            _embed_guid_in_source(article, f" Guid: {new_guid_str}")

            # Make the new guid visible to the rest of this run.
            article.guid = new_guid_str
            log.debug(
                "Set in-memory article.guid = '%s' for '%s'.",
                new_guid_str,
                article.title,
            )

        item['unique_id'] = article.guid


def register():
    """Connect ``modify_feed`` to Pelican's ``feed_generated`` signal."""
    signals.feed_generated.connect(modify_feed)