diff --git a/swap_guids.py b/swap_guids.py index d81c631..7b925fa 100644 --- a/swap_guids.py +++ b/swap_guids.py @@ -1,5 +1,6 @@ import logging import pprint +import uuid from pelican import signals @@ -20,7 +21,64 @@ def modify_feed(context, feed): raise Exception(f"Article not found for title: {item_title}") if not hasattr(article, 'guid') or not article.guid: - raise Exception(f"Article '{article.title}' is missing a guid.") + log.info(f"Article '{article.title}' ({article.source_path}) is missing a guid. Generating and embedding one.") + new_guid_str = uuid.uuid4().hex + # The string to embed in the paragraph. Note the leading space. + guid_text_to_embed = f" Guid: {new_guid_str}" + + source_path = article.source_path + + # Reconstruct metadata header from article.metadata + # article.metadata keys are typically lowercase. Capitalize them for convention. + metadata_header_lines = [] + for key, value in article.metadata.items(): + if isinstance(value, list): + # Convert list items to string and join with comma (e.g., for tags) + metadata_header_lines.append(f"{key.capitalize()}: {', '.join(map(str, value))}") + else: + # Ensure value is string for concatenation + metadata_header_lines.append(f"{key.capitalize()}: {str(value)}") + + # article._content holds the raw Markdown content string (after metadata parsing) + if not hasattr(article, '_content'): + log.error(f"Article '{article.title}' does not have '_content' attribute. Cannot embed Guid into source file.") + # This is a critical issue for the requested operation. + raise Exception(f"Cannot find raw content for article '{article.title}' to embed Guid.") + + markdown_body = article._content + + # Split the markdown body into the first paragraph and the rest + # Paragraphs in Markdown are separated by one or more blank lines (\n\n) + parts = markdown_body.split('\n\n', 1) + first_paragraph_text = parts[0] + rest_of_body = parts[1] if len(parts) > 1 else "" + + # Append the Guid text to the end of the first paragraph + # .rstrip() removes any trailing whitespace/newlines from the paragraph itself before appending. + modified_first_paragraph = first_paragraph_text.rstrip() + guid_text_to_embed + + # Reconstruct the new markdown body + new_markdown_body = modified_first_paragraph + if rest_of_body: # Add back the rest of the body with the double newline separator + new_markdown_body += '\n\n' + rest_of_body + + # Combine metadata and new body to form the complete new file content + if metadata_header_lines: + full_new_content = "\n".join(metadata_header_lines) + "\n\n" + new_markdown_body + else: # No metadata, just the body + full_new_content = new_markdown_body + + try: + with open(source_path, 'w', encoding='utf-8') as f: + f.write(full_new_content) + log.info(f"Successfully wrote updated content with embedded Guid to '{source_path}'.") + except Exception as e: + log.error(f"Failed to write updated content to '{source_path}': {e}") + raise # Re-raise the exception to halt processing if file write fails + + # Set article.guid for the current Pelican run, so it's used for the feed item + article.guid = new_guid_str + log.debug(f"Set in-memory article.guid = '{new_guid_str}' for '{article.title}'.") item['unique_id'] = article.guid