feat: Handle missing article GUIDs by generating and embedding one in source file
This commit is contained in:
parent
02f2346c93
commit
86f3a08bbc
|
@ -1,5 +1,6 @@
|
||||||
import logging
|
import logging
|
||||||
import pprint
|
import pprint
|
||||||
|
import uuid
|
||||||
|
|
||||||
from pelican import signals
|
from pelican import signals
|
||||||
|
|
||||||
|
@ -20,7 +21,64 @@ def modify_feed(context, feed):
|
||||||
raise Exception(f"Article not found for title: {item_title}")
|
raise Exception(f"Article not found for title: {item_title}")
|
||||||
|
|
||||||
if not hasattr(article, 'guid') or not article.guid:
|
if not hasattr(article, 'guid') or not article.guid:
|
||||||
raise Exception(f"Article '{article.title}' is missing a guid.")
|
log.info(f"Article '{article.title}' ({article.source_path}) is missing a guid. Generating and embedding one.")
|
||||||
|
new_guid_str = uuid.uuid4().hex
|
||||||
|
# The string to embed in the paragraph. Note the leading space.
|
||||||
|
guid_text_to_embed = f" Guid: {new_guid_str}"
|
||||||
|
|
||||||
|
source_path = article.source_path
|
||||||
|
|
||||||
|
# Reconstruct metadata header from article.metadata
|
||||||
|
# article.metadata keys are typically lowercase. Capitalize them for convention.
|
||||||
|
metadata_header_lines = []
|
||||||
|
for key, value in article.metadata.items():
|
||||||
|
if isinstance(value, list):
|
||||||
|
# Convert list items to string and join with comma (e.g., for tags)
|
||||||
|
metadata_header_lines.append(f"{key.capitalize()}: {', '.join(map(str, value))}")
|
||||||
|
else:
|
||||||
|
# Ensure value is string for concatenation
|
||||||
|
metadata_header_lines.append(f"{key.capitalize()}: {str(value)}")
|
||||||
|
|
||||||
|
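# NOTE(review): article._content is whatever the Pelican reader produced (normally rendered HTML for Markdown sources), not necessarily the raw Markdown text; confirm before writing it back to the source file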
|
||||||
|
if not hasattr(article, '_content'):
|
||||||
|
log.error(f"Article '{article.title}' does not have '_content' attribute. Cannot embed Guid into source file.")
|
||||||
|
# This is a critical issue for the requested operation.
|
||||||
|
raise Exception(f"Cannot find raw content for article '{article.title}' to embed Guid.")
|
||||||
|
|
||||||
|
markdown_body = article._content
|
||||||
|
|
||||||
|
# Split the markdown body into the first paragraph and the rest
|
||||||
|
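# Markdown paragraphs are separated by one or more blank lines; the split below only recognizes the first literal "\n\n" (a blank line containing spaces, or CRLF line endings, will not match)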
|
||||||
|
parts = markdown_body.split('\n\n', 1)
|
||||||
|
first_paragraph_text = parts[0]
|
||||||
|
rest_of_body = parts[1] if len(parts) > 1 else ""
|
||||||
|
|
||||||
|
# Append the Guid text to the end of the first paragraph
|
||||||
|
# .rstrip() removes any trailing whitespace/newlines from the paragraph itself before appending.
|
||||||
|
modified_first_paragraph = first_paragraph_text.rstrip() + guid_text_to_embed
|
||||||
|
|
||||||
|
# Reconstruct the new markdown body
|
||||||
|
new_markdown_body = modified_first_paragraph
|
||||||
|
if rest_of_body: # Add back the rest of the body with the double newline separator
|
||||||
|
new_markdown_body += '\n\n' + rest_of_body
|
||||||
|
|
||||||
|
# Combine metadata and new body to form the complete new file content
|
||||||
|
if metadata_header_lines:
|
||||||
|
full_new_content = "\n".join(metadata_header_lines) + "\n\n" + new_markdown_body
|
||||||
|
else: # No metadata, just the body
|
||||||
|
full_new_content = new_markdown_body
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(source_path, 'w', encoding='utf-8') as f:
|
||||||
|
f.write(full_new_content)
|
||||||
|
log.info(f"Successfully wrote updated content with embedded Guid to '{source_path}'.")
|
||||||
|
except Exception as e:
|
||||||
|
log.error(f"Failed to write updated content to '{source_path}': {e}")
|
||||||
|
raise # Re-raise the exception to halt processing if file write fails
|
||||||
|
|
||||||
|
# Set article.guid for the current Pelican run, so it's used for the feed item
|
||||||
|
article.guid = new_guid_str
|
||||||
|
log.debug(f"Set in-memory article.guid = '{new_guid_str}' for '{article.title}'.")
|
||||||
|
|
||||||
item['unique_id'] = article.guid
|
item['unique_id'] = article.guid
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user