From 09fea2decdcdb760ac10fa4cd9916a20f22cadac Mon Sep 17 00:00:00 2001
From: "Tanner Collin (aider)"
Date: Thu, 22 May 2025 19:18:27 -0600
Subject: [PATCH] feat: Save decoded data to files based on HAR paths

---
Review notes (ignored by git am):
- Target paths that do not resolve to a location inside data/ are skipped,
  so '..' components in a HAR-supplied path cannot write elsewhere.
- Base64 is decoded before parent directories are created, so a bad payload
  no longer leaves empty directories behind.
- binascii is imported directly instead of being reached via base64.binascii.
- Line breaks and context-line indentation were reconstructed from a
  whitespace-collapsed copy of this patch; confirm them against
  har_parser.py before applying (or use git apply --ignore-whitespace).

 har_parser.py | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/har_parser.py b/har_parser.py
index 76326f1..3e20fe6 100644
--- a/har_parser.py
+++ b/har_parser.py
@@ -1,5 +1,8 @@
 import json
 from urllib.parse import urlparse, parse_qs
+import pathlib
+import base64
+import binascii
 
 data = {}
 names = {}
@@ -131,6 +134,69 @@ def main():
             truncated_value = str(value) # Convert non-string values to string
         print(f"'{key}': '{truncated_value}'")
 
+    # --- Save decoded files ---
+    # Write every base64 payload in 'data' to the path recorded for it in
+    # 'names', rooted under the local 'data' directory.
+    print("\nSaving decoded files...")
+    output_root_dir = pathlib.Path('data')
+    # Resolved once so each target can be checked for containment below.
+    output_root_resolved = output_root_dir.resolve()
+    files_saved_count = 0
+    files_skipped_count = 0
+
+    for key_url, base64_content in data.items():
+        # The key_url is what we stored from the 'url' query parameter.
+        # Drop anything after a '?' before looking the key up in 'names'.
+        name_lookup_key = key_url.split('?', 1)[0]
+
+        original_file_path_str = names.get(name_lookup_key)
+
+        if not original_file_path_str:
+            print(f"Warning: No file path found in 'names' for key '{name_lookup_key}' (from URL '{key_url}'). Skipping.")
+            files_skipped_count += 1
+            continue
+
+        if not base64_content:
+            print(f"Warning: No base64 content for key '{key_url}' (file path '{original_file_path_str}'). Skipping.")
+            files_skipped_count += 1
+            continue
+
+        try:
+            # Ensure the path from 'names' is treated as relative to the 'output_root_dir'
+            # by stripping any leading slashes.
+            relative_file_path = pathlib.Path(original_file_path_str.lstrip('/\\'))
+            target_file_path = output_root_dir / relative_file_path
+
+            # The path comes from the HAR capture, so refuse anything (such as
+            # '..' components) that does not resolve inside 'output_root_dir'.
+            if output_root_resolved not in target_file_path.resolve().parents:
+                print(f"Warning: File path '{original_file_path_str}' for key '{key_url}' does not resolve to a location inside '{output_root_dir}'. Skipping.")
+                files_skipped_count += 1
+                continue
+
+            # Decode before touching the filesystem so a bad payload does not
+            # leave empty directories behind.
+            decoded_data = base64.b64decode(base64_content)
+
+            # Create parent directories
+            target_file_path.parent.mkdir(parents=True, exist_ok=True)
+
+            with open(target_file_path, 'wb') as f_out:
+                f_out.write(decoded_data)
+            print(f"Saved: {target_file_path}")
+            files_saved_count += 1
+        except binascii.Error as e:
+            print(f"Error decoding base64 for '{key_url}' (file path '{original_file_path_str}'): {e}. Skipping.")
+            files_skipped_count += 1
+        except OSError as e:
+            print(f"Error writing file '{target_file_path}' for key '{key_url}': {e}. Skipping.")
+            files_skipped_count += 1
+        except Exception as e:
+            print(f"An unexpected error occurred for key '{key_url}' (file path '{original_file_path_str}'): {e}. Skipping.")
+            files_skipped_count += 1
+
+    print(f"\nFile saving complete. Saved: {files_saved_count}, Skipped: {files_skipped_count}")
+
 
 if __name__ == "__main__":
     main()