import json
from urllib.parse import urlparse, parse_qs

# Filled by main(): maps the value of the 'url' query parameter of each image
# request to that response's content 'text' exactly as stored in the HAR
# (presumably base64 for binary bodies -- the 'encoding' field is not checked).
data = {}

# Filled by main(): maps each Part 'key' found under
# MediaContainer -> Metadata[] -> Media[] -> Part[] in JSON responses to that
# Part's 'file' value (None when the Part has no 'file').
names = {}


def _entry_content(entry):
    """Return the response 'content' dict of a HAR entry, or {} if unusable."""
    if not isinstance(entry, dict):
        return {}
    response = entry.get('response')
    if not isinstance(response, dict):
        return {}
    content = response.get('content')
    return content if isinstance(content, dict) else {}


def _mime_type(content):
    """Return the content's 'mimeType' as a string ('' if missing or null)."""
    mime_type = content.get('mimeType')
    return mime_type if isinstance(mime_type, str) else ''


def _dict_children(container, key):
    """Yield the dict elements of container[key] when that value is a list."""
    if not isinstance(container, dict):
        return
    children = container.get(key, [])
    if not isinstance(children, list):
        return
    for child in children:
        if isinstance(child, dict):
            yield child


def _collect_images(entries):
    """Store the body text of every image response in the global `data`."""
    for entry in entries:
        content = _entry_content(entry)
        if not _mime_type(content).startswith('image/'):
            continue

        request_obj = entry.get('request')
        request_url = (
            request_obj.get('url') if isinstance(request_obj, dict) else None
        )
        if not isinstance(request_url, str) or not request_url:
            # Without a request URL there is no query string to take a key from.
            continue

        # The dict key is the value of the 'url' query parameter of the
        # request URL, not the request URL itself.
        query_params = parse_qs(urlparse(request_url).query)
        key = query_params.get('url', [None])[0]
        if not key:
            continue

        # Compare against None (not truthiness) so that an empty body,
        # e.g. a 0-byte file, is still recorded.
        text = content.get('text')
        if text is not None:
            data[key] = text


def _collect_part_names(entries):
    """Store Part key -> file pairs from JSON responses in the global `names`."""
    for entry in entries:
        content = _entry_content(entry)
        # Substring, case-insensitive match so variants such as
        # 'application/JSON; charset=utf-8' are accepted too.
        if 'json' not in _mime_type(content).lower():
            continue

        text = content.get('text')
        if not text:
            continue
        try:
            payload = json.loads(text)
        except (json.JSONDecodeError, TypeError):
            # Unparseable (or non-text) body: skip this entry.
            continue
        if not isinstance(payload, dict):
            # Valid JSON that is a list/scalar has no 'MediaContainer'.
            continue

        container = payload.get('MediaContainer', {})
        for metadata in _dict_children(container, 'Metadata'):
            for media in _dict_children(metadata, 'Media'):
                for part in _dict_children(media, 'Part'):
                    part_key = part.get('key')
                    if part_key is not None:
                        names[part_key] = part.get('file')


def _print_data_summary():
    """Print every `data` item with its value cut to at most 100 characters."""
    print("\nProcessed data (truncated values):")
    if not data:
        print("No data was processed and stored.")
        return
    for key, value in data.items():
        truncated_value = value[:100] + "..." if len(value) > 100 else value
        print(f"'{key}': '{truncated_value}'")


def main(har_file_path="data.har"):
    """
    Read a HAR archive and collect image bodies and media part names from it.

    Two passes are made over the archive's log entries:

    1. For every response whose MIME type starts with "image/", the content
       text is stored in the module-level ``data`` dict under the value of
       the ``url`` query parameter of the request URL.
    2. For every response whose MIME type contains "json", the body is parsed
       and each ``Part`` found under MediaContainer/Metadata/Media contributes
       ``names[part["key"]] = part.get("file")``.

    Afterwards the contents of ``data`` are printed with long values
    truncated. ``names`` is populated but not printed.

    Args:
        har_file_path: Path of the HAR file to read. Defaults to "data.har"
            in the current working directory.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file itself is not valid JSON.
    """
    with open(har_file_path, 'r', encoding='utf-8') as f:
        har_data = json.load(f)

    log = har_data.get('log') if isinstance(har_data, dict) else None
    entries = log.get('entries') if isinstance(log, dict) else None
    if not isinstance(entries, list) or not entries:
        print("No entries found in the HAR file.")
        return

    _collect_images(entries)
    _collect_part_names(entries)
    _print_data_summary()


if __name__ == "__main__":
    main()