import json
from urllib.parse import urlparse, parse_qs

# Module-level store: maps the value of each image request's 'url' query
# parameter to that response's body text (typically base64-encoded in HAR).
# Mutated in place by main() so importers observe the collected results.
data = {}


def _key_from_request_url(request_url_str):
    """Return the 'url' query-parameter value of *request_url_str*, or None.

    The HAR entries of interest are proxy-style requests whose own URL
    carries the real target in a ``url=`` query parameter; that target is
    used as the key in the module-level ``data`` dict.
    """
    query_params = parse_qs(urlparse(request_url_str).query)
    # parse_qs returns a list per key; take the first occurrence if present.
    return query_params.get('url', [None])[0]


def _collect_image_entries(entries, out):
    """Populate *out* with image response bodies found in HAR *entries*.

    For every entry whose response content has an ``image/*`` MIME type,
    store ``content['text']`` under the entry's 'url' query-parameter key.
    Entries missing a request URL, the 'url' parameter, or body text are
    skipped silently. Empty-string text (0-byte files) is still stored.
    """
    for entry in entries:
        content = entry.get('response', {}).get('content', {})
        if not content.get('mimeType', '').startswith('image/'):
            continue
        request_url_str = entry.get('request', {}).get('url')
        if not request_url_str:
            # Entry's main request URL is missing: nothing to key on.
            continue
        key = _key_from_request_url(request_url_str)
        if not key:
            # No 'url' query parameter in the request URL: skip.
            continue
        text = content.get('text')
        # 'is not None' keeps legitimate empty strings (0-byte files).
        if text is not None:
            out[key] = text


def _print_report(collected):
    """Print the collected {key: body} pairs, truncating long values."""
    print("\nProcessed data (truncated values):")
    if not collected:
        print("No data was processed and stored.")
        return
    for key, value in collected.items():
        truncated_value = value[:100] + "..." if len(value) > 100 else value
        print(f"'{key}': '{truncated_value}'")


def main():
    """Load the HAR archive ``data.har`` and report its image payloads.

    Reads the HAR file, collects every ``image/*`` response body into the
    module-level ``data`` dict (keyed by the request's 'url' query
    parameter), and prints a truncated summary of what was stored.

    Raises:
        FileNotFoundError: if ``data.har`` does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    har_file_path = "data.har"
    with open(har_file_path, 'r', encoding='utf-8') as f:
        har_data = json.load(f)

    entries = har_data.get('log', {}).get('entries', [])
    if not entries:
        print("No entries found in the HAR file.")
        return

    _collect_image_entries(entries, data)
    _print_report(data)


if __name__ == "__main__":
    main()