diff --git a/har_parser.py b/har_parser.py index e3603d1..2157094 100644 --- a/har_parser.py +++ b/har_parser.py @@ -1,4 +1,7 @@ import json +from urllib.parse import urlparse, parse_qs + +data = {} def main(): """ @@ -14,7 +17,6 @@ def main(): print("No entries found in the HAR file.") return - print("Files found in the HAR archive:") for entry in entries: response = entry.get('response', {}) content = response.get('content', {}) @@ -23,14 +25,30 @@ def main(): if not mime_type.startswith('image/'): continue - request = entry.get('request', {}) - url = request.get('url') - if url: - print(url) - else: - # This case might be less likely if we are filtering by mimeType, - # but kept for robustness if an image entry somehow lacks a URL. - print("Image entry found with no request URL.") + request_obj = entry.get('request', {}) + request_url_str = request_obj.get('url') + + if not request_url_str: + # If the entry's main request URL is missing, skip. + continue + + parsed_request_url = urlparse(request_url_str) + query_params = parse_qs(parsed_request_url.query) + + # The key for our 'data' dict is the value of the 'url' query parameter + # from the request's URL string. + key_from_query_param = query_params.get('url', [None])[0] + + if not key_from_query_param: + # If the 'url' query parameter is not found in the request_url_str, skip. + continue + + # The value for our 'data' dict is the response's base64 encoded text. + response_text = content.get('text') + + if response_text is not None: + # Ensure response_text is not None (it can be an empty string for 0-byte files) + data[key_from_query_param] = response_text if __name__ == "__main__":