56 lines
1.7 KiB
Python
56 lines
1.7 KiB
Python
import json
|
|
from urllib.parse import urlparse, parse_qs
|
|
|
|
# Accumulator filled by main(): maps the value of each image request's
# 'url' query parameter to the response body text recorded in the HAR file.
data = {}


def main(har_file_path="data.har"):
    """
    Collect image response bodies from a HAR archive into ``data``.

    Every entry whose response MIME type starts with "image/" is inspected.
    The dictionary key is the first value of the 'url' query parameter found
    in the entry's request URL; the dictionary value is the response content
    text exactly as stored in the archive (it is not decoded here).

    Entries are skipped when they are not images, when the request URL is
    missing, when the request URL carries no 'url' query parameter, or when
    the response has no 'text' field at all. An empty string is kept, since
    it is a legitimate body for a 0-byte file.

    Args:
        har_file_path: Path of the HAR (JSON) file to read. Defaults to
            "data.har" in the current working directory.

    Returns:
        The module-level ``data`` dict, updated in place.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(har_file_path, 'r', encoding='utf-8') as f:
        har_data = json.load(f)

    # `or {}` / `or []` (rather than .get defaults) also cover fields that are
    # present but JSON null, which .get(key, default) would pass through as None.
    entries = (har_data.get('log') or {}).get('entries') or []
    if not entries:
        print("No entries found in the HAR file.")
        return data

    for entry in entries:
        content = (entry.get('response') or {}).get('content') or {}
        mime_type = content.get('mimeType') or ''
        if not mime_type.startswith('image/'):
            continue

        request_url_str = (entry.get('request') or {}).get('url')
        if not request_url_str:
            # If the entry's main request URL is missing, skip.
            continue

        # The key for our 'data' dict is the value of the 'url' query
        # parameter from the request's URL string (percent-decoded by parse_qs).
        query_params = parse_qs(urlparse(request_url_str).query)
        key_from_query_param = query_params.get('url', [None])[0]
        if not key_from_query_param:
            # No 'url' query parameter in the request URL, skip.
            continue

        # Compare against None so that an empty string (0-byte file) is kept.
        response_text = content.get('text')
        if response_text is not None:
            data[key_from_query_param] = response_text

    return data
|
|
|
|
|
|
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|