plex-photo-scraper/har_parser.py


import json
from urllib.parse import urlparse, parse_qs


def main():
    """
    Opens the HAR archive "data.har", collects the base64-encoded body of
    every image response keyed by the 'url' query parameter of its request,
    and prints a truncated preview of each collected entry.
    """
    data = {}
    har_file_path = "data.har"
    with open(har_file_path, 'r', encoding='utf-8') as f:
        har_data = json.load(f)

    entries = har_data.get('log', {}).get('entries', [])
    if not entries:
        print("No entries found in the HAR file.")
        return
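
    # Per the HAR 1.2 format, each entry carries the original request under
    # 'request' (including its full 'url') and the response body under
    # 'response.content' ('mimeType', 'text', and optionally 'encoding',
    # which is "base64" when the body is binary, as it is for images).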
    for entry in entries:
        response = entry.get('response', {})
        content = response.get('content', {})
        mime_type = content.get('mimeType', '')
        if not mime_type.startswith('image/'):
            continue

        request_obj = entry.get('request', {})
        request_url_str = request_obj.get('url')
        if not request_url_str:
            # If the entry's main request URL is missing, skip it.
            continue

        parsed_request_url = urlparse(request_url_str)
        query_params = parse_qs(parsed_request_url.query)

        # The key for our 'data' dict is the value of the 'url' query
        # parameter from the request's URL string.
        key_from_query_param = query_params.get('url', [None])[0]
        if not key_from_query_param:
            # If the 'url' query parameter is missing, skip the entry.
            continue

        # The value for our 'data' dict is the response's base64-encoded text.
        # An empty string is kept: a 0-byte file is still a valid entry.
        response_text = content.get('text')
        if response_text is not None:
            data[key_from_query_param] = response_text
print("\nProcessed data (truncated values):")
if not data:
print("No data was processed and stored.")
else:
for key, value in data.items():
if len(value) > 100:
truncated_value = value[:100] + "..."
else:
truncated_value = value
print(f"'{key}': '{truncated_value}'")


if __name__ == "__main__":
    main()