import os
import json
import glob
import shutil
from urllib.parse import urlparse, parse_qs, unquote


def _load_records(json_file_path):
    """Return the list of records stored in *json_file_path*, or None.

    None is returned (after printing a warning) when the file cannot be
    read, is not valid UTF-8 JSON, or its top-level value is not a list.
    """
    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            records = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError,
        # so one badly encoded file cannot abort the whole run.
        print(f" Warning: Could not read or parse JSON from {json_file_path}. Error: {e}")
        return None

    if not isinstance(records, list):
        print(f" Warning: JSON file {json_file_path} does not contain a list.")
        return None
    return records


def _filename_from_url(image_url):
    """Return the image file name named by the URL's 'd' parameter, or None.

    None means the URL has no 'd' query parameter. May raise ValueError if
    the URL cannot be parsed.
    """
    query_params = parse_qs(urlparse(image_url).query)
    if 'd' not in query_params:
        return None
    # parse_qs has already percent-decoded the value once; the extra unquote
    # keeps supporting double-encoded paths. basename is applied LAST so that
    # an encoded separator (e.g. '%252F') can never smuggle '../' components
    # into the file name.
    return os.path.basename(unquote(query_params['d'][0]))


def _copy_labelled_image(record, json_file_path, source_images_dir, dest_dirs):
    """Validate one record and copy its image into the matching label directory.

    *dest_dirs* maps a lower-cased choice to its destination directory.
    Every problem is reported with a printed warning/error and the record is
    skipped; nothing is raised to the caller.
    """
    if not isinstance(record, dict) or 'image' not in record or 'choice' not in record:
        print(f" Warning: Skipping invalid record in {json_file_path}: {record}")
        return

    image_url = record['image']
    choice = record['choice']

    if not isinstance(choice, str):
        print(f" Warning: 'choice' is not a string. Skipping record: {record}")
        return
    if not isinstance(image_url, str):
        print(f" Warning: 'image' is not a string. Skipping record: {record}")
        return

    try:
        filename = _filename_from_url(image_url)
    except ValueError as e:
        print(f" Error processing record {record}: {e}")
        return
    if filename is None:
        print(f" Warning: No 'd' parameter in image URL: {image_url}")
        return

    source_path = os.path.join(source_images_dir, filename)
    if not os.path.isfile(source_path):
        print(f" Warning: Source image not found or is not a file: {source_path}")
        return

    dest_dir = dest_dirs.get(choice.lower())
    if dest_dir is None:
        print(f" Warning: Unknown choice '{choice}' for image {filename}. Skipping.")
        return

    dest_path = os.path.join(dest_dir, filename)
    print(f" Copying {source_path} to {dest_path}")
    try:
        shutil.copy2(source_path, dest_path)
    except OSError as e:
        print(f" Error processing record {record}: {e}")


def main(data_dir: str = 'data') -> None:
    """Sort labelled images into per-label directories.

    Parses every ``*.json`` file directly inside *data_dir*. Each file is
    expected to hold a list of records with an ``'image'`` URL and a
    ``'choice'`` label. The image file name is taken from the URL's ``d``
    query parameter, looked up in ``<data_dir>/hourly_photos`` and copied to
    ``<data_dir>/labelled/open`` or ``<data_dir>/labelled/closed`` depending
    on the (case-insensitive) choice. Unusable files and records are
    reported on stdout and skipped.

    Args:
        data_dir: Directory holding the JSON exports and the
            ``hourly_photos`` folder. Defaults to ``'data'``.
    """
    open_dir = os.path.join(data_dir, 'labelled/open')
    closed_dir = os.path.join(data_dir, 'labelled/closed')
    # This is an assumption based on the path found in the 'd' parameter of
    # the image URL, e.g. ...?d=.../data/hourly_photos/filename.jpg
    source_images_dir = os.path.join(data_dir, 'hourly_photos')

    os.makedirs(open_dir, exist_ok=True)
    os.makedirs(closed_dir, exist_ok=True)
    dest_dirs = {'open': open_dir, 'closed': closed_dir}

    # Escape the directory so glob metacharacters in its name are literal,
    # and sort so files are processed in a deterministic order.
    json_files = sorted(glob.glob(os.path.join(glob.escape(data_dir), '*.json')))

    for json_file_path in json_files:
        print(f"Processing {json_file_path}...")
        records = _load_records(json_file_path)
        if records is None:
            continue
        for record in records:
            _copy_labelled_image(record, json_file_path, source_images_dir, dest_dirs)


if __name__ == '__main__':
    main()