88 lines
3.1 KiB
Python
88 lines
3.1 KiB
Python
import os
|
|
import json
|
|
import glob
|
|
import shutil
|
|
from urllib.parse import urlparse, parse_qs, unquote
|
|
|
|
def _candidate_filenames(image_url):
    """Return the file names that the 'd' parameter of *image_url* may refer to.

    ``parse_qs`` already percent-decodes query values, so the base name of the
    decoded 'd' value is the primary candidate. A second candidate, decoded
    once more, is kept as a fallback for URLs whose 'd' value was
    percent-encoded twice; it is reduced to a base name again so that an
    encoded '/' cannot add directory components to the result.

    Returns an empty list when the URL carries no non-blank 'd' parameter.
    A ValueError raised by ``urlparse`` for a malformed URL propagates.
    """
    d_values = parse_qs(urlparse(image_url).query).get('d')
    if not d_values:
        return []
    primary = os.path.basename(d_values[0])
    fallback = os.path.basename(unquote(primary))
    return [primary] if fallback == primary else [primary, fallback]


def main():
    """Copy labelled images into per-label directories.

    Reads every ``*.json`` file directly inside ``data/``. Each file is
    expected to hold a list of records with an 'image' URL and a 'choice'
    label. The image file name is taken from the URL's 'd' query parameter,
    looked up in ``data/hourly_photos/`` and copied with ``shutil.copy2`` to
    ``data/labelled/open/`` or ``data/labelled/closed/`` according to the
    case-insensitive 'choice' value.

    Unreadable files and unusable records are reported on stdout and skipped
    so that the remaining input is still processed.
    """
    data_dir = 'data'
    dest_dirs = {
        'open': os.path.join(data_dir, 'labelled/open'),
        'closed': os.path.join(data_dir, 'labelled/closed'),
    }

    # This is an assumption based on the path found in the 'd' parameter of the image URL.
    # e.g., ...?d=.../data/hourly_photos/filename.jpg
    source_images_dir = os.path.join(data_dir, 'hourly_photos')

    for label_dir in dest_dirs.values():
        os.makedirs(label_dir, exist_ok=True)

    # glob returns files in arbitrary order; sort so runs are reproducible.
    for json_file_path in sorted(glob.glob(os.path.join(data_dir, '*.json'))):
        print(f"Processing {json_file_path}...")
        try:
            with open(json_file_path, 'r', encoding='utf-8') as f:
                records = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError,
            # so one badly encoded file cannot abort the whole run.
            print(f" Warning: Could not read or parse JSON from {json_file_path}. Error: {e}")
            continue

        if not isinstance(records, list):
            print(f" Warning: JSON file {json_file_path} does not contain a list.")
            continue

        for record in records:
            if not isinstance(record, dict) or 'image' not in record or 'choice' not in record:
                print(f" Warning: Skipping invalid record in {json_file_path}: {record}")
                continue

            image_url = record['image']
            choice = record['choice']

            if not isinstance(choice, str):
                print(f" Warning: 'choice' is not a string. Skipping record: {record}")
                continue

            if not isinstance(image_url, str):
                print(f" Warning: 'image' is not a string. Skipping record: {record}")
                continue

            try:
                filenames = _candidate_filenames(image_url)
            except ValueError as e:
                print(f" Error processing record {record}: {e}")
                continue

            if not filenames:
                print(f" Warning: No 'd' parameter in image URL: {image_url}")
                continue

            # Use the first candidate that exists on disk; if none does, fall
            # back to the primary name so the warning shows what was expected.
            filename = next(
                (name for name in filenames
                 if os.path.isfile(os.path.join(source_images_dir, name))),
                filenames[0],
            )
            source_path = os.path.join(source_images_dir, filename)

            if not os.path.isfile(source_path):
                print(f" Warning: Source image not found or is not a file: {source_path}")
                continue

            dest_dir = dest_dirs.get(choice.lower())
            if dest_dir is None:
                print(f" Warning: Unknown choice '{choice}' for image {filename}. Skipping.")
                continue

            dest_path = os.path.join(dest_dir, filename)

            print(f" Copying {source_path} to {dest_path}")
            try:
                shutil.copy2(source_path, dest_path)
            except OSError as e:
                # A failed copy (permissions, disk full, ...) skips only this record.
                print(f" Error processing record {record}: {e}")
# Call main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()