feat: add script to process JSON and sort images
This commit is contained in:
87
move.py
87
move.py
@@ -0,0 +1,87 @@
|
||||
import os
|
||||
import json
|
||||
import glob
|
||||
import shutil
|
||||
from urllib.parse import urlparse, parse_qs, unquote
|
||||
|
||||
def main(data_dir='data'):
    """Copy labelled images into ``open``/``closed`` folders.

    Every ``*.json`` file directly inside *data_dir* is read. Each file is
    expected to contain a list of records, and each record a dict with an
    ``image`` URL and a ``choice`` string. The image file name is the last
    path component of the URL's ``d`` query parameter; the file is looked up
    in ``<data_dir>/hourly_photos`` and copied to ``<data_dir>/open`` or
    ``<data_dir>/closed`` depending on ``choice`` (compared
    case-insensitively).

    Unreadable files and invalid records are reported with ``print`` and
    skipped; they never abort the run.

    Args:
        data_dir: Directory holding the JSON files and the ``hourly_photos``
            folder. Defaults to ``'data'`` relative to the working directory.
    """
    open_dir = os.path.join(data_dir, 'open')
    closed_dir = os.path.join(data_dir, 'closed')

    # This is an assumption based on the path found in the 'd' parameter of
    # the image URL, e.g. ...?d=.../data/hourly_photos/filename.jpg
    source_images_dir = os.path.join(data_dir, 'hourly_photos')

    os.makedirs(open_dir, exist_ok=True)
    os.makedirs(closed_dir, exist_ok=True)

    # Lower-cased 'choice' value -> destination directory.
    dest_dirs = {'open': open_dir, 'closed': closed_dir}

    # Escape data_dir so glob metacharacters in the directory name are taken
    # literally; sort so files are processed in a reproducible order.
    pattern = os.path.join(glob.escape(data_dir), '*.json')
    for json_file_path in sorted(glob.glob(pattern)):
        print(f"Processing {json_file_path}...")
        records = _load_records(json_file_path)
        if records is None:
            continue
        for record in records:
            _process_record(record, json_file_path, source_images_dir, dest_dirs)


def _load_records(json_file_path):
    """Return the list stored in *json_file_path*, or None after a warning."""
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(json_file_path, 'r', encoding='utf-8') as f:
            records = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f" Warning: Could not read or parse JSON from {json_file_path}. Error: {e}")
        return None

    if not isinstance(records, list):
        print(f" Warning: JSON file {json_file_path} does not contain a list.")
        return None
    return records


def _filename_from_url(image_url):
    """Return the file name carried by the URL's 'd' parameter, or None if absent."""
    query_params = parse_qs(urlparse(image_url).query)
    if 'd' not in query_params:
        return None
    # parse_qs already percent-decodes once; the extra unquote copes with
    # doubly-encoded values. basename must run AFTER the final decode so an
    # encoded '/' or '..' cannot survive into the file name and escape the
    # source/destination directories.
    return os.path.basename(unquote(query_params['d'][0]))


def _process_record(record, json_file_path, source_images_dir, dest_dirs):
    """Validate one record and copy its image; warn and return on any problem."""
    if not isinstance(record, dict) or 'image' not in record or 'choice' not in record:
        print(f" Warning: Skipping invalid record in {json_file_path}: {record}")
        return

    image_url = record['image']
    choice = record['choice']

    if not isinstance(choice, str):
        print(f" Warning: 'choice' is not a string. Skipping record: {record}")
        return
    if not isinstance(image_url, str):
        print(f" Warning: 'image' is not a string. Skipping record: {record}")
        return

    try:
        filename = _filename_from_url(image_url)
    except ValueError as e:
        # urlparse rejects some malformed URLs (e.g. a broken IPv6 host).
        print(f" Error processing record {record}: {e}")
        return
    if filename is None:
        print(f" Warning: No 'd' parameter in image URL: {image_url}")
        return

    source_path = os.path.join(source_images_dir, filename)
    if not os.path.isfile(source_path):
        print(f" Warning: Source image not found or is not a file: {source_path}")
        return

    dest_dir = dest_dirs.get(choice.lower())
    if dest_dir is None:
        print(f" Warning: Unknown choice '{choice}' for image {filename}. Skipping.")
        return

    dest_path = os.path.join(dest_dir, filename)
    print(f" Copying {source_path} to {dest_path}")
    try:
        shutil.copy2(source_path, dest_path)
    except OSError as e:
        # Includes shutil.Error / SameFileError; keep going with the next record.
        print(f" Error processing record {record}: {e}")
|
||||
|
||||
# Run the sorter only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|
||||
|
Reference in New Issue
Block a user