Review note: line breaks and indentation in this patch were reconstructed from a
copy in which they had been collapsed. model.py and sort.py carry review fixes,
so the post-image blob IDs on their "index" lines are stale. The train.py hunk
is unchanged in content; apply with --ignore-whitespace if spacing differs.

diff --git a/model.py b/model.py
new file mode 100644
index 0000000..3da6be8
--- /dev/null
+++ b/model.py
@@ -0,0 +1,101 @@
+import torch.nn as nn
+from PIL import Image, ImageDraw
+
+# Custom transform to crop a triangle from the lower right corner
+class CropLowerRightTriangle(object):
+    """
+    Crops a rectangular area from the lower right corner of an image,
+    then masks it to a triangle.
+    The user can adjust the geometry of the triangle.
+
+    Args:
+        triangle_width: Width of the cropped rectangle, in pixels.
+        triangle_height: Height of the cropped rectangle, in pixels.
+    """
+    def __init__(self, triangle_width, triangle_height):
+        self.triangle_width = triangle_width
+        self.triangle_height = triangle_height
+
+    def __call__(self, img):
+        """Returns a new RGB image of the crop, black outside the triangle."""
+        img_width, img_height = img.size
+
+        # Define the bounding box for the crop
+        left = img_width - self.triangle_width
+        top = img_height - self.triangle_height
+        right = img_width
+        bottom = img_height
+
+        # Crop a rectangle from the lower right corner
+        cropped_img = img.crop((left, top, right, bottom))
+
+        # Create a triangular mask. The mask is the same size as the cropped rectangle.
+        mask = Image.new('L', (self.triangle_width, self.triangle_height), 0)
+        # The polygon vertices define the lower-right triangle within the rectangle.
+        # Vertices are (top-right, bottom-left, bottom-right).
+        polygon = [(self.triangle_width, 0), (0, self.triangle_height), (self.triangle_width, self.triangle_height)]
+        ImageDraw.Draw(mask).polygon(polygon, fill=255)
+
+        # Create a black background image.
+        background = Image.new("RGB", cropped_img.size, (0, 0, 0))
+
+        # Paste the original cropped image onto the background using the mask.
+        # Where the mask is white, the image is pasted. Where black, it's not.
+        background.paste(cropped_img, (0, 0), mask)
+
+        return background
+
+# Define the CNN
+class GarageDoorCNN(nn.Module):
+    """
+    Three conv/ReLU/max-pool stages followed by two fully connected layers
+    that output one score per class (2 classes: open, closed).
+
+    Args:
+        resize_dim: Side length of the square input images. Must be at least 8
+            so that something is left after the three stride-2 poolings.
+
+    Raises:
+        ValueError: If resize_dim is smaller than 8.
+    """
+    def __init__(self, resize_dim=64):
+        super().__init__()
+        if resize_dim < 8:
+            raise ValueError(f"resize_dim must be at least 8, got {resize_dim}")
+
+        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
+        self.relu1 = nn.ReLU()
+        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
+        self.relu2 = nn.ReLU()
+        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
+        self.relu3 = nn.ReLU()
+        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
+
+        # Calculate the size of the flattened features after convolutions and pooling
+        final_dim = resize_dim // (2**3)  # 3 pooling layers with stride 2
+        self.fc1_input_features = 64 * final_dim * final_dim
+
+        self.fc1 = nn.Linear(self.fc1_input_features, 512)
+        self.relu4 = nn.ReLU()
+        self.fc2 = nn.Linear(512, 2)  # 2 classes: open, closed
+
+    def forward(self, x):
+        """Returns the class scores for x; raises ValueError on a size mismatch."""
+        x = self.pool1(self.relu1(self.conv1(x)))
+        x = self.pool2(self.relu2(self.conv2(x)))
+        x = self.pool3(self.relu3(self.conv3(x)))
+
+        # view(-1, ...) can silently fold surplus features into the batch
+        # dimension when the input is not resize_dim x resize_dim, so check first.
+        features_per_sample = x.size(-3) * x.size(-2) * x.size(-1)
+        if features_per_sample != self.fc1_input_features:
+            raise ValueError(
+                f"expected {self.fc1_input_features} features per sample after pooling, "
+                f"got {features_per_sample}; input size must match resize_dim"
+            )
+        x = x.view(-1, self.fc1_input_features)  # Flatten the tensor
+        x = self.relu4(self.fc1(x))
+        x = self.fc2(x)
+        return x
diff --git a/sort.py b/sort.py
index e69de29..c42ba4c 100644
--- a/sort.py
+++ b/sort.py
@@ -0,0 +1,96 @@
+import torch
+import torch.nn.functional as F
+from torchvision import transforms
+from PIL import Image
+import os
+import shutil
+
+from model import CropLowerRightTriangle, GarageDoorCNN
+
+# Paths shared by sort_images() and the __main__ checks below, so they cannot drift apart
+MODEL_PATH = 'garage_door_cnn.pth'
+SOURCE_DIR = 'data/hourly_photos/'
+DEST_DIR = 'data/sorted/open/'
+
+def sort_images():
+    """
+    Runs the trained GarageDoorCNN on every file in SOURCE_DIR and copies
+    the ones predicted as 'open' into DEST_DIR.
+    Files that cannot be processed are reported and skipped.
+    """
+    # --- Configuration ---
+    # These must match the parameters used during training
+    TRIANGLE_CROP_WIDTH = 556
+    TRIANGLE_CROP_HEIGHT = 1184
+    RESIZE_DIM = 64
+
+    # The classes are sorted alphabetically by ImageFolder: ['closed', 'open']
+    CLASS_NAMES = ['closed', 'open']
+    TARGET_CLASS = 'open'
+    TARGET_CLASS_IDX = CLASS_NAMES.index(TARGET_CLASS)
+
+    # --- Setup ---
+    # Create destination directory if it doesn't exist
+    os.makedirs(DEST_DIR, exist_ok=True)
+
+    # Set up device
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Using device: {device}")
+
+    # Load model
+    # NOTE(review): torch.load is pickle-based; only load checkpoints from a trusted source.
+    model = GarageDoorCNN(resize_dim=RESIZE_DIM)
+    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
+    model.to(device)
+    model.eval()
+
+    # Define image transforms
+    data_transform = transforms.Compose([
+        CropLowerRightTriangle(triangle_width=TRIANGLE_CROP_WIDTH, triangle_height=TRIANGLE_CROP_HEIGHT),
+        transforms.Resize((RESIZE_DIM, RESIZE_DIM)),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+    ])
+
+    # --- Process Images ---
+    print(f"Scanning images in {SOURCE_DIR}...")
+    with torch.no_grad():
+        # Sorted so files are processed and reported in a stable order
+        for filename in sorted(os.listdir(SOURCE_DIR)):
+            file_path = os.path.join(SOURCE_DIR, filename)
+            if os.path.isfile(file_path):
+                try:
+                    # Close the file handle as soon as the pixels are decoded
+                    with Image.open(file_path) as raw_image:
+                        image = raw_image.convert('RGB')
+
+                    # Apply transformations
+                    input_tensor = data_transform(image)
+                    input_batch = input_tensor.unsqueeze(0)  # create a mini-batch as expected by the model
+                    input_batch = input_batch.to(device)
+
+                    # Get model output
+                    output = model(input_batch)
+
+                    # Get probabilities and prediction
+                    probabilities = F.softmax(output, dim=1)
+                    confidence, pred_idx = torch.max(probabilities, 1)
+
+                    if pred_idx.item() == TARGET_CLASS_IDX:
+                        print(f"Found '{TARGET_CLASS}' image: {file_path} with confidence: {confidence.item():.4f}")
+                        # Copy file
+                        shutil.copy(file_path, os.path.join(DEST_DIR, filename))
+
+                except Exception as e:
+                    # Best effort: a non-image or unreadable file must not abort the run
+                    print(f"Could not process file {file_path}: {e}")
+
+    print("Sorting complete.")
+
+if __name__ == '__main__':
+    if not os.path.exists(MODEL_PATH):
+        print(f"Error: Model file '{MODEL_PATH}' not found. Please run train.py first.")
+    elif not os.path.isdir(SOURCE_DIR):
+        print(f"Error: Source directory '{SOURCE_DIR.rstrip('/')}' not found.")
+    else:
+        sort_images()
diff --git a/train.py b/train.py
index 4856f1f..053644f 100644
--- a/train.py
+++ b/train.py
@@ -6,75 +6,8 @@ from torchvision import datasets, transforms
 from PIL import Image, ImageDraw
 import os
 
-# Custom transform to crop a triangle from the lower right corner
-class CropLowerRightTriangle(object):
-    """
-    Crops a rectangular area from the lower right corner of an image,
-    then masks it to a triangle.
-    The user can adjust the geometry of the triangle.
-    """
-    def __init__(self, triangle_width, triangle_height):
-        self.triangle_width = triangle_width
-        self.triangle_height = triangle_height
+from model import CropLowerRightTriangle, GarageDoorCNN
 
-    def __call__(self, img):
-        img_width, img_height = img.size
-
-        # Define the bounding box for the crop
-        left = img_width - self.triangle_width
-        top = img_height - self.triangle_height
-        right = img_width
-        bottom = img_height
-
-        # Crop a rectangle from the lower right corner
-        cropped_img = img.crop((left, top, right, bottom))
-
-        # Create a triangular mask. The mask is the same size as the cropped rectangle.
-        mask = Image.new('L', (self.triangle_width, self.triangle_height), 0)
-        # The polygon vertices define the lower-right triangle within the rectangle.
-        # Vertices are (top-right, bottom-left, bottom-right).
-        polygon = [(self.triangle_width, 0), (0, self.triangle_height), (self.triangle_width, self.triangle_height)]
-        ImageDraw.Draw(mask).polygon(polygon, fill=255)
-
-        # Create a black background image.
-        background = Image.new("RGB", cropped_img.size, (0, 0, 0))
-
-        # Paste the original cropped image onto the background using the mask.
-        # Where the mask is white, the image is pasted. Where black, it's not.
-        background.paste(cropped_img, (0, 0), mask)
-
-        return background
-
-# Define the CNN
-class GarageDoorCNN(nn.Module):
-    def __init__(self, resize_dim=64):
-        super(GarageDoorCNN, self).__init__()
-        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
-        self.relu1 = nn.ReLU()
-        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
-        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
-        self.relu2 = nn.ReLU()
-        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
-        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
-        self.relu3 = nn.ReLU()
-        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
-
-        # Calculate the size of the flattened features after convolutions and pooling
-        final_dim = resize_dim // (2**3) # 3 pooling layers with stride 2
-        self.fc1_input_features = 64 * final_dim * final_dim
-
-        self.fc1 = nn.Linear(self.fc1_input_features, 512)
-        self.relu4 = nn.ReLU()
-        self.fc2 = nn.Linear(512, 2) # 2 classes: open, closed
-
-    def forward(self, x):
-        x = self.pool1(self.relu1(self.conv1(x)))
-        x = self.pool2(self.relu2(self.conv2(x)))
-        x = self.pool3(self.relu3(self.conv3(x)))
-        x = x.view(-1, self.fc1_input_features) # Flatten the tensor
-        x = self.relu4(self.fc1(x))
-        x = self.fc2(x)
-        return x
 
 def train_model():
     # --- Hyperparameters and Configuration ---