"""Train a small CNN that classifies garage-door snapshots as open or closed.

Each image is reduced to a triangular region cut from its lower-right corner,
resized to a square, and fed to a three-stage convolutional network.
"""

import os

import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image, ImageDraw
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms

# Root folder read by ImageFolder; it must contain one sub-folder per class.
DATA_DIR = 'data/labelled'
# Class sub-folders that have to exist under DATA_DIR before training starts.
CLASS_NAMES = ('open', 'closed')


# Custom transform to crop a triangle from the lower right corner
class CropLowerRightTriangle(object):
    """Crop the lower-right corner of an image and mask it to a triangle.

    A ``triangle_width`` x ``triangle_height`` rectangle is cut from the
    lower-right corner of the image; everything outside the triangle whose
    vertices are the rectangle's top-right, bottom-left and bottom-right
    corners is painted black. Adjust the two sizes to change the geometry.

    Args:
        triangle_width: Width in pixels of the cropped rectangle.
        triangle_height: Height in pixels of the cropped rectangle.

    Raises:
        ValueError: If either dimension is not strictly positive.
    """

    def __init__(self, triangle_width, triangle_height):
        if triangle_width <= 0 or triangle_height <= 0:
            raise ValueError(
                "triangle_width and triangle_height must be positive, got "
                f"{triangle_width}x{triangle_height}"
            )
        self.triangle_width = triangle_width
        self.triangle_height = triangle_height

    def __repr__(self):
        return (
            f"{type(self).__name__}(triangle_width={self.triangle_width}, "
            f"triangle_height={self.triangle_height})"
        )

    def __call__(self, img):
        """Return an RGB image holding only the lower-right triangle of *img*.

        Args:
            img: A PIL image (anything exposing ``size`` and ``crop``).

        Returns:
            A new RGB PIL image of size ``(triangle_width, triangle_height)``
            that is black outside the triangle.
        """
        img_width, img_height = img.size

        # Bounding box of the rectangle anchored at the lower-right corner.
        left = img_width - self.triangle_width
        top = img_height - self.triangle_height
        cropped_img = img.crop((left, top, img_width, img_height))

        # Single-channel mask, same size as the crop: 0 = hide, 255 = keep.
        mask = Image.new('L', (self.triangle_width, self.triangle_height), 0)
        # Vertices are (top-right, bottom-left, bottom-right) of the rectangle.
        polygon = [
            (self.triangle_width, 0),
            (0, self.triangle_height),
            (self.triangle_width, self.triangle_height),
        ]
        ImageDraw.Draw(mask).polygon(polygon, fill=255)

        # Paste the crop onto black; only pixels under the white part of the
        # mask are copied, so the area outside the triangle stays black.
        background = Image.new("RGB", cropped_img.size, (0, 0, 0))
        background.paste(cropped_img, (0, 0), mask)
        return background


# Define the CNN
class GarageDoorCNN(nn.Module):
    """Three conv/ReLU/max-pool stages followed by two fully connected layers.

    Args:
        resize_dim: Side length of the square input images. Each of the three
            pooling layers halves it (rounding down), so it must be at least 8
            for the first fully connected layer to have any input features.

    Raises:
        ValueError: If ``resize_dim`` is smaller than 8.
    """

    def __init__(self, resize_dim=64):
        super(GarageDoorCNN, self).__init__()
        if resize_dim < 8:
            raise ValueError(
                f"resize_dim must be at least 8 (three 2x poolings), got {resize_dim}"
            )

        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # The padded 3x3 convolutions keep the spatial size; only the three
        # stride-2 poolings shrink it, each by a factor of two.
        final_dim = resize_dim // (2 ** 3)
        self.fc1_input_features = 64 * final_dim * final_dim

        self.fc1 = nn.Linear(self.fc1_input_features, 512)
        self.relu4 = nn.ReLU()
        self.fc2 = nn.Linear(512, 2)  # 2 classes: open, closed

    def forward(self, x):
        """Return the two class logits for a batch of images.

        Args:
            x: Batch of 3-channel images whose height and width match the
                ``resize_dim`` the model was built with.

        Returns:
            Tensor with one row of 2 logits per input image.
        """
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = self.pool3(self.relu3(self.conv3(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, n),
        # this can never regroup samples when the input size is wrong; a
        # mismatch surfaces as a shape error in fc1 instead.
        x = torch.flatten(x, 1)
        x = self.relu4(self.fc1(x))
        return self.fc2(x)


def _train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over *loader*; return the mean per-sample loss."""
    model.train()
    running_loss = 0.0
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # The criterion returns a batch mean; weight it by the batch size so
        # a smaller final batch does not skew the epoch average.
        running_loss += loss.item() * inputs.size(0)
    return running_loss / len(loader.dataset)


def _evaluate(model, loader, criterion, device):
    """Score *model* on *loader*; return (mean per-sample loss, accuracy)."""
    model.eval()
    total_loss = 0.0
    correct = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            total_loss += criterion(outputs, labels).item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            # Accumulate as a Python int so the counter has one type
            # throughout, instead of starting as int and turning into a tensor.
            correct += (preds == labels).sum().item()
    sample_count = len(loader.dataset)
    return total_loss / sample_count, correct / sample_count


def train_model(
    data_dir=DATA_DIR,
    model_save_path='garage_door_cnn.pth',
    num_epochs=10,
    batch_size=32,
    learning_rate=0.001,
    triangle_crop_width=556,
    triangle_crop_height=1184,
    resize_dim=64,
):
    """Train ``GarageDoorCNN`` on an image folder and save its weights.

    The dataset is split 80/20 into training and validation subsets. After
    every epoch the training loss, validation loss and validation accuracy
    are printed; the final ``state_dict`` is written to ``model_save_path``.

    Args:
        data_dir: Folder with one sub-folder of images per class.
        model_save_path: Destination file for the trained ``state_dict``.
        num_epochs: Number of passes over the training subset.
        batch_size: Mini-batch size for both loaders.
        learning_rate: Learning rate passed to the Adam optimiser.
        triangle_crop_width: Width of the lower-right crop, in pixels.
        triangle_crop_height: Height of the lower-right crop, in pixels.
        resize_dim: Side length the cropped image is resized to (square).

    Raises:
        ValueError: If the dataset does not have exactly two classes, or is
            too small to produce non-empty training and validation subsets.
    """
    # --- Data Preparation ---
    data_transforms = transforms.Compose([
        CropLowerRightTriangle(
            triangle_width=triangle_crop_width,
            triangle_height=triangle_crop_height,
        ),
        transforms.Resize((resize_dim, resize_dim)),
        transforms.ToTensor(),
        # Per-channel statistics commonly used for ImageNet-style inputs.
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    full_dataset = datasets.ImageFolder(data_dir, transform=data_transforms)

    # The network has exactly two outputs; any extra class folder would
    # otherwise only show up as an out-of-range target error inside the loss.
    if len(full_dataset.classes) != 2:
        raise ValueError(
            f"Expected exactly 2 class folders in '{data_dir}', "
            f"found {len(full_dataset.classes)}: {full_dataset.classes}"
        )

    # Split into training and validation sets
    train_size = int(0.8 * len(full_dataset))
    val_size = len(full_dataset) - train_size
    if train_size == 0 or val_size == 0:
        raise ValueError(
            f"Dataset in '{data_dir}' has {len(full_dataset)} image(s), which is "
            "too few for an 80/20 train/validation split."
        )
    train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # --- Model, Loss, Optimizer ---
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    model = GarageDoorCNN(resize_dim=resize_dim).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # --- Training Loop ---
    print("Starting training...")
    for epoch in range(num_epochs):
        epoch_loss = _train_one_epoch(model, train_loader, criterion, optimizer, device)
        print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {epoch_loss:.4f}")

        # --- Validation Loop ---
        val_epoch_loss, val_epoch_acc = _evaluate(model, val_loader, criterion, device)
        print(f"Validation Loss: {val_epoch_loss:.4f}, Accuracy: {val_epoch_acc:.4f}")

    # --- Save the trained model ---
    torch.save(model.state_dict(), model_save_path)
    print(f"Model saved to {model_save_path}")


if __name__ == '__main__':
    # Check that every expected class folder exists before touching the data.
    class_dirs = [os.path.join(DATA_DIR, name) for name in CLASS_NAMES]
    if not all(os.path.isdir(path) for path in class_dirs):
        # Report the paths that are actually checked, not a different location.
        print(
            f"Error: Data directories '{DATA_DIR}/open' and "
            f"'{DATA_DIR}/closed' not found."
        )
        print("Please create them and place your image snapshots inside.")
    else:
        train_model()