diff --git a/train.py b/train.py
index e69de29..0e87226 100644
--- a/train.py
+++ b/train.py
@@ -0,0 +1,165 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, random_split
+from torchvision import datasets, transforms
+from PIL import Image, ImageDraw
+import os
+
+# Custom transform to crop a triangle from the lower right corner
+class CropLowerRightTriangle(object):
+    """
+    Crops a rectangular area from the lower right corner of an image,
+    then masks it to a triangle.
+    The user can adjust the geometry of the triangle.
+    """
+    def __init__(self, triangle_width, triangle_height):
+        self.triangle_width = triangle_width
+        self.triangle_height = triangle_height
+
+    def __call__(self, img):
+        img_width, img_height = img.size
+
+        # Define the bounding box for the crop
+        left = img_width - self.triangle_width
+        top = img_height - self.triangle_height
+        right = img_width
+        bottom = img_height
+
+        # Crop a rectangle from the lower right corner
+        cropped_img = img.crop((left, top, right, bottom))
+
+        # Create a triangular mask. The mask is the same size as the cropped rectangle.
+        mask = Image.new('L', (self.triangle_width, self.triangle_height), 0)
+        # The polygon vertices define the lower-right triangle within the rectangle.
+        # Vertices are (top-right, bottom-left, bottom-right).
+        polygon = [(self.triangle_width, 0), (0, self.triangle_height), (self.triangle_width, self.triangle_height)]
+        ImageDraw.Draw(mask).polygon(polygon, fill=255)
+
+        # Create a black background image.
+        background = Image.new("RGB", cropped_img.size, (0, 0, 0))
+
+        # Paste the original cropped image onto the background using the mask.
+        # Where the mask is white, the image is pasted. Where black, it's not.
+        background.paste(cropped_img, (0, 0), mask)
+
+        return background
+
+# Define the CNN
+class GarageDoorCNN(nn.Module):
+    def __init__(self, resize_dim=64):
+        super(GarageDoorCNN, self).__init__()
+        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
+        self.relu1 = nn.ReLU()
+        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
+        self.relu2 = nn.ReLU()
+        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
+        self.relu3 = nn.ReLU()
+        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
+
+        # Calculate the size of the flattened features after convolutions and pooling
+        final_dim = resize_dim // (2**3)  # 3 pooling layers with stride 2
+        self.fc1_input_features = 64 * final_dim * final_dim
+
+        self.fc1 = nn.Linear(self.fc1_input_features, 512)
+        self.relu4 = nn.ReLU()
+        self.fc2 = nn.Linear(512, 2)  # 2 classes: open, closed
+
+    def forward(self, x):
+        x = self.pool1(self.relu1(self.conv1(x)))
+        x = self.pool2(self.relu2(self.conv2(x)))
+        x = self.pool3(self.relu3(self.conv3(x)))
+        x = x.view(-1, self.fc1_input_features)  # Flatten the tensor
+        x = self.relu4(self.fc1(x))
+        x = self.fc2(x)
+        return x
+
+def train_model():
+    # --- Hyperparameters and Configuration ---
+    DATA_DIR = 'data'
+    MODEL_SAVE_PATH = 'garage_door_cnn.pth'
+    NUM_EPOCHS = 10
+    BATCH_SIZE = 32
+    LEARNING_RATE = 0.001
+    # For the custom crop transform. User can adjust these.
+    TRIANGLE_CROP_WIDTH = 400
+    TRIANGLE_CROP_HEIGHT = 400
+    RESIZE_DIM = 64  # Resize cropped image to this dimension (square)
+
+    # --- Data Preparation ---
+    # Define transforms
+    data_transforms = transforms.Compose([
+        CropLowerRightTriangle(triangle_width=TRIANGLE_CROP_WIDTH, triangle_height=TRIANGLE_CROP_HEIGHT),
+        transforms.Resize((RESIZE_DIM, RESIZE_DIM)),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+    ])
+
+    # Load dataset with ImageFolder
+    full_dataset = datasets.ImageFolder(DATA_DIR, transform=data_transforms)
+
+    # Split into training and validation sets
+    train_size = int(0.8 * len(full_dataset))
+    val_size = len(full_dataset) - train_size
+    train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])
+
+    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
+    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
+
+    # --- Model, Loss, Optimizer ---
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Using device: {device}")
+
+    model = GarageDoorCNN(resize_dim=RESIZE_DIM).to(device)
+    criterion = nn.CrossEntropyLoss()
+    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
+
+    # --- Training Loop ---
+    print("Starting training...")
+    for epoch in range(NUM_EPOCHS):
+        model.train()
+        running_loss = 0.0
+        for inputs, labels in train_loader:
+            inputs, labels = inputs.to(device), labels.to(device)
+
+            optimizer.zero_grad()
+            outputs = model(inputs)
+            loss = criterion(outputs, labels)
+            loss.backward()
+            optimizer.step()
+
+            running_loss += loss.item() * inputs.size(0)
+
+        epoch_loss = running_loss / len(train_dataset)
+        print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Training Loss: {epoch_loss:.4f}")
+
+        # --- Validation Loop ---
+        model.eval()
+        val_loss = 0.0
+        corrects = 0
+        with torch.no_grad():
+            for inputs, labels in val_loader:
+                inputs, labels = inputs.to(device), labels.to(device)
+                outputs = model(inputs)
+                loss = criterion(outputs, labels)
+                val_loss += loss.item() * inputs.size(0)
+                _, preds = torch.max(outputs, 1)
+                corrects += torch.sum(preds == labels.data)
+
+        val_epoch_loss = val_loss / len(val_dataset)
+        val_epoch_acc = corrects.double() / len(val_dataset)
+        print(f"Validation Loss: {val_epoch_loss:.4f}, Accuracy: {val_epoch_acc:.4f}")
+
+    # --- Save the trained model ---
+    torch.save(model.state_dict(), MODEL_SAVE_PATH)
+    print(f"Model saved to {MODEL_SAVE_PATH}")
+
+if __name__ == '__main__':
+    # Check if data directory exists
+    if not os.path.isdir('data/open') or not os.path.isdir('data/closed'):
+        print("Error: Data directories 'data/open' and 'data/closed' not found.")
+        print("Please create them and place your image snapshots inside.")
+    else:
+        train_model()
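
A quick way to sanity-check the custom transform and the label mapping before a full training run (a minimal sketch, not part of the patch; the snapshot path and preview filename below are hypothetical):

    from PIL import Image
    from torchvision import datasets

    from train import CropLowerRightTriangle

    # Apply the crop/mask to one snapshot and save it for visual inspection.
    crop = CropLowerRightTriangle(triangle_width=400, triangle_height=400)
    img = Image.open('data/open/example.jpg').convert('RGB')  # hypothetical sample
    crop(img).save('crop_preview.png')

    # ImageFolder assigns class indices alphabetically, so this should print
    # {'closed': 0, 'open': 1}.
    print(datasets.ImageFolder('data').class_to_idx)

One design note: random_split is unseeded here, so the train/validation split changes between runs; passing generator=torch.Generator().manual_seed(42) would make the split reproducible.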