feat: add garage door CNN training script
Co-authored-by: aider (gemini/gemini-2.5-pro-preview-05-06) <aider@aider.chat>
train.py | 165 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 165 insertions(+)
@@ -0,0 +1,165 @@
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from PIL import Image, ImageDraw
import os


# Custom transform to crop a triangle from the lower right corner
class CropLowerRightTriangle(object):
    """
    Crops a rectangular area from the lower right corner of an image,
    then masks it to a triangle.
    The user can adjust the geometry of the triangle.
    """
    def __init__(self, triangle_width, triangle_height):
        self.triangle_width = triangle_width
        self.triangle_height = triangle_height

    def __call__(self, img):
        img_width, img_height = img.size

        # Define the bounding box for the crop
        left = img_width - self.triangle_width
        top = img_height - self.triangle_height
        right = img_width
        bottom = img_height

        # Crop a rectangle from the lower right corner
        cropped_img = img.crop((left, top, right, bottom))

        # Create a triangular mask. The mask is the same size as the cropped rectangle.
        mask = Image.new('L', (self.triangle_width, self.triangle_height), 0)
        # The polygon vertices define the lower-right triangle within the rectangle.
        # Vertices are (top-right, bottom-left, bottom-right).
        polygon = [(self.triangle_width, 0), (0, self.triangle_height), (self.triangle_width, self.triangle_height)]
        ImageDraw.Draw(mask).polygon(polygon, fill=255)

        # Create a black background image.
        background = Image.new("RGB", cropped_img.size, (0, 0, 0))

        # Paste the original cropped image onto the background using the mask.
        # Where the mask is white, the image is pasted. Where black, it's not.
        background.paste(cropped_img, (0, 0), mask)

        return background
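
# Worked example (hypothetical 1920x1080 snapshot, triangle_width=400,
# triangle_height=400): the crop box is (1520, 680, 1920, 1080), and the
# mask keeps only the lower-right triangle of that 400x400 rectangle;
# the remainder of the output is black.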


# Define the CNN
class GarageDoorCNN(nn.Module):
    def __init__(self, resize_dim=64):
        super(GarageDoorCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Calculate the size of the flattened features after convolutions and pooling
        final_dim = resize_dim // (2**3)  # 3 pooling layers with stride 2
        self.fc1_input_features = 64 * final_dim * final_dim
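        # e.g. resize_dim=64 -> final_dim = 64 // 8 = 8, so fc1 receives
        # 64 * 8 * 8 = 4096 input features.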

        self.fc1 = nn.Linear(self.fc1_input_features, 512)
        self.relu4 = nn.ReLU()
        self.fc2 = nn.Linear(512, 2)  # 2 classes: open, closed

    def forward(self, x):
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = self.pool3(self.relu3(self.conv3(x)))
        x = x.view(-1, self.fc1_input_features)  # Flatten the tensor
        x = self.relu4(self.fc1(x))
        x = self.fc2(x)
        return x
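
# Shape trace for the default resize_dim=64 (each 3x3 conv uses padding=1,
# so only the pools shrink the feature map):
# (N, 3, 64, 64) -> (N, 16, 32, 32) -> (N, 32, 16, 16) -> (N, 64, 8, 8)
# -> flatten to (N, 4096) -> fc1 -> (N, 512) -> fc2 -> (N, 2)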


def train_model():
    # --- Hyperparameters and Configuration ---
    DATA_DIR = 'data'
    MODEL_SAVE_PATH = 'garage_door_cnn.pth'
    NUM_EPOCHS = 10
    BATCH_SIZE = 32
    LEARNING_RATE = 0.001
    # For the custom crop transform. User can adjust these.
    TRIANGLE_CROP_WIDTH = 400
    TRIANGLE_CROP_HEIGHT = 400
    RESIZE_DIM = 64  # Resize cropped image to this dimension (square)

    # --- Data Preparation ---
    # Define transforms
    data_transforms = transforms.Compose([
        CropLowerRightTriangle(triangle_width=TRIANGLE_CROP_WIDTH, triangle_height=TRIANGLE_CROP_HEIGHT),
        transforms.Resize((RESIZE_DIM, RESIZE_DIM)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
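    # (The Normalize mean/std above are the standard ImageNet statistics,
    # a common default for RGB images.)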

    # Load dataset with ImageFolder
    full_dataset = datasets.ImageFolder(DATA_DIR, transform=data_transforms)
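    # ImageFolder assigns class indices from the sorted subdirectory names,
    # so with data/closed and data/open: 'closed' -> 0, 'open' -> 1.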

    # Split into training and validation sets
    train_size = int(0.8 * len(full_dataset))
    val_size = len(full_dataset) - train_size
    train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
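    # Note: the validation split reuses the same deterministic transform as
    # training, which is fine here since no random augmentation is applied.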

    # --- Model, Loss, Optimizer ---
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    model = GarageDoorCNN(resize_dim=RESIZE_DIM).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # --- Training Loop ---
    print("Starting training...")
    for epoch in range(NUM_EPOCHS):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
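            # loss.item() is the batch-mean loss; scaling by the batch size
            # accumulates a sum that the division by len(train_dataset) below
            # turns back into a per-sample average.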

        epoch_loss = running_loss / len(train_dataset)
        print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Training Loss: {epoch_loss:.4f}")

        # --- Validation Loop ---
        model.eval()
        val_loss = 0.0
        corrects = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                _, preds = torch.max(outputs, 1)
                corrects += torch.sum(preds == labels.data)

        val_epoch_loss = val_loss / len(val_dataset)
        val_epoch_acc = corrects.double() / len(val_dataset)
        print(f"Validation Loss: {val_epoch_loss:.4f}, Accuracy: {val_epoch_acc:.4f}")

    # --- Save the trained model ---
    torch.save(model.state_dict(), MODEL_SAVE_PATH)
    print(f"Model saved to {MODEL_SAVE_PATH}")


if __name__ == '__main__':
    # Check if data directory exists
    if not os.path.isdir('data/open') or not os.path.isdir('data/closed'):
        print("Error: Data directories 'data/open' and 'data/closed' not found.")
        print("Please create them and place your image snapshots inside.")
    else:
        train_model()
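
# To reuse the trained weights later, a minimal sketch (assumes the same
# RESIZE_DIM and transform pipeline as above; 'snapshot.jpg' is a
# hypothetical input file):
#   model = GarageDoorCNN(resize_dim=64)
#   model.load_state_dict(torch.load('garage_door_cnn.pth'))
#   model.eval()
#   img = Image.open('snapshot.jpg').convert('RGB')
#   x = data_transforms(img).unsqueeze(0)   # data_transforms as built in train_model
#   pred = model(x).argmax(dim=1).item()    # 0 = closed, 1 = open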