refactor: extract model to module and add image sorting script

Co-authored-by: aider (gemini/gemini-2.5-pro-preview-05-06) <aider@aider.chat>
2025-07-31 16:22:46 -06:00
parent 587f38b9ce
commit 5f18d8bce2
3 changed files with 158 additions and 68 deletions
--- a/model.py
+++ b/model.py
@@ -0,0 +1,72 @@
+import torch.nn as nn
+from PIL import Image, ImageDraw
+
+# Transform that keeps only a triangular patch from the image's lower right corner
+class CropLowerRightTriangle:
+    """
+    Callable image transform producing a lower-right triangular cut-out.
+
+    A ``triangle_width`` x ``triangle_height`` rectangle is cropped from the
+    lower right corner of the input, and only the pixels inside the triangle
+    spanned by the rectangle's top-right, bottom-left and bottom-right
+    corners are kept. Everything else in the returned RGB image is black.
+    Both dimensions are chosen by the caller, so the triangle's geometry is
+    adjustable.
+    """
+    def __init__(self, triangle_width, triangle_height):
+        self.triangle_height = triangle_height
+        self.triangle_width = triangle_width
+
+    def __call__(self, img):
+        full_w, full_h = img.size
+        tri_w, tri_h = self.triangle_width, self.triangle_height
+
+        # Rectangle whose bottom-right corner coincides with the image's.
+        corner = img.crop((full_w - tri_w, full_h - tri_h, full_w, full_h))
+
+        # Single-channel stencil sized like the rectangle: black (0) outside
+        # the triangle, white (255) inside it.
+        stencil = Image.new('L', (tri_w, tri_h), 0)
+        ImageDraw.Draw(stencil).polygon(
+            [(tri_w, 0), (0, tri_h), (tri_w, tri_h)],
+            fill=255,
+        )
+
+        # Start from an all-black canvas; the crop is pasted only where the
+        # stencil is white, so the remaining area stays black.
+        canvas = Image.new("RGB", corner.size, (0, 0, 0))
+        canvas.paste(corner, (0, 0), stencil)
+        return canvas
+
+# Convolutional classifier for garage door images
+class GarageDoorCNN(nn.Module):
+    """
+    Small CNN that maps an RGB image to two raw class scores (open, closed).
+
+    Three ``Conv2d(kernel_size=3, padding=1) -> ReLU -> MaxPool2d(2, 2)``
+    stages widen the channels 3 -> 16 -> 32 -> 64 while each pooling step
+    halves the height and width. The pooled features are flattened and fed
+    through ``fc1`` (512 units, ReLU) and ``fc2`` (2 outputs).
+
+    Args:
+        resize_dim: Height and width of the square input images. ``fc1`` is
+            sized for exactly ``64 * (resize_dim // 8) ** 2`` features, so
+            inputs passed to ``forward`` must have this spatial size.
+
+    Raises:
+        ValueError: If ``resize_dim`` is below 8, because the third pooling
+            stage would then have no output left to produce.
+    """
+    def __init__(self, resize_dim=64):
+        super().__init__()
+        if resize_dim < 8:
+            raise ValueError(
+                f"resize_dim must be at least 8 (three 2x poolings), got {resize_dim}"
+            )
+
+        # Layer attribute names are kept stable so saved state dicts still load.
+        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
+        self.relu1 = nn.ReLU()
+        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
+        self.relu2 = nn.ReLU()
+        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
+        self.relu3 = nn.ReLU()
+        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
+
+        # The padded 3x3 convolutions keep height/width; each of the three
+        # stride-2 poolings halves them (rounding down).
+        final_dim = resize_dim // (2**3)
+        self.fc1_input_features = 64 * final_dim * final_dim
+
+        self.fc1 = nn.Linear(self.fc1_input_features, 512)
+        self.relu4 = nn.ReLU()
+        self.fc2 = nn.Linear(512, 2)  # 2 classes: open, closed
+
+    def forward(self, x):
+        """
+        Return the two class scores for each image in ``x``.
+
+        Raises:
+            RuntimeError: If the pooled feature count per sample differs from
+                ``fc1_input_features``, i.e. the input's height/width do not
+                match the ``resize_dim`` the model was built with.
+        """
+        x = self.pool1(self.relu1(self.conv1(x)))
+        x = self.pool2(self.relu2(self.conv2(x)))
+        x = self.pool3(self.relu3(self.conv3(x)))
+
+        # Check the per-sample size before flattening: reshaping with -1 alone
+        # would silently fold surplus spatial elements into extra batch rows
+        # whenever the total happens to be divisible by fc1_input_features.
+        per_sample = x.shape[-3] * x.shape[-2] * x.shape[-1]
+        if per_sample != self.fc1_input_features:
+            raise RuntimeError(
+                f"GarageDoorCNN expected {self.fc1_input_features} features per "
+                f"sample after pooling but got {per_sample}; the input height and "
+                "width must match the resize_dim used to build the model"
+            )
+
+        # reshape (unlike view) also accepts non-contiguous memory layouts.
+        x = x.reshape(-1, self.fc1_input_features)
+        x = self.relu4(self.fc1(x))
+        return self.fc2(x)