I am reimplementing the pytorch tutorial of the Pytorch cifar10 tutorial
But I want to use a different model. I don't want to use fully connected (in pytorch linear) layers and I want to add Batch Normalization.
My model looks like this:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.pool = nn.MaxPool2d(2,2)
self.conv1 = nn.Conv2d(in_channels=3,out_channels=16,kernel_size=3, padding=1, padding_mode='zeros')
self.conv1_bn = nn.BatchNorm2d(16)
self.conv2 = nn.Conv2d(in_channels=16,out_channels=32,kernel_size=3, padding=1, padding_mode='zeros')
self.conv2_bn = nn.BatchNorm2d(32)
self.conv3 = nn.Conv2d(in_channels=32,out_channels=64,kernel_size=3, padding=1, padding_mode='zeros')
self.conv3_bn = nn.BatchNorm2d(64)
self.conv4 = nn.Conv2d(64,64,3, padding=1, padding_mode='zeros')
self.conv4_bn = nn.BatchNorm2d(64)
self.conv5 = nn.Conv2d(64,10,2,padding=0)
def forward(self, x): # x has shape (4,32,32,3)
x = self.pool(F.relu(self.conv1_bn(self.conv1(x)))) # feature map resolution is now 16*16
x = self.pool(F.relu(self.conv2_bn(self.conv2(x)))) # resolution now 8*8
x = self.pool(F.relu(self.conv3_bn(self.conv3(x)))) #resolution now 4*4
x = self.pool(F.relu(self.conv4_bn(self.conv4(x)))) # now 2*2
x = F.relu(self.conv5(x)) # The output shape is (batchsize, 1,1,10)
return x
Batchsize is 4 and image resolution is 32*32 so inputsize is 4,32,32,3 The convolution layers don't reduce the resolution size of the feature maps because of the padding. The resolution is halved with the maxpool layers. Conv5 gets an input with shape 4,2,2,64. Now I use filtersize 2 and no padding to get a resolution of 1*1. I have 10 classes so I use 10 filters. Each of the last filters should predict it's corresponding class. The shape of the output is now (4,1,1,10). But when I try to train this model the loss doesn't decrease. The amount of parameters of the tutorial model and my net are about the same at ~62k.
Here is the rest of the code. This is identical to the code in the tutorial but I have to reshape the output so it fits. (output in the tutorial was (4,10) and mine is 4,1,1,10)
transform = transforms.Compose(
[transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
shuffle=True, num_workers=2)
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
shuffle=False, num_workers=2)
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)
for epoch in range(2): # loop over the dataset multiple times
running_loss = 0.0
for i, data in enumerate(trainloader, 0):
# get the inputs; data is a list of [inputs, labels]
inputs, labels = data[0].to(device), data[1].to(device)
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = net(inputs) # I get the values as 4,1,1,10
outputs_reshaped = outputs.reshape(4,10)
loss = criterion(outputs_reshaped, labels)
loss.backward()
optimizer.step()
running_loss +=loss.item()
if i % 2000 == 1999: # print every 2000 mini-batches
print('[%d, %5d] loss: %.3f' %
(epoch + 1, i + 1, running_loss / 2000))
running_loss = 0.0
My loss looks like this.
[1, 2000] loss: 2.348
[1, 2000] loss: 2.477
[1, 4000] loss: 2.482
[1, 6000] loss: 2.468
[1, 8000] loss: 2.471
[1, 10000] loss: 2.482
[1, 12000] loss: 2.485
[2, 2000] loss: 2.486
[2, 4000] loss: 2.470
[2, 6000] loss: 2.479
[2, 8000] loss: 2.481
[2, 10000] loss: 2.474
[2, 12000] loss: 2.470
My model doesn't seem to learn anything. Anyone an idea why this might happen?