I'm new to PyTorch and machine learning in general, and I'm trying to create a simple convolutional neural net that classifies the MNIST handwritten digits. Unfortunately, when I try to train it, I get the following error:

ValueError: Expected input batch_size (288) to match target batch_size (64).

Here is the neural network code:

from torch import nn
from torch.nn.functional import relu, log_softmax

class MNIST_SimpleConv(nn.Module):
    def __init__(self):
        super(MNIST_SimpleConv, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3,  out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)

        self.pool1 = nn.MaxPool2d(2, 2)

        self.dense1 = nn.Linear(4*4*64, 100)
        self.dense2 = nn.Linear(100, 10)

    def forward(self, x):
        x = relu(self.conv1(x))
        x = relu(self.conv2(x))
        x = self.pool1(x)

        x = x.view(-1, 4*4*64)

        x = relu(self.dense1(x))
        return log_softmax(self.dense2(x), dim=1)

And the training code is as follows:

from nets.conv import MNIST_SimpleConv
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.nn.functional import nll_loss
import torch.optim as optim
import torch
from torch import nn

MNIST_ROOT = "data/MNIST"

#prepare dataset
mnist_train_ds = datasets.ImageFolder(root=MNIST_ROOT+"/train", transform=transforms.Compose([
                           transforms.ToTensor()]))
mnist_test_ds  = datasets.ImageFolder(root=MNIST_ROOT+"/test", transform=transforms.Compose([
                           transforms.ToTensor()]))
mnist_train = DataLoader(mnist_train_ds, batch_size=64, shuffle=True, num_workers=6)
mnist_test  = DataLoader(mnist_test_ds, batch_size=64, shuffle=True, num_workers=6)

criterion = nn.CrossEntropyLoss()

def train(model, device, train_loader, optimizer, epoch):
    model.train()

    for batch_idx, (data, target) in enumerate(train_loader, 0):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


model = MNIST_SimpleConv().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

for epoch in range(1, 10):
    train(model, device, mnist_train , optimizer, epoch)

So far I have investigated how the dimensions of x change as it is forwarded through the network.

Input: torch.Size([64, 3, 28, 28])

After x = relu(self.conv1(x)): torch.Size([64, 32, 26, 26])

After x = relu(self.conv2(x)): torch.Size([64, 64, 24, 24])

After x = self.pool1(x): torch.Size([64, 64, 12, 12])

After x = x.view(-1, 4*4*64) torch.Size([576, 1024])

After x = relu(self.dense1(x)) torch.Size([576, 100])

After x = log_softmax(self.dense2(x), dim=1) torch.Size([576, 10])
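
(For reference, these shapes can be reproduced by temporarily adding print calls in forward, e.g. a minimal sketch:)

def forward(self, x):
    print("Input:", x.shape)
    x = relu(self.conv1(x))
    print("After conv1:", x.shape)
    x = relu(self.conv2(x))
    print("After conv2:", x.shape)
    x = self.pool1(x)
    print("After pool1:", x.shape)
    x = x.view(-1, 4*4*64)
    print("After view:", x.shape)
    x = relu(self.dense1(x))
    return log_softmax(self.dense2(x), dim=1)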

The error is probably caused by x = x.view(-1, 4*4*64), which for some reason produces a tensor of shape [576, 1024] instead of [64, 1024]. (If I understand this correctly, the first dimension should equal the batch size, which in my case is 64.)

What am I doing wrong?

1 Answer

Passing -1 for a dimension in view means that dimension's size is inferred from the remaining dimensions, so that the total number of elements stays the same. For example:

import torch

x = torch.rand(1, 10)  # x.shape = [1, 10]
x = x.view(-1, 5)      # x.shape = [2, 5]

In your case, the output of pool1 has shape [64, 64, 12, 12], which is 64 * 64 * 12 * 12 = 589,824 elements in total; view(-1, 4*4*64) therefore infers the first dimension as 589824 / 1024 = 576 instead of 64. If you want to flatten everything after the batch dimension, it should be something like this:

x = x.view(-1, 64*12*12)  # x.shape = [64, 9216]

Also, we have to update the number of input features for self.dense1 accordingly:

self.dense1 = nn.Linear(64*12*12, 100)
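
A slightly safer variant (a common PyTorch idiom, not something from your original code) is to pin the batch dimension explicitly, so that any shape mismatch raises an error instead of silently changing the batch size:

x = x.view(x.size(0), -1)   # keep the batch dimension, infer the rest
# or equivalently:
x = x.flatten(start_dim=1)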

However, we need to make sure that the output of self.pool1 always has shape batch_size x 64 x 12 x 12; in particular, the last two dimensions must stay 12. This can be ensured by fixing the input image dimensions across the dataset, for example with a Resize transform as sketched below.
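
A minimal sketch of such a transform pipeline, assuming 28x28 inputs as in the shapes above:

from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize((28, 28)),  # force a fixed spatial size so pool1 always outputs 12x12
    transforms.ToTensor(),
])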