
I moved to PyTorch from Keras, and I'm very new to the whole moving-to-CUDA thing. I've spent hours surfing the web and haven't been able to find anything. The fix is probably a line or two. I'd appreciate it if someone knows how to solve this issue.

Here's my code. First I define my U-Net model as a subclass of nn.Module:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils


class unet(nn.Module):
    def __init__(self):
        super(unet, self).__init__()
        self.conv1 = nn.Conv3d(1, 32, 3, padding=1)
        self.conv1_1 = nn.Conv3d(32, 32, 3, padding=1)
        self.conv2 = nn.Conv3d(32, 64, 3, padding=1)
        self.conv2_2 = nn.Conv3d(64, 64, 3, padding=1)
        self.conv3 = nn.Conv3d(64, 128, 3, padding=1)
        self.conv3_3 = nn.Conv3d(128, 128, 3, padding=1)
        self.convT1 = nn.ConvTranspose3d(128, 64, 3, stride=(2, 2, 2), padding=1, output_padding=1)
        self.conv4 = nn.Conv3d(128, 64, 3, padding=1)
        self.conv4_4 = nn.Conv3d(64, 64, 3, padding=1)
        self.convT2 = nn.ConvTranspose3d(64, 32, 3, stride=(2, 2, 2), padding=1, output_padding=1)
        self.conv5 = nn.Conv3d(64, 32, 3, padding=1)
        self.conv5_5 = nn.Conv3d(32, 32, 3, padding=1)
        self.conv6 = nn.Conv3d(32, 1, 3, padding=1)

    def forward(self, inputs):
        conv1 = F.relu(self.conv1(inputs))
        conv1 = F.relu(self.conv1_1(conv1))
        pool1 = F.max_pool3d(conv1, 2)

        conv2 = F.relu(self.conv2(pool1))
        conv2 = F.relu(self.conv2_2(conv2))
        pool2 = F.max_pool3d(conv2, 2)

        conv3 = F.relu(self.conv3(pool2))
        conv3 = F.relu(self.conv3_3(conv3))
        conv3 = self.convT1(conv3)

        up1 = torch.cat((conv3, conv2), dim=1)
        conv4 = F.relu(self.conv4(up1))
        conv4 = F.relu(self.conv4_4(conv4))

        conv4 = self.convT2(conv4)
        up2 = torch.cat((conv4, conv1), dim=1)
        conv5 = F.relu(self.conv5(up2))
        conv5 = F.relu(self.conv5_5(conv5))

        conv6 = F.relu(self.conv6(conv5))

        return conv6

Then I run my unet as in the following code. Note that when creating the model I move it to the CUDA device, and I also move the input data and its labels to CUDA.

import datetime

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = unet().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()
datasets = torch.utils.data.TensorDataset(data_recon, data_truth)
train_loader = DataLoader(datasets, batch_size=2, shuffle=True)

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0
        for imgs, labels in train_loader:
            imgs.to(device)
            labels.to(device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_train += loss.item()


        print('{} Epoch {}, Training loss    {}'.format(datetime.datetime.now(), epoch, float(loss_train)))

training_loop(50, optimizer, model, loss_fn, train_loader)

But I get this error:


RuntimeError                              Traceback (most recent call last)
in
----> 1 training_loop(50, optimizer, model, loss_fn, train_loader)

in training_loop(n_epochs, optimizer, model, loss_fn, train_loader)
      5             imgs.to(device)
      6             labels.to(device)
----> 7             outputs = model(imgs)
      8             loss = loss_fn(outputs, labels)
      9

/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    491             result = self._slow_forward(*input, **kwargs)
    492         else:
--> 493             result = self.forward(*input, **kwargs)
    494         for hook in self._forward_hooks.values():
    495             hook_result = hook(self, input, result)

in forward(self, inputs)
     18
     19     def forward(self, inputs):
---> 20         conv1 = F.relu(self.conv1(inputs))
     21         conv1 = F.relu(self.conv1_1(conv1))
     22         pool1 = F.max_pool3d(conv1, 2)

/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    491             result = self._slow_forward(*input, **kwargs)
    492         else:
--> 493             result = self.forward(*input, **kwargs)
    494         for hook in self._forward_hooks.values():
    495             hook_result = hook(self, input, result)

/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/conv.py in forward(self, input)
    474                         self.dilation, self.groups)
    475         return F.conv3d(input, self.weight, self.bias, self.stride,
--> 476                         self.padding, self.dilation, self.groups)
    477
    478

RuntimeError: Expected object of backend CPU but got backend CUDA for argument #2 'weight'



1 Answer


The problem is with these lines:

imgs.to(device)
labels.to(device)

Tensor.to(device) returns a new tensor; it does not modify imgs and labels in place, so they stay on the CPU and the CUDA error is expected. You can fix it by assigning the returned tensors back to the same names:

imgs = imgs.to(device)
labels = labels.to(device)
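
With that change, the inner loop from the question would look roughly like this (a minimal sketch reusing the names from the question; the rest of the setup stays the same):

for imgs, labels in train_loader:
    # .to(device) returns a copy on the target device; reassign to actually use it
    imgs = imgs.to(device)
    labels = labels.to(device)

    outputs = model(imgs)
    loss = loss_fn(outputs, labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    loss_train += loss.item()

Note that nn.Module.to(device) moves a module's parameters in place, so model = unet().to(device) from the question already works as written; only plain tensors need the reassignment. After the fix, imgs.device and next(model.parameters()).device should both report cuda:0 if you want to double-check the placement.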