I am using a Unet model for semantic segmentation - I have a custom dataset of images and their masks both in .png format. I have looked in the online forums and tried stuff, but not much works? Any suggestions in how to resolve the error or improve the code would be helpful.
model.eval()
with torch.no_grad():
for xb, yb in val_dl:
yb_pred = model(xb.to(device))
# yb_pred = yb_pred["out"].cpu()
print(yb_pred.shape)
yb_pred = torch.argmax(yb_pred,axis = 1)
break
print(yb_pred.shape)
criteron = nn.CrossEntropyLoss(reduction = 'sum')
opt = optim.Adam(model.parameters(), lr = 3e-4)
def loss_batch(loss_func, output, target, opt = None):
loss = loss_func(output, target)
if opt is not None:
opt.zero_grad()
loss.backward()
opt.step()
return loss.item(), None
lr_scheduler = ReduceLROnPlateau(opt, mode = 'min', factor = 0.5, patience= 20, verbose = 1)
def get_lr(opt):
for param_group in opt.param_groups:
return param_group['lr']
current_lr = get_lr(opt)
print('current_lr = {}'.format(current_lr))
def loss_epoch(model, loss_func, dataset_dl, sanity_check = False, opt = None):
running_loss = 0.0
len_data = len(dataset_dl.dataset)
for xb, yb in dataset_dl:
xb = xb.to(device)
yb = yb.to(device)
# xb = torch.tensor(xbh, requires_grad=True)
output = model(xb)
loss_b, metric_b = loss_batch(loss_func, output, yb, opt)
running_loss += loss_b
if sanity_check is True:
break
loss = running_loss/float(len_data)
return loss, None
def train_val(model, params):
num_epochs = params["num_epochs"]
loss_func = params["loss_func"]
opt = params["optimizer"]
train_dl = params["train_dl"]
val_dl = params["val_dl"]
sanity_check = params["sanity_check"]
lr_scheduler = params["lr_scheduler"]
path2weights = params["path2weights"]
loss_history = {"train": [],
"val": []}
best_model_wts = copy.deepcopy(model.state_dict())
best_loss = float('inf')
for epoch in range(num_epochs):
current_lr = get_lr(opt)
print('Epoch {}/{}, current_lr = {}'.format(epoch, num_epochs - 1, current_lr))
with torch.enable_grad():
model.train()
train_loss, _ = loss_epoch(model, loss_func, train_dl, sanity_check, opt)
loss_history["train"].append(train_loss)
model.eval()
with torch.no_grad():
val_loss, _ = loss_epoch(model, loss_func, val_dl, sanity_check, opt)
loss_history["val"].append(val_loss)
if val_loss < best_loss:
best_loss = val_loss
best_model_wts = copy.deepcopy(model.state_dict())
torch.save(model.state_dict(), path2weights)
print("copied best model weights!!")
lr_scheduler.step(val_loss)
if current_lr != get_lr(opt):
print("Loading best model weights!!")
model.load_state_dict(best_model_wts)
print("train Loss: %.6f" %(train_loss))
print("val_loss: %.6f" %(val_loss))
print("-"*20)
model.load_state_dict(best_model_wts)
return model, loss_history, metric_history
path2models = "./models/"
if not os.path.exists(path2models):
os.mkdir(path2models)
param_train = {
"num_epochs": 10,
"loss_func": criteron,
"optimizer": opt,
"train_dl": train_dl,
"val_dl": val_dl,
"sanity_check": False,
"lr_scheduler": lr_scheduler,
"path2weights": path2models + "weights.pt"
model, loss_hist, _ = train_val(model, param_train)
The error message looks like - File "", line 10, in model, loss_hist, _ = train_val(model, param_train)
File "", line 27, in train_val val_loss, _ = loss_epoch(model, loss_func, val_dl, sanity_check, opt)
File "", line 13, in loss_epoch loss_b, metric_b = loss_batch(loss_func, output, yb, opt)
File "", line 6, in loss_batch loss.backward()
File "C:\Users\W540\anaconda3\lib\site-packages\torch\tensor.py", line 198, in backward torch.autograd.backward(self, gradient, retain_graph, create_graph)
File "C:\Users\W540\anaconda3\lib\site-packages\torch\autograd_init_.py", line 100, in backward allow_unreachable=True) # allow_unreachable flag
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
I am not sure which variable to set as require_grad = True or where I should enable grad...