I am working on a multi-output model where I need to weight all output losses before computing the overall loss. To achieve this, I have customized the model's fit() training loop by overriding train_step().
Because I need the sample-wise loss for all four outputs and have to fuse these sample-wise losses after applying the weights, I modified the standard code. The loss is now computed sample-wise, but when the gradients are computed, every gradient value comes back as None. I also tried adding tape.watch(loss), but it did not help. Please help me fix this issue.
import numpy as np
import tensorflow as tf
from tensorflow import keras


class CustomModel(keras.Model):
    def train_step(self, data):
        print(tf.executing_eagerly())
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        x, y = data
        alpha = 0.1
        loss = 0
        y_pred_all = []
        with tf.GradientTape() as tape:
            bce = tf.keras.losses.BinaryCrossentropy(reduction=tf.keras.losses.Reduction.NONE)
            for spl in range(1 if np.shape(x)[0] == None else np.shape(x)[0]):
                tape.watch(loss)
                tape.watch(loss_mean)
                tape.watch(loss_element)
                x_spl = np.reshape(x[spl], (1, np.shape(x)[1], np.shape(x)[2], np.shape(x)[3]))
                y_pred = self(x_spl, training=True)  # Forward pass
                y_pred_all.append(y_pred)
                loss_element = bce(y[spl], y_pred)
                loss_mean = [np.mean(loss_element[0]), np.mean(loss_element[1]),
                             np.mean(loss_element[2]), np.mean(loss_element[3])]
                id = np.argmin(loss_mean)
                # Keep the output with the smallest loss at full weight, down-weight the rest by alpha
                for i, ele in enumerate(loss_mean):
                    if i == id:
                        loss_mean[i] *= 1
                    else:
                        loss_mean[i] *= alpha
                loss = loss + np.sum(loss_mean)

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred_all)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}
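My suspicion is that the NumPy calls (np.reshape, np.mean, np.sum) inside the tape are the problem, since they return plain arrays that the tape cannot trace back to the model weights. Here is a minimal toy sketch of that behaviour (a single dummy variable, not my actual model):

    import numpy as np
    import tensorflow as tf

    w = tf.Variable([2.0, 3.0])

    with tf.GradientTape() as tape:
        # NumPy op: np.mean returns a plain number, so this tensor has no recorded history
        loss_np = tf.constant(np.mean(w * w))
    print(tape.gradient(loss_np, w))   # -> None

    with tf.GradientTape() as tape:
        # Pure TF op: stays on the tape
        loss_tf = tf.reduce_mean(w * w)
    print(tape.gradient(loss_tf, w))   # -> tf.Tensor([2. 3.], ...)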
UPDATE: I made a few changes as suggested by @rvinas. It now computes the gradients without any error, but I am not sure whether the changes I made are correct:
class CustomModel(keras.Model):
    def train_step(self, data):
        # print(tf.executing_eagerly())
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        x, y = data
        alpha = 0.1
        loss = tf.Variable(0, dtype='float32')
        y_pred_all = []
        with tf.GradientTape() as tape:
            bce = tf.keras.losses.BinaryCrossentropy(reduction=tf.keras.losses.Reduction.NONE)
            for spl in tf.range(1 if tf.shape(x)[0] == None else tf.shape(x)[0]):
                loss_mean = tf.convert_to_tensor([])
                x_spl = tf.reshape(x[spl], (1, tf.shape(x)[1], tf.shape(x)[2], tf.shape(x)[3]))
                y_pred = self(x_spl, training=True)  # Forward pass
                y_pred_all.append(y_pred)
                loss_element = bce(y[spl], y_pred)
                loss_mean = [tf.reduce_mean(loss_element[0]), tf.reduce_mean(loss_element[1]),
                             tf.reduce_mean(loss_element[2]), tf.reduce_mean(loss_element[3])]
                id = tf.argmin(loss_mean)
                for i, ele in enumerate(loss_mean):
                    if i == id:
                        loss_mean[i] = tf.multiply(loss_mean[i], 1)
                    else:
                        loss_mean[i] = tf.multiply(loss_mean[i], alpha)
                loss = tf.add(loss, tf.add(tf.add(tf.add(loss_mean[0], loss_mean[1]), loss_mean[2]), loss_mean[3]))

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred_all)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}
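One thing I am still unsure about is whether loss really needs to be a tf.Variable created on every call to train_step. As far as I understand, the tape only needs the final loss to be a tensor produced by TF ops, so a plain scalar tensor accumulator should behave the same. A toy check of that idea (again a dummy variable, not my real model):

    import tensorflow as tf

    w = tf.Variable([1.0, 2.0])
    loss = tf.constant(0.0)                     # plain tensor accumulator, no tf.Variable needed
    with tf.GradientTape() as tape:
        for i in range(2):                      # stand-in for the per-sample loop
            loss = loss + tf.reduce_sum(w) * float(i + 1)
    print(tape.gradient(loss, w))               # -> tf.Tensor([3. 3.], ...)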
Comment: you could define a tensor of weights with the same shape as loss (i.e., (batch_size, nb_elements)) and compute the final weighted loss with something along the lines of tf.reduce_mean(weights * loss). The "for loop" within the gradient tape block should ideally be avoided. – rvinas