I have a model that is training (it goes through steps and epochs and evaluate losses) but weights are not training.
I trying to train a discriminator that would distinguish whether the image is synthetic or real. It's part of GANs model, I'm trying to build.
Basic structure is as followed:
I have two inputs: 1. image (could be real or synthetic) 2. labels (0 for real, 1 for synthetic)
Source Estimator is where I extract features from images. I had already trained the model and restored the weights and biases. These layers are frozen (not trainable).
def SourceEstimator(eye, name, trainable = True):
# source estimator and target representer shares the same structure.
# SE is not trainable, while TR is.
net = tf.layers.conv2d(eye, 32, 3, (1,1), padding='same', activation=tf.nn.leaky_relu, trainable=trainable, name=name+'_conv2d_1')
net = tf.layers.conv2d(net, 32, 3, (1,1), padding='same', activation=tf.nn.leaky_relu, trainable=trainable, name=name+'_conv2d_2')
net = tf.layers.conv2d(net, 64, 3, (1,1), padding='same', activation=tf.nn.leaky_relu, trainable=trainable, name=name+'_conv2d_3')
c3 = net
net = tf.layers.max_pooling2d(net, 3, (2,2), padding='same', name=name+'_maxpool_4')
net = tf.layers.conv2d(net, 80, 3, (1,1), padding='same', activation=tf.nn.leaky_relu, trainable=trainable, name=name+'_conv2d_5')
net = tf.layers.conv2d(net, 192, 3, (1,1), padding='same', activation=tf.nn.leaky_relu, trainable=trainable, name=name+'_conv2d_6')
c5 = net
return (c3, c5)
Discriminator is as followed:
def DiscriminatorModel(features, reuse=False):
with tf.variable_scope('discriminator', reuse=tf.AUTO_REUSE):
net = tf.layers.conv2d(features, 64, 3, 2, padding='same', kernel_initializer='truncated_normal', activation=tf.nn.leaky_relu, trainable=True, name='discriminator_c1')
net = tf.layers.conv2d(net, 128, 3, 2, padding='same', kernel_initializer='truncated_normal', activation=tf.nn.leaky_relu, trainable=True, name='discriminator_c2')
net = tf.layers.conv2d(net, 256, 3, 2, padding='same', kernel_initializer='truncated_normal', activation=tf.nn.leaky_relu, trainable=True, name='discriminator_c3')
net = tf.contrib.layers.flatten(net)
net = tf.layers.dense(net, units=1, activation=tf.nn.softmax, name='descriminator_out', trainable=True)
return net
Input goes to SourceEstimator model and extracts features (c3,c5).
Then c3 and c5 is concatenated along the channel axis and passed to discriminator model.
c3, c5 = CommonModel(self.left_eye, 'el', trainable=False)
c5 = tf.image.resize_images(c5, size=(self.config.img_size,self.config.img_size))
features = tf.concat([c3, c5], axis=3)
##---------------------------------------- DISCRIMINATOR ------------------------------------------##
with tf.variable_scope('discriminator'):
logit = DiscriminatorModel(features)
Finally losses and train_ops
##---------------------------------------- LOSSES ------------------------------------------##
with tf.variable_scope("discriminator_losses"):
self.loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logit, labels=self.label))
##---------------------------------------- TRAIN ------------------------------------------##
# optimizers
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
disc_optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
self.disc_op = disc_optimizer.minimize(self.loss, global_step=self.global_step_tensor, name='disc_op')
Train steps and epochs. I'm using 32 batch size. And data generator class to get the image each step.
def train_epoch(self):
num_iter_per_epoch = self.train_data.get_size() // self.config.get('batch_size')
loop = tqdm(range(num_iter_per_epoch))
for i in loop:
dloss = self.train_step(i)
loop.set_postfix(loss='{:05.3f}'.format(dloss))
def train_step(self, i):
el, label = self.train_data.get_batch(i)
## ------------------- train discriminator -------------------##
feed_dict = {
self.model.left_eye: el,
self.model.label: label
}
_, dloss = self.sess.run([self.model.disc_op, self.model.loss], feed_dict=feed_dict)
return dloss
While the model is going through steps and epochs, weight remains unchanged.
Loss fluctuates during the training steps, but loss for every epoch is the same. For example, if I don't shuffle the dataset each epoch, loss on the graph will follow the same pattern each epoch.
Which I think means the model recognizes the different losses, but is not updating parameters according to the losses.
Here are few other things I tried and did not help:
- tried small and big learning rate (0.1 and 1e-8)
- tried with SourceEstimator layers trainable==True
- flipped labels (0 == synthetic, 1 == real)
- increased kernel sizes and filter sizes in discriminator.
I've been stuck on this problem for a while now, I really need some insights. Thanks in advance.
------EDIT 1-----
def initialize_uninitialized(sess):
global_vars = tf.global_variables()
is_initialized= sess.run([tf.is_variable_initialized(var) for var in global_vars])
not_initialized_vars = [v for (v, f) in zip(global_vars, is_initialized) if not f]
# for var in not_initialized_vars: # only for testing
# print(var.name)
if len(not_initialized_vars):
sess.run(tf.variables_initializer(not_initialized_vars))
self.sess = tf.Session()
## inbetween here I create data generator, model and restore pretrained model.
self.initilize_uninitialized(self.sess)
for current_epoch in range(self.model.current_epoch_tensor.eval(self.sess), self.config.num_epochs, 1)
self.train_epoch() # included above
self.sess.run(self.model.increment_current_epoch_tensor)