I am trying to classify images from the NWPU-RESISC45 dataset (45 classes with 700 images each) using a simple two-layer CNN, with batch size 252, 30 epochs, learning rate 0.0001, and an image size of 256 x 256 x 3. I have tried both increasing and decreasing the learning rate. The dataset was split in the ratio 0.8:0.1:0.1 for training:testing:validation. However, the accuracy and loss remain unchanged, and the loss is always zero.
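For reference, here is a quick sanity check of the sizes implied by that split (a minimal sketch; the variable names are only illustrative):

total_images = 45 * 700                 # 31500 images in total
train_size = int(0.8 * total_images)    # 25200 training images
val_size = int(0.1 * total_images)      # 3150 validation images
test_size = int(0.1 * total_images)     # 3150 test images
batches_per_epoch = train_size // 252   # 100 batches per epoch at batch size 252

This is the architecture: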
import tensorflow as tf
import matplotlib.pyplot as plt
import time

# The FLAGS are used to assign constant values to several paths as well as to variables that are used throughout.
flags = tf.app.flags
flags.DEFINE_string('dataset_dir','//media//datapart//akshara//NWPU-RESISC45//NWPU-RESISC45//','//media//datapart//akshara//NWPU-RESISC45//NWPU-RESISC45//')
flags.DEFINE_float('validation_size', 0.1, 'Float: The proportion of examples in the dataset to be used for validation')
flags.DEFINE_float('test_size', 0.1, 'Float: The proportion of examples in the dataset to be used for test')
flags.DEFINE_integer('num_shards', 1, 'Int: Number of shards to split the TFRecord files into')
flags.DEFINE_integer('random_seed', 0, 'Int: Random seed to use for repeatability.')
flags.DEFINE_string('tfrecord_filename', None, 'String: The output filename to name your TFRecord file')
tf.app.flags.DEFINE_integer('target_image_height', 256, 'train input image height')
tf.app.flags.DEFINE_integer('target_image_width', 256, 'train input image width')
tf.app.flags.DEFINE_integer('batch_size', 252, 'batch size of training.')
tf.app.flags.DEFINE_integer('num_epochs', 30, 'epochs of training.')
tf.app.flags.DEFINE_float('learning_rate', 0.0001, 'learning rate of training.')
FLAGS = flags.FLAGS
img_size = 256
num_channels=3
num_classes=45
########################################################################################################################
########################################################################################################################
datapath_train = '//media//datapart//akshara//NWPU-RESISC45//NWPU-RESISC45//train//None_train_00000-of-00001.tfrecord'
datapath_validation = '//media//datapart//akshara//NWPU-RESISC45//NWPU-RESISC45//validation//None_validation_00000-of-00001.tfrecord'
datapath_test = '//media//datapart//akshara//NWPU-RESISC45//NWPU-RESISC45//test//None_test_00000-of-00001.tfrecord'
def _extract_fn(tfrecord):
    features = {
        'image/encoded': tf.FixedLenFeature([], tf.string),
        'image/format': tf.FixedLenFeature([], tf.string),
        'image/class/label': tf.FixedLenFeature([], tf.int64),
        'image/height': tf.FixedLenFeature([], tf.int64),
        'image/width': tf.FixedLenFeature([], tf.int64),
        'image/channels': tf.FixedLenFeature([], tf.int64)
    }
    parsed_example = tf.parse_single_example(tfrecord, features)
    image_de = tf.io.decode_raw(parsed_example['image/encoded'], tf.uint8)
    img_height = tf.cast(parsed_example['image/height'], tf.int32)
    img_width = tf.cast(parsed_example['image/width'], tf.int32)
    img_channel = tf.cast(parsed_example['image/channels'], tf.int32)
    img_shape = tf.stack([img_height, img_width, img_channel])
    label = tf.cast(parsed_example['image/class/label'], tf.int64)
    image = tf.reshape(image_de, img_shape)
    #label = parsed_example['image/class/label']
    return image, img_shape, label
########################################################################################################################
#########################################################################################################################
"""
# Pipeline of dataset and iterator
dataset = tf.data.TFRecordDataset(datapath)
# Parse the record into tensors.
dataset = dataset.map(_extract_fn)
# Generate batches
dataset = dataset.batch(1)
# Create a one-shot iterator
iterator = dataset.make_one_shot_iterator()
image, img_shape, label = iterator.get_next()
with tf.Session() as sess:
try:
print(sess.run(img_shape))
image_batch=sess.run(image)
print(image_batch)
img_bas=tf.cast(image_batch,tf.uint8)
plt.imshow(image_batch[0,:,:,:]*255)
plt.show()
except tf.errors.OutOfRangeError:
pass"""
########################################################################################################################
########################################################################################################################
#INITIALIZATION FOR THE CNN ARCHITECTURE
#Layer 1
filter_size_conv1 = [5,5]
num_filters_conv1 = 32
filter_shape_pool1 = [2,2]
#Layer 2
filter_size_conv2 = [3,3]
num_filters_conv2 = 64
filter_shape_pool2 = [2,2]
#Placeholders
x = tf.placeholder(tf.float32, shape = [None, img_size,img_size,num_channels], name='x')
y = tf.placeholder(tf.int32, shape= [None], name = 'ytrue') #Output data placeholder
y_one_hot = tf.one_hot(y,45)
y_true_cls = tf.argmax(y_one_hot, dimension=1)
########################################################################################################################
########################################################################################################################
def new_conv_layer(input, num_input_channels, filter_size, num_filters, name):
    with tf.variable_scope(name) as scope:
        # Shape of the filter-weights for the convolution
        shape = [filter_size, filter_size, num_input_channels, num_filters]
        # Create new weights (filters) with the given shape
        weights = tf.Variable(tf.truncated_normal(shape, stddev=0.05))
        # Create new biases, one for each filter
        biases = tf.Variable(tf.constant(0.05, shape=[num_filters]))
        # TensorFlow operation for convolution
        layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
        # Add the biases to the results of the convolution
        layer += biases
        return layer, weights

def new_pool_layer(input, name):
    with tf.variable_scope(name) as scope:
        # TensorFlow operation for 2x2 max pooling
        layer = tf.nn.max_pool(value=input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        return layer

def new_relu_layer(input, name):
    with tf.variable_scope(name) as scope:
        # TensorFlow operation for the ReLU activation
        layer = tf.nn.relu(input)
        return layer

def new_fc_layer(input, num_inputs, num_outputs, name):
    with tf.variable_scope(name) as scope:
        # Create new weights and biases
        weights = tf.Variable(tf.truncated_normal([num_inputs, num_outputs], stddev=0.05))
        biases = tf.Variable(tf.constant(0.05, shape=[num_outputs]))
        # Multiply the input and weights, and then add the bias-values
        layer = tf.matmul(input, weights) + biases
        return layer
# CONVOLUTIONAL LAYER 1
layer_conv1, weights_conv1 = new_conv_layer(input=x, num_input_channels=3, filter_size=5, num_filters=32, name ="conv1")
# Pooling Layer 1
layer_pool1 = new_pool_layer(layer_conv1, name="pool1")
# RelU layer 1
layer_relu1 = new_relu_layer(layer_pool1, name="relu1")
# CONVOLUTIONAL LAYER 2
layer_conv2, weights_conv2 = new_conv_layer(input=layer_pool1, num_input_channels=32, filter_size=3, num_filters=64, name= "conv2")
# Pooling Layer 2
layer_pool2 = new_pool_layer(layer_conv2, name="pool2")
# RelU layer 2
layer_relu2 = new_relu_layer(layer_pool2, name="relu2")
# FLATTENED LAYER
num_features = layer_relu2.get_shape()[1:4].num_elements()
layer_flat = tf.reshape(layer_pool2, [-1, num_features])
# FULLY-CONNECTED LAYER 1
layer_fc1 = new_fc_layer(layer_flat, num_inputs=num_features, num_outputs=1000, name="fc1")
# RelU layer 3
layer_relu3 = new_relu_layer(layer_fc1, name="relu3")
# FULLY-CONNECTED LAYER 2
layer_fc2 = new_fc_layer(input=layer_relu3, num_inputs=1000, num_outputs=45, name="fc2")
# Use Softmax function to normalize the output
with tf.variable_scope("Softmax"):
    y_pred = tf.nn.softmax(layer_fc2)
    y_pred_cls = tf.argmax(y_pred, dimension=1)
# Use Cross entropy cost function
with tf.name_scope("cross_ent"):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=layer_fc2, labels=y_pred)
    cost = tf.reduce_mean(cross_entropy)
# Use Adam Optimizer
with tf.name_scope("optimizer"):
    optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate).minimize(cost)
# Accuracy
with tf.name_scope("accuracy"):
    correct_prediction = tf.equal(y_pred_cls, y_true_cls)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# setup the initialisation operator
init_op = tf.global_variables_initializer()
# Pipeline of dataset and iterator
dataset_train = tf.data.TFRecordDataset(datapath_train)
dataset_validation = tf.data.TFRecordDataset(datapath_validation)
dataset_test = tf.data.TFRecordDataset(datapath_test)
# Parse the record into tensors.
dataset_train = dataset_train.map(_extract_fn)
dataset_validation = dataset_validation.map(_extract_fn)
dataset_test = dataset_test.map(_extract_fn)
# Generate batches
dataset_train = dataset_train.batch(FLAGS.batch_size)
iterator_train = dataset_train.make_initializable_iterator()
next_element_train = iterator_train.get_next()
dataset_validation = dataset_validation.batch(FLAGS.batch_size)
iterator_validation = dataset_validation.make_initializable_iterator()
next_element_validation = iterator_validation.get_next()
dataset_test = dataset_test.batch(FLAGS.batch_size)
iterator_test = dataset_test.make_initializable_iterator()
next_element_test = iterator_test.get_next()
print('\n Starting the CNN train')
# Initialize the FileWriter
writer = tf.summary.FileWriter("Training_FileWriter/")
"""
# create a summary for our cost and accuracy
train_cost_summary = tf.summary.scalar("train_cost", cost)
train_acc_summary = tf.summary.scalar("train_accuracy", accuracy)
test_cost_summary = tf.summary.scalar("test_cost", cost)
test_acc_summary = tf.summary.scalar("test_accuracy", accuracy)"""
#PERFORM THE CNN OPERATIONS
with tf.Session() as sess:
    sess.run(init_op)
    sess.run(iterator_test.initializer)
    # Add the model graph to TensorBoard
    writer.add_graph(sess.graph)
    # Loop over number of epochs
    print('\nTraining')
    for epoch in range(FLAGS.num_epochs):
        sess.run(iterator_train.initializer)
        sess.run(iterator_validation.initializer)
        start_time = time.time()
        """train_accuracy = 0
        validation_accuracy = 0
        acc_train_avg = 0
        val_acc_avg = 0"""
        for batch in range(0, int(25200/FLAGS.batch_size)):
            img_train, shp_train, lbl_train = sess.run(next_element_train)
            #_, loss_train, acc_train, _train_cost_summary, _train_acc_summary = sess.run([optimizer, cost, accuracy, train_cost_summary, train_acc_summary], feed_dict = {x: img_train, y: lbl_train})
            _, loss_train, acc_train = sess.run([optimizer, cost, accuracy], feed_dict = {x: img_train, y: lbl_train})
            #train_accuracy += acc_train
            #writer.add_summary(_train_cost_summary, epoch + 1)
            #writer.add_summary(_train_acc_summary, epoch + 1)
        end_time = time.time()
        #acc_train_avg = (train_accuracy/(int(25200/FLAGS.batch_size)))
        #TRAINING
        print("Epoch "+str(epoch+1)+" completed : Time usage "+str(int(end_time-start_time))+" seconds")
        print("\tAccuracy:")
        print("\t- Training Loss:\t{}", loss_train)
        print("\t- Training Accuracy:\t{}", acc_train)
The output after training is shown below:
Training
Epoch 1 completed : Time usage 122 seconds
Accuracy:
- Training Loss: {} 0.0
- Training Accuracy: {} 0.035714287
Validation
- Validation Accuracy: {} 0.035714287
Validation Loss: {} 0.0
Epoch 2 completed : Time usage 120 seconds
Accuracy:
- Training Loss: {} 0.0
- Training Accuracy: {} 0.035714287
Validation
- Validation Accuracy: {} 0.035714287
Validation Loss: {} 0.0
Epoch 3 completed : Time usage 120 seconds
Accuracy:
- Training Loss: {} 0.0
- Training Accuracy: {} 0.035714287
Validation
- Validation Accuracy: {} 0.035714287
Validation Loss: {} 0.0
Epoch 4 completed : Time usage 120 seconds
Accuracy:
- Training Loss: {} 0.0
- Training Accuracy: {} 0.035714287
Validation
- Validation Accuracy: {} 0.035714287
Validation Loss: {} 0.0
The model is not learning at all. I have inspected the code several times and the logic seems to be fine. What could be the probable reason why the loss and accuracy stay constant, even after changing the learning rate and the number of epochs? I have also tried generating several datasets.