Tensorflow does not train CIFAR - 100 data

Question

I am trying to build a linear classifier with CIFAR - 100 using TensorFlow. I got the code from Martin Gorner's MNIST tutorial and change a bit. When I run this code, tensorflow does not training (code is running but accuracy remains 1.0 and loss(cross entropy remains as 4605.17), I don't know what is wrong, I am actually newbie to TF any help is appreciated.

import pickle
import numpy as np
import os
import tensorflow as tf
from tensorflow.python.framework import tensor_util
import math

#imports data
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict


cifar100_test = {}
cifar100_train = {}
labelMap = {}
labelNames = {}
# Load the raw CIFAR-10 data.
cifar100_test = unpickle('dataset/cifar-100-python/test')
cifar100_train = unpickle('dataset/cifar-100-python/train')
labelMap = unpickle('dataset/cifar-100-python/meta')
#tr for training data and te for testing data, X is data, Y is label
Xtr = cifar100_train[b'data']
Yr = cifar100_train[b'fine_labels']
Xte = cifar100_test[b'data']
Ye = cifar100_test[b'fine_labels']
classNames = labelMap[b'fine_label_names']



num_train = Xtr.shape[0]
num_test = Xte.shape[0]
num_class = len(classNames)

Ytr = np.zeros([num_train, num_class])
Yte = np.zeros([num_test, num_class])

Ytr[0:num_train, Yr[0:num_train]] = 1
Yte[0:num_test, Ye[0:num_test]] = 1


# As a sanity check, we print out the size of the training and test data.
print('Train data shape:', Xtr.shape)
print('Train Label shape:', Ytr.shape)
print('Test data shape:', Xte.shape)
print('Test Label shape:', Yte.shape)
print('Name of Predicted Class:', classNames[0]) #indice of the label name is the indice of the class.



Xtrain = Xtr#[:1000]
Xtest = Xte#[:100]
Ytrain = Ytr#[:1000]
Ytest = Yte#[:100]

print('Train data shape:', Xtrain.shape)
print('Train Label shape:', Ytrain.shape)
print('Test data shape:', Xtest.shape)
print('Test Label shape:', Ytest.shape)



Xtrain = np.reshape(Xtrain,(50000, 32, 32, 3)).transpose(0,1,2,3).astype(float)
Xtest = np.reshape(Xtest,(10000, 32, 32, 3)).transpose(0,1,2,3).astype(float)

Xbatches = np.split(Xtrain, 500); #second number is # of batches
Ybatches = np.split(np.asarray(Ytrain), 500);
                   
XtestB = np.split(Xtest, 100);
YtestB = np.split(Ytest, 100);                   

print('X # of batches:', len(Xbatches))
print('Y # of batches:', len(Ybatches))



# input X: 28x28 grayscale images, the first dimension (None) will index the images in the mini-batch
X = tf.placeholder(tf.float32, [100, 32, 32, 3])
# correct answers will go here
Y_ = tf.placeholder(tf.float32, [100, 100])
# weights W[784, 10]   784=28*28
W = tf.Variable(tf.zeros([3072, 100]))
# biases b[10]
b = tf.Variable(tf.zeros([100]))

# flatten the images into a single line of pixels
# -1 in the shape definition means "the only possible dimension that will preserve the number of elements"
XX = tf.reshape(X, [-1, 3072])

# The model
Y = tf.nn.softmax(tf.matmul(XX, W) + b)

# loss function: cross-entropy = - sum( Y_i * log(Yi) )
#                           Y: the computed output vector
#                           Y_: the desired output vector

# cross-entropy
# log takes the log of each element, * multiplies the tensors element by element
# reduce_mean will add all the components in the tensor
# so here we end up with the total cross-entropy for all images in the batch
cross_entropy = -tf.reduce_mean(Y_ * tf.log(Y)) * 1000.0  # normalized for batches of 100 images,
                                                          # *10 because  "mean" included an unwanted division by 10

# accuracy of the trained model, between 0 (worst) and 1 (best)
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# training, learning rate = 0.005
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)


# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)



for i in range(500):
    # the backpropagation training step
    t, Loss = sess.run([train_step, cross_entropy], feed_dict={X: Xbatches[i], Y_: Ybatches[i]})
    print(Loss)
    print(i)
    

for i in range(100):
    print('accuracy:', sess.run(accuracy, feed_dict={X: XtestB[i], Y_: YtestB[i]}))

Can you try initializing your W variable with random values rather than zeroes? — mrry

ml4294 ml4294 · Accepted Answer · 2017-04-26T14:54:11

You compute the accuracy a hundred times after the training process is completed. Nothing will change there. You should place your print('accuracy:'....) within the for loop in which you perform the backpropagation:

for i in range(500):
    # the backpropagation training step
    t, Loss = sess.run([train_step, cross_entropy], feed_dict={X: Xbatches[i], Y_: Ybatches[i]})
    print(Loss)
    print(i)
    print('accuracy:', sess.run(accuracy, feed_dict={X: XtestB[i], Y_: YtestB[i]}))

Tensorflow does not train CIFAR - 100 data

2 Answers