I've written some code based on this TensorFlow example. The issue I'm having is that the accuracy I get doesn't make any sense (it's either 1 or 0), so my question is: what am I missing here?
import tensorflow as tf
import numpy as np
import csv
import os

# defining the batch function
def batch(iterable, n=1):
    l = len(iterable)
    for ndx in range(0, l, n):
        yield iterable[ndx:min(ndx + n, l)]
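# e.g. list(batch([1, 2, 3, 4, 5], 2)) -> [[1, 2], [3, 4], [5]]
# (the last batch may be shorter than n)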

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
Training_File = 'Training.csv'
Test_File = 'Test.csv'
numberOFClasses = 19
batchSize = 19

# read training data
filePointer = open(Training_File, 'r', newline='')
reader = csv.reader(filePointer)
Training_Data = []
Training_Labels = []
row = next(reader)
len(row)
#### getting Training_Data and labels
for row in reader:
    Training_Data.append(row[:-2])
    Training_Labels.append(row[-1])
# close the training file and get data and labels from the test file
len(Training_Data)
filePointer.close()
filePointer = open(Test_File, 'r', newline='')
reader = csv.reader(filePointer)
Test_Data = []
Test_Labels = []
row = next(reader)
for row in reader:
    Test_Data.append(row[:-2])
    Test_Labels.append(row[-1])
len(Test_Labels)
filePointer.close()
len(Training_Data[0])

x = tf.placeholder('float', [None, len(row[:-2])])
w = tf.Variable(tf.zeros([len(row[:-2]), numberOFClasses]))
b = tf.Variable(tf.zeros([numberOFClasses]))
model = tf.add(tf.matmul(x, w), b)
y_ = tf.placeholder(tf.float32, [None, numberOFClasses])
y = tf.nn.softmax(model)
cross_entropy = -tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1])
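# (for this cross-entropy formula, y_ is meant to hold one row of numberOFClasses
#  values per sample (e.g. a one-hot encoding of the label), matching y's
#  [batch, numberOFClasses] shape)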
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
sess = tf.Session()
sess.run(tf.global_variables_initializer())

index = 0
batch_xs = []
batch_ys = []
batch_txs = []
batch_tys = []
# training prep: split the data and labels into batches
for i in batch(Training_Data, batchSize):
    batch_xs.append(i)
for i in batch(Training_Labels, batchSize):
    batch_ys.append(i)
for i in batch(Test_Data, batchSize):
    batch_txs.append(i)
for i in batch(Test_Labels, batchSize):
    batch_tys.append(i)
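# batch_xs / batch_ys / batch_txs / batch_tys are now lists of batches;
# each entry holds up to batchSize rows (the last one may be shorter)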
#print(np.reshape(batch_ys[len],(1,batchSize)))
# training loop
for i in range(len(batch_xs) - 1):
    sess.run(train_step, feed_dict={x: batch_xs[i], y_: np.reshape(batch_ys[i], (1, batchSize))})

# accuracy on the test batches
correct_prediction = tf.equal(tf.arg_max(y, 1), tf.arg_max(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
for i in range(len(batch_txs) - 1):
    print(sess.run(accuracy, feed_dict={x: batch_txs[i], y_: np.reshape(batch_tys[i], (1, batchSize))}))
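(For reference, a quick shape check of what actually gets fed per batch would look like the snippet below; the prints are just illustrative and not part of the example itself.)

print(np.shape(batch_xs[0]))                              # (19, number of features), fits x: [None, numFeatures]
print(np.shape(np.reshape(batch_ys[0], (1, batchSize))))  # (1, 19), fits y_: [None, 19] only because batchSize == numberOFClasses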
UPDATE: I've changed the size of the batches:
.............................................
numberOFClasses = 19
batchSize = 19 * 3
....................................
for i in range(int(len(batch_xs)/batchSize)):
    print(sess.run(train_step, feed_dict={x: batch_xs[i], y_: np.reshape(batch_ys[i], (batchSize, numberOFClasses))}))

correct_prediction = tf.equal(tf.arg_max(y, 1), tf.arg_max(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
for i in range(len(batch_txs) - 1):
    print(sess.run(accuracy, feed_dict={x: batch_txs[i], y_: np.reshape(batch_tys[i], (1, batchSize))}))
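# (note: with batchSize = 57, np.reshape(batch_tys[i], (1, batchSize)) produces a
#  (1, 57) array, while y_ is still declared as [None, numberOFClasses] = [None, 19])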
The result is still the same, so I just don't get what I'm missing here.
2nd Update
Running this part of the code:

for j in range(len(batch_xs)-1):
    print(sess.run(train_step, feed_dict={x: batch_xs[j], y_: np.reshape(batch_ys[j], (numberOFClasses, 3))}))

delivers a huge error message, but I guess this part is relevant:
InvalidArgumentError (see above for traceback): Incompatible shapes: [19,3] vs. [57,19]
[[Node: mul = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](_recv_Placeholder_1_0, Log)]]
So since my batch size is three times the number of classes, I should get 57 predictions -> y_.
Shaping the feed of y_ as [57, 1]:

for j in range(len(batch_xs)-1):
    print(sess.run(train_step, feed_dict={x: batch_xs[j], y_: np.reshape(batch_ys[j], (batchSize, 1))}))

The print delivers None as a return value, but no error, which is (I guess) OK.
But running the accuracy part still delivers 1 and 0, as mentioned in the beginning.
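If I read the shape error right, y_ is expected to have shape (batchSize, numberOFClasses), i.e. one row per sample. A minimal sketch of what I think the label feed would then have to look like (assuming the last CSV column really holds integer class labels 0..numberOFClasses-1; np.eye is just one way to build the one-hot rows):

labels = np.asarray(batch_ys[0]).astype(np.int32)   # assumes labels are integers 0..numberOFClasses-1
one_hot = np.eye(numberOFClasses)[labels]           # shape (batchSize, numberOFClasses), one-hot rows
sess.run(train_step, feed_dict={x: batch_xs[0], y_: one_hot})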
The test and training data and labels are 100% correct!
Here is part of the end of the CSV file:

Comment from gdelab: "... reshapes accordingly in the feed_dicts (to set the shape to (batch_size, numOfClasses)). If it is given as the class index for each sample (a 1-D vector of length batch_size, containing numbers from 0 to numClasses - 1), then you should change the reshape (to (batch_size, 1)) and replace argmax(y_) by y_."