For an image classification task, I want to change the activation function used in the output layer of the network from softmax to sigmoid, but after making this change I get an error (the error message is below; no error is reported when using the softmax function).

Softmax code before the change:

def build_graph(top_k):
    # with tf.device('/cpu:0'):
    keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
    images = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 1], name='image_batch')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')

    conv_1 = slim.conv2d(images, 64, [3, 3], 1, padding='SAME', scope='conv1')
    max_pool_1 = slim.max_pool2d(conv_1, [2, 2], [2, 2], padding='SAME')
    conv_2 = slim.conv2d(max_pool_1, 128, [3, 3], padding='SAME', scope='conv2')
    max_pool_2 = slim.max_pool2d(conv_2, [2, 2], [2, 2], padding='SAME')
    conv_3 = slim.conv2d(max_pool_2, 256, [3, 3], padding='SAME', scope='conv3')
    max_pool_3 = slim.max_pool2d(conv_3, [2, 2], [2, 2], padding='SAME')

    flatten = slim.flatten(max_pool_3)
    fc1 = slim.fully_connected(slim.dropout(flatten, keep_prob), 1024, activation_fn=tf.nn.tanh, scope='fc1')
    logits = slim.fully_connected(slim.dropout(fc1, keep_prob), FLAGS.charset_size, activation_fn=None, scope='fc2')
        # logits = slim.fully_connected(flatten, FLAGS.charset_size, activation_fn=None, reuse=reuse, scope='fc')
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))

    global_step = tf.get_variable("step", [], initializer=tf.constant_initializer(0.0), trainable=False)
    rate = tf.train.exponential_decay(2e-4, global_step, decay_steps=2000, decay_rate=0.97, staircase=True)
    train_op = tf.train.AdamOptimizer(learning_rate=rate).minimize(loss, global_step=global_step)
    probabilities = tf.nn.softmax(logits)

Sigmoid code after the change:

def build_graph(top_k):
    # with tf.device('/cpu:0'):
    keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
    images = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 1], name='image_batch')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')

    conv_1 = slim.conv2d(images, 64, [3, 3], 1, padding='SAME', scope='conv1')
    max_pool_1 = slim.max_pool2d(conv_1, [2, 2], [2, 2], padding='SAME')
    conv_2 = slim.conv2d(max_pool_1, 128, [3, 3], padding='SAME', scope='conv2')
    max_pool_2 = slim.max_pool2d(conv_2, [2, 2], [2, 2], padding='SAME')
    conv_3 = slim.conv2d(max_pool_2, 256, [3, 3], padding='SAME', scope='conv3')
    max_pool_3 = slim.max_pool2d(conv_3, [2, 2], [2, 2], padding='SAME')

    flatten = slim.flatten(max_pool_3)
    fc1 = slim.fully_connected(slim.dropout(flatten, keep_prob), 1024, activation_fn=tf.nn.tanh, scope='fc1')
    logits = slim.fully_connected(slim.dropout(fc1, keep_prob), FLAGS.charset_size, activation_fn=None, scope='fc2')
        # logits = slim.fully_connected(flatten, FLAGS.charset_size, activation_fn=None, reuse=reuse, scope='fc')
    # loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))

    global_step = tf.get_variable("step", [], initializer=tf.constant_initializer(0.0), trainable=False)
    rate = tf.train.exponential_decay(2e-4, global_step, decay_steps=2000, decay_rate=0.97, staircase=True)
    train_op = tf.train.AdamOptimizer(learning_rate=rate).minimize(loss, global_step=global_step)
    # probabilities = tf.nn.softmax(logits)
    probabilities = tf.sigmoid(logits)

Error message:

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Traceback (most recent call last):
  File "/usr/local/python3/lib/python3.6/site-packages/tensorflow/python/framework/tensor_shape.py", line 846, in merge_with
    self.assert_same_rank(other)
  File "/usr/local/python3/lib/python3.6/site-packages/tensorflow/python/framework/tensor_shape.py", line 891, in assert_same_rank
    other))
ValueError: Shapes (?,) and (?, 600) must have the same rank

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/python3/lib/python3.6/site-packages/tensorflow/python/ops/nn_impl.py", line 164, in sigmoid_cross_entropy_with_logits
    labels.get_shape().merge_with(logits.get_shape())
  File "/usr/local/python3/lib/python3.6/site-packages/tensorflow/python/framework/tensor_shape.py", line 852, in merge_with
    raise ValueError("Shapes %s and %s are not compatible" % (self, other))
ValueError: Shapes (?,) and (?, 600) are not compatible

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "chinese_rec_my_sigmoid.py", line 379, in <module>
    tf.app.run()
  File "/usr/local/python3/lib/python3.6/site-packages/tensorflow/python/platform/app.py", line 125, in run
    _sys.exit(main(argv))
  File "chinese_rec_my_sigmoid.py", line 346, in main
    train()
  File "chinese_rec_my_sigmoid.py", line 160, in train
    graph = build_graph(top_k=1)
  File "chinese_rec_my_sigmoid.py", line 116, in build_graph
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
  File "/usr/local/python3/lib/python3.6/site-packages/tensorflow/python/ops/nn_impl.py", line 167, in sigmoid_cross_entropy_with_logits
    (logits.get_shape(), labels.get_shape()))
ValueError: logits and labels must have the same shape ((?, 600) vs (?,))

Note: (1) No error is reported when the softmax function is used. (2) 600 is the number of image classification categories.

1 Answer

The problem is in the line that calculates the loss with sigmoid_cross_entropy_with_logits. Unlike sparse_softmax_cross_entropy_with_logits, sigmoid_cross_entropy_with_logits expects the logits tensor and the labels tensor to have the same shape and type. In your case the logits have shape (BatchSize, 600), but the labels tensor has shape (BatchSize,).
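To illustrate the difference between the two APIs (a minimal standalone sketch; the tensor names and batch size here are made up, not taken from your code):

import tensorflow as tf

logits = tf.zeros([4, 600])                    # (batch, num_classes), float32
sparse_labels = tf.zeros([4], dtype=tf.int64)  # integer class indices, shape (batch,)
dense_labels = tf.zeros([4, 600])              # one target per class, shape (batch, num_classes)

# sparse_softmax_cross_entropy_with_logits takes integer class indices:
softmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=sparse_labels)

# sigmoid_cross_entropy_with_logits needs labels with the same shape and dtype as logits:
sigmoid_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=dense_labels)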

I think you should one-hot encode the labels into a (BatchSize, 600) tensor, if your label values are 1-600 (or 0-599), before using the sigmoid loss; one possible way is sketched below.
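For example, reusing the labels, logits, and FLAGS.charset_size names from your build_graph (an untested sketch, assuming the labels are integer class indices in the range 0 to charset_size - 1):

# Convert integer class indices (shape [BatchSize]) into one-hot vectors
# (shape [BatchSize, charset_size]) so they match the shape and dtype of logits.
one_hot_labels = tf.one_hot(labels, depth=FLAGS.charset_size)  # float32 by default
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=one_hot_labels))

# Accuracy can still be computed from the original integer labels:
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))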

However, I believe softmax will give you better results than sigmoid for multi-class classification.