from __future__ import print_function
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras import backend as K

def sacc(y_true, y_pred):
    """Return the mean rate at which rounded predictions equal rounded targets.

    Both tensors are rounded with ``K.round`` and compared element-wise
    with ``K.equal``; the mean of that comparison is the metric value.
    """
    rounded_targets = K.round(y_true)
    rounded_predictions = K.round(y_pred)
    matches = K.equal(rounded_targets, rounded_predictions)
    return K.mean(matches)

# Training hyper-parameters and the expected image geometry.
batch_size = 256
epochs = 10000
img_rows, img_cols, channels = 32, 32, 3  # input image dimensions

# Reshape every split to the axis order the active Keras backend expects and
# record the matching per-sample input shape for the first layer.
# NOTE(review): reshape only reinterprets the buffer; it does not transpose
# axes. Confirm the arrays are already stored in the chosen layout.
if K.image_data_format() == 'channels_first':
    X_train = X_train.reshape(X_train.shape[0], channels, img_rows, img_cols)
    X_val = X_val.reshape(X_val.shape[0], channels, img_rows, img_cols)
    X_test = X_test.reshape(X_test.shape[0], channels, img_rows, img_cols)
    input_shape = (channels, img_rows, img_cols)
else:
    # Without this branch, input_shape would be undefined on channels-last
    # backends and the channels-first layout would be overwritten.
    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, channels)
    X_val = X_val.reshape(X_val.shape[0], img_rows, img_cols, channels)
    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, channels)
    input_shape = (img_rows, img_cols, channels)

print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# Small convolutional regressor: two 3x3 convolutions, 2x2 max-pooling, a
# hidden dense layer, and a single linear output unit.
model = Sequential()
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same', input_shape=input_shape))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
# Collapse the pooled feature maps into one vector per image; a Dense layer
# applied to the 4-D tensor would only act on the last axis.
model.add(Flatten())
model.add(Dense(512, activation='relu'))
# One linear unit yields one unbounded score per image for the MSE loss.
# Assumes y_train/y_val hold one scalar score per image -- TODO confirm.
model.add(Dense(1))

model.compile(loss=keras.losses.mean_squared_error, optimizer=keras.optimizers.Adadelta(), metrics=[sacc])

history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(X_val, y_val))

# The score is computed on the validation split, so it is labelled as such.
score = model.evaluate(X_val, y_val, verbose=0)

print('Validation loss:', score[0])
print('Validation accuracy:', score[1])

I have this Keras model, which uses a regression output as the final layer instead of softmax. I am trying to predict a score (0.0 to 100.0) for a given image. The issue I am seeing is that it takes more than 10,000 epochs to train and the MSE decreases very slowly. Is that expected, or is there something wrong with my code? Any tips or comments are appreciated.

You could try different optimizers, such as SGD, Adam, or RMSprop; those may converge faster than Adadelta. – DJK
Also, you can explore the learning rate in them, such as `optimizer = Adam(lr=0.001)`. Higher learning rates train faster, but may be too large and simply "miss the point", diverging from good results. – Daniel Möller
Thanks. I will give that a try. – Nira
@Nira how did you go with this? – Francis Kim

1 Answer


You should avoid using ReLU as the activation function in the output layer, because it produces continuous output in the range 0 to infinity, whereas you want your output to be between 0 and 100.0. For a regression problem, you CAN create the output layer without any activation function, since you are interested in the numerical values without any transformation; this avoids having to apply a reverse transform (reverse_transform) later on the test data to get the actual numerical values.

Regression problems are sensitive to outliers, so check whether your data contains any. Other than this, try using RMSprop or Adam for regression, and do a grid search over different learning rates.