from __future__ import print_function
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import BatchNormalization
from keras import backend as K
# Custom "score accuracy": fraction of predictions matching the target after rounding
def sacc(y_true, y_pred):
    return K.mean(K.equal(K.round(y_true), K.round(y_pred)))
batch_size = 256
epochs = 10000
img_rows, img_cols, channels = 32, 32, 3 # input image dimensions
# X_train / X_val / X_test (and the matching y_* targets) are assumed to be
# loaded and scaled elsewhere before this point
if K.image_data_format() == 'channels_first':
    X_train = X_train.reshape(X_train.shape[0], channels, img_rows, img_cols)
    X_val = X_val.reshape(X_val.shape[0], channels, img_rows, img_cols)
    X_test = X_test.reshape(X_test.shape[0], channels, img_rows, img_cols)
    input_shape = (channels, img_rows, img_cols)
else:
    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, channels)
    X_val = X_val.reshape(X_val.shape[0], img_rows, img_cols, channels)
    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, channels)
    input_shape = (img_rows, img_cols, channels)
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
model = Sequential()
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same', input_shape=input_shape))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.compile(loss=keras.losses.mean_squared_error, optimizer=keras.optimizers.Adadelta(), metrics=[sacc])
history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(X_val, y_val))
score = model.evaluate(X_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
I have this Keras model, which has a regression output as the final layer instead of a softmax. I am trying to predict a score (0.0 to 100.0) for a given image. The issue I am seeing is that it takes more than 10,000 epochs to train, and the MSE decreases very slowly. Is that expected, or is there something wrong with my code? Any tips or comments are appreciated.
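For reference, the final Dense(1) layer has no activation, so predictions are raw unbounded floats; a quick sanity check of a few outputs looks like this (a minimal sketch, assuming the model has been fit as above):

# Inspect a handful of raw predictions; each is a single float that the
# linear output unit does not constrain to the 0.0-100.0 target range
preds = model.predict(X_val[:5])
print(preds.flatten())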
sgd, adam, or rmsprop may converge faster than Adadelta. – DJK

optimizer = Adam(lr=0.001) -- Higher learning rates do faster training, but may be too much and simply "miss the point", diverging from good results. – Daniel Möller
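Following these comments, a minimal sketch of the suggested optimizer swap (lr=0.001 is the value from Daniel Möller's comment, which is also Adam's default in Keras 2; only the compile call changes, the rest of the script stays the same):

from keras.optimizers import Adam

# Recompile with Adam instead of Adadelta; a larger lr speeds up training
# but can overshoot and diverge, as noted in the comment above
model.compile(loss=keras.losses.mean_squared_error,
              optimizer=Adam(lr=0.001),
              metrics=[sacc])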