Keras - Custom Metric producing different values in console vs. model.evaluate()

Question

I am doing a neural network regression problem using Keras with a TensorFlow backend. I need a RMS Error value to determine my model's performance. Unfortunately this does not exist as a out-of-the-box metric for Keras so I made a custom metric. So far so good. My problem though is that the value of my RMS Error in the console is different than when I use model.evaluate() and print to file. Any ideas as to why this is?

#! /usr/bin/env python

'''
Trains 7D QuaLiKiz-NN with a single output (efiTG)
'''

from __future__ import print_function

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from keras.optimizers import RMSprop, adam, Adam
from keras.initializers import TruncatedNormal
from keras import regularizers
from keras import backend as K
import pandas
import numpy
import sys
import os

# Define new Metric: rmse = Root Mean Square Error
def rmse(y_true, y_pred):
    return K.mean(K.sum(K.square( y_true-y_pred )))

# Gets the current file name. Useful for procedurally generating output/log             
files.
file_name =  os.path.basename(sys.argv[0][:-3])

# Define neural network parameters
batch_size = 10
#num_classes = 1
epochs = 1


# Load Data (which is in HDF5 or .h5 format)
store = pandas.HDFStore("unstable_training_gen2_7D_nions0_flat_filter7.h5")
target_df = store['efiITG_GB'].to_frame()  # This one is relatively easy to     
train
input_df = store['input']

# Puts inputs and outputs in the same pandas dataframe.
# Also only keeps overlapping entries.
joined_dataFrame = target_df.join(input_df)

# Remove all negative values
joined_dataFrame = joined_dataFrame[(joined_dataFrame['efiITG_GB']>0)
        & (joined_dataFrame['Ati']>0)
        & (joined_dataFrame['Ate']>0)
        & (joined_dataFrame['An']>0)
        & (joined_dataFrame['qx']>0)
        & (joined_dataFrame['smag']>0)
        & (joined_dataFrame['x']>0)
        & (joined_dataFrame['Ti_Te']>0)]

# Shuffles dataset
shuffled_joined_dataFrame =    
    joined_dataFrame.reindex(numpy.random.permutation(
                                                joined_dataFrame.index))

# Normalizes data (no standardization necessary due to dataset design)
shuffled_joined_dataFrame =     
    shuffled_joined_dataFrame/shuffled_joined_dataFrame \
    .max().astype(numpy.float64)

# Creates a pandas dataframe for the outputs
shuffled_clean_output_df = shuffled_joined_dataFrame['efiITG_GB']

# Creates a pandas dataframe for the inputs
shuffled_clean_input_df = shuffled_joined_dataFrame.drop('efiITG_GB',     
        axis=1)

# Creates training dataset (90% of total data) for outputs
y_train = shuffled_clean_output_df.iloc[:int(
    numpy.round(len(shuffled_clean_output_df)*0.9))]

# Creates training dataset (90% of total data) for inputs
x_train = shuffled_clean_input_df.iloc[:int(
    numpy.round(len(shuffled_clean_input_df)*0.9))]

# Creates testing dataset (10% of total data) for outputs
y_test = shuffled_clean_output_df.iloc[int(
    numpy.round(len(shuffled_clean_output_df)*0.9)):]

# Creates testing dataset (10% of total data) for inputs
x_test = shuffled_clean_input_df.iloc[int(
    numpy.round(len(shuffled_clean_input_df)*0.9)):]

# Deletes pandas dataframes that are no longer needed
del target_df, input_df

# Closes the HDFStore. This is good practice.
store.close()

# Define neural network
model = Sequential()
model.add(Dense(30,
        activation='tanh',
        kernel_initializer=TruncatedNormal(mean=0.0, stddev=0.05,     
        seed=None),
        kernel_regularizer=regularizers.l2(0.0000001),
        input_shape=(7,)))
model.add(Dense(30,
        activation='tanh',
        kernel_initializer=TruncatedNormal(mean=0.0, stddev=0.05, 
        seed=None),
        kernel_regularizer=regularizers.l2(0.0000001)))
model.add(Dense(30,
        activation='tanh',
        kernel_initializer=TruncatedNormal(mean=0.0, stddev=0.05, 
        seed=None),
        kernel_regularizer=regularizers.l2(0.0000001)))
model.add(Dense(1,
        activation='linear'))
#model.add(keras.layers.normalization.BatchNormalization())
model.summary()

# Add CallBacks (including TensorBoard)
tbCallBack = keras.callbacks.TensorBoard(
        log_dir='TensorBoard_logs/' + str(file_name), write_graph=True)
EarlyStoppingCallBack = keras.callbacks.EarlyStopping(
        monitor='val_loss', min_delta=0, patience=5, verbose=0, mode='auto')

model.compile(loss='mean_squared_error',   #categorical_crossentropy
              #optimizer='adam',
              optimizer=Adam(lr=0.001, beta_1=0.9, beta_2=0.999),
              metrics=['accuracy', "mae", "mean_squared_error", rmse])

history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    shuffle=True,
                    verbose=2,
                    validation_data=(x_test, y_test),
                    callbacks=[tbCallBack, EarlyStoppingCallBack])
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

print("score")
print(score)

print("model.metrics_names")
print(model.metrics_names)

# Create output file
OutputFile = open("./Loss-Values/" +str(file_name) +".txt", "w+")
OutputFile.write("Test loss: " + str(score[0]) + "\n")
OutputFile.write("Test accuracy: " + str(score[1]) + "\n")
OutputFile.write("val_mean_absolute_error: " +str(score[2]) + "\n")
OutputFile.write("val_mean_squared_error: " +str(score[3]) + "\n")
OutputFile.write("RMSE: " +str(score[4]) + "\n")
OutputFile.close()

# creates a HDF5 file 'my_model.h5'
model.save("./Saved-Networks/" + str(file_name) +".h5")

I'm just training for 1 epoch for debugging purposes. My console output is:

Epoch 1/1
 - 76s - loss: 0.0224 - acc: 1.5288e-06 - mean_absolute_error: 0.1145 -        
mean_squared_error: 0.0224 - rmse: 0.2237 - val_loss: 0.0116 - val_acc: 
0.0000e+00 - val_mean_absolute_error: 0.0787 - val_mean_squared_error:     
0.0116 - val_rmse: 0.1156
Test loss: 0.0115768997629
Test accuracy: 0.0
score
[0.011576899762892088, 0.0, 0.078728127306243859, 0.01156135053880431,     
0.36995773298057744]
model.metrics_names
['loss', 'acc', 'mean_absolute_error', 'mean_squared_error', 'rmse']
[Finished in 108.322s]

As you can see, the values for

['loss', 'acc', 'mean_absolute_error', 'mean_squared_error', 'rmse']

are fine and agree with the values from

Epoch 1/1 - 76s - loss: 0.0224 - acc: 1.5288e-06 - mean_absolute_error: 0.1145 -
mean_squared_error: 0.0224 - rmse: 0.2237 - val_loss: 0.0116 - val_acc: 0.0000e+00 - val_mean_absolute_error: 0.0787 - val_mean_squared_error:
0.0116 - val_rmse: 0.1156

whilst the value

['loss', 'acc', 'mean_absolute_error', 'mean_squared_error', 'rmse']

is different. Any ideas why this custom Keras metric rmse is behaving differently?

P.S. My output file reads:

Test loss: 0.0115768997629
Test accuracy: 0.0
val_mean_absolute_error: 0.0787281273062
val_mean_squared_error: 0.0115613505388
RMSE: 0.369957732981

Jakub Bartczuk Jakub Bartczuk · Accepted Answer · 2018-02-19T12:24:26

K.mean(K.sum(K.square( y_true-y_pred )))

Is not rmse. It's rather sum of squares.

You don't need the K.sum call. Also you need to take a square root.

Keras - Custom Metric producing different values in console vs. model.evaluate()

1 Answers