Keras multi-class classification loss is too high

Question

I am training a multi-class classification model to generate text. Below is a sample of the dataset.

state	district	month	rainfall	max_temp	min_temp	max_rh	min_rh	wind_speed	advice
Orissa	Kendrapada	february	0.0	34.6	19.4	88.2	29.6	12.0	chances of foot rot disease in paddy crop; apply urea at 3 weeks after transplanting at active tillering stage for paddy;......
Jharkhand	Saraikela Kharsawan	february	0	35.2	16.6	29.4	11.2	3.6	provide straw mulch and go for intercultural operations to avoid moisture losses from soil; chance of leaf blight disease in potato crop; .......

Below is my code through which the model is made.

def create_model():
    """Build the nine-input, five-output functional Keras model.

    Each scalar feature gets its own ``Input`` and its own 128-unit ReLU
    ``Dense`` branch. The branches are summed element-wise with ``Add`` and
    the sum feeds five separate 30-unit ``Dense`` heads named ``output_1``
    through ``output_5``.

    Returns:
        A ``tf.keras.Model`` that takes the nine inputs (in the order of
        ``feature_names`` below) and returns the five head outputs as a list.
    """
    feature_names = (
        'state', 'district', 'month', 'rainfall', 'max_temp',
        'min_temp', 'max_rh', 'min_rh', 'wind_speed',
    )
    # All Input layers are created before any Dense branch.
    feature_inputs = [
        tf.keras.layers.Input(shape=(1,), name=feature)
        for feature in feature_names
    ]
    branches = [
        layers.Dense(128, activation='relu')(feature_input)
        for feature_input in feature_inputs
    ]
    summed = layers.Add()(branches)
    # No activation argument is passed to the output heads.
    heads = [
        layers.Dense(30, name=f'output_{index}')(summed)
        for index in range(1, 6)
    ]
    return tf.keras.Model(inputs=feature_inputs, outputs=heads)

The code for my model compilation.

# Build the network, then attach the same loss and the same metric type to
# each of the five named output heads.
model = create_model()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(
    optimizer=optimizer,
    loss={f'output_{i}': 'categorical_crossentropy' for i in range(1, 6)},
    # A separate Accuracy instance is created for every head.
    metrics={f'output_{i}': tf.keras.metrics.Accuracy() for i in range(1, 6)},
)

Finally, the problem I am facing concerns the loss and accuracy: the loss is too high and the accuracy stays at zero.

Epoch 499/500
2/2 [==============================] - 0s 11ms/step - loss: 66362.0130 - output_1_loss: 5827.9458 - output_2_loss: 10478.4935 - output_3_loss: 16566.5957 - output_4_loss: 16831.8887 - output_5_loss: 16657.0967 - output_1_accuracy: 0.0000e+00 - output_2_accuracy: 0.0000e+00 - output_3_accuracy: 0.0000e+00 - output_4_accuracy: 0.0000e+00 - output_5_accuracy: 0.0000e+00
Epoch 500/500
2/2 [==============================] - 0s 11ms/step - loss: 66362.0130 - output_1_loss: 5827.9458 - output_2_loss: 10478.4935 - output_3_loss: 16566.5957 - output_4_loss: 16831.8887 - output_5_loss: 16657.0967 - output_1_accuracy: 0.0000e+00 - output_2_accuracy: 0.0000e+00 - output_3_accuracy: 0.0000e+00 - output_4_accuracy: 0.0000e+00 - output_5_accuracy: 0.0000e+00

Kindly help me and correct me where I am wrong. I am a total newbie to this field.

Alternative Model Update

# Single-output alternative: a plain stack of layers ending in one
# 150-unit Dense layer, trained with mean squared error.
model = tf.keras.Sequential()
model.add(feature_layer)
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.1))
model.add(layers.Dense(150))

opt = Adam(learning_rate=0.01)
model.compile(
    loss='mean_squared_error',
    optimizer=opt,
    metrics=['accuracy'],
)

It has the [5, 30]-shaped input reshaped to [150].

What classes are you trying to predict? Only possible classes I see are state, district and month. — yudhiesh
have you tried to reduce the learning rate? maybe it is case of exploding gradient — Vinson Ciawandy
The classes I am trying to predict are the advice texts, which have shape [5,30]. Further on in my code I separate the single [5,30] column into 5 columns, each holding a tensor of shape [30]. — Santosh Kumar
@yudhiesh Well, no they are not one hot encoded. I used the Keras text preprocessing's Tokenizer and pad_sequences. — Santosh Kumar

Experience_In_AI Experience_In_AI · Accepted Answer · 2021-04-26T04:33:16

To improve the model structure, please see the following example code, which includes a "model_simple" alternative to the original network. Train both with the same input data, vary the structure of "model_simple", and find out which structure gives the best accuracy.

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


def create_model():
    """Build the original nine-input, five-output functional Keras model.

    Every scalar feature has its own ``Input`` followed by its own 128-unit
    ReLU ``Dense`` branch. The nine branches are summed element-wise with
    ``Add``, and the sum feeds five independent 30-unit ``Dense`` heads named
    ``output_1`` through ``output_5``.

    Returns:
        A ``tf.keras.Model`` that takes the nine inputs (in the order of
        ``feature_names`` below) and returns the five head outputs as a list.
    """
    feature_names = (
        'state', 'district', 'month', 'rainfall', 'max_temp',
        'min_temp', 'max_rh', 'min_rh', 'wind_speed',
    )
    # All Input layers are created before any Dense branch.
    feature_inputs = [
        tf.keras.layers.Input(shape=(1,), name=feature)
        for feature in feature_names
    ]
    branches = [
        layers.Dense(128, activation='relu')(feature_input)
        for feature_input in feature_inputs
    ]
    summed = layers.Add()(branches)
    # No activation argument is passed to the output heads.
    heads = [
        layers.Dense(30, name=f'output_{index}')(summed)
        for index in range(1, 6)
    ]
    return tf.keras.Model(inputs=feature_inputs, outputs=heads)

def create_model_simple():
    """Build a simpler variant with no per-feature hidden layer.

    The nine scalar inputs are concatenated directly, and the concatenated
    tensor feeds five 30-unit ``Dense`` heads named ``output_1`` through
    ``output_5``.

    Returns:
        A ``tf.keras.Model`` that takes the nine inputs (in the order of
        ``feature_names`` below) and returns the five head outputs as a list.
    """
    feature_names = (
        'state', 'district', 'month', 'rainfall', 'max_temp',
        'min_temp', 'max_rh', 'min_rh', 'wind_speed',
    )
    feature_inputs = [
        tf.keras.layers.Input(shape=(1,), name=feature)
        for feature in feature_names
    ]
    # The raw inputs are joined directly; there are no Dense branches and
    # no Add layer in this variant.
    merged = layers.concatenate(feature_inputs)
    # No activation argument is passed to the output heads.
    heads = [
        layers.Dense(30, name=f'output_{index}')(merged)
        for index in range(1, 6)
    ]
    return tf.keras.Model(inputs=feature_inputs, outputs=heads)

# Build and compile the original multi-branch model, using the same loss
# and the same metric type for each of the five named output heads.
model = create_model()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(
    optimizer=optimizer,
    loss={f'output_{i}': 'categorical_crossentropy' for i in range(1, 6)},
    # A separate Accuracy instance is created for every head.
    metrics={f'output_{i}': tf.keras.metrics.Accuracy() for i in range(1, 6)},
)

# Print the layer table and write a diagram of the graph to disk.
model.summary()
keras.utils.plot_model(
    model,
    to_file='model_structure.png',
    show_dtype=True,
)


# Let's create a simpler model version:
model_simple = create_model_simple()

# Compile `model_simple` itself (not `model`) so the simpler network is the
# one configured for training. It gets its own Adam instance so that the two
# models do not share a single optimizer object.
optimizer_simple = tf.keras.optimizers.Adam(learning_rate=0.01)
model_simple.compile(
    optimizer=optimizer_simple,
    # Same loss and metric type on every head as the original model, so the
    # two structures can be compared like for like.
    loss={f'output_{i}': 'categorical_crossentropy' for i in range(1, 6)},
    metrics={f'output_{i}': tf.keras.metrics.Accuracy() for i in range(1, 6)},
)

# Print the layer table and write a diagram of the graph to disk.
model_simple.summary()

keras.utils.plot_model(model_simple, 'model_simple_structure.png', show_dtype=True)

In particular, please note that the key difference between your original model and the simpler one is that "Add" has been replaced with "Concatenate". An "Add" layer produces an output of the same size as each one of its inputs, whereas the output size of a "Concatenate" layer is the sum of the sizes of its inputs; differences of this kind may have an effect on performance.

Keras multi-class classification loss is too high

1 Answers