
I am training a CNN model in Keras (TensorFlow backend), using on-the-fly augmentation with fit_generator(). The model takes images as input and is supposed to predict the steering angle for a self-driving car. I have tried changing the batch size, learning rate, etc., but it doesn't help.

The training freezes at the end of the first epoch.

Please help!

BATCH_SIZE=32
INPUT_IMAGE_ROWS=160
INPUT_IMAGE_COLS=320
INPUT_IMAGE_CHANNELS=3
AUGMENTATION_NUM_BINS=200
NUM_EPOCHS=3
AUGMENTATION_BIN_MAX_PERC=0.5
AUGMENTATION_FACTOR=3

import csv
import cv2
import numpy as np
from random import shuffle
from sklearn.model_selection import train_test_split
import keras
from keras.callbacks import Callback
import math
from keras.preprocessing.image import *

print("\nLoading the dataset from file ...")

def load_dataset(file_path):
    dataset = []
    with open(file_path) as csvfile:
        reader = csv.reader(csvfile)
        for line in reader:
            try:
                dataset.append({'center':line[0], 'left':line[1], 'right':line[2], 'steering':float(line[3]),
                            'throttle':float(line[4]), 'brake':float(line[5]), 'speed':float(line[6])})
            except:
                continue # some images throw error during loading
    return dataset

dataset = load_dataset('C:\\Users\\kiit1\\Documents\\steering angle prediction\\dataset_coldivision\\data\\driving_log.csv')
print("Loaded {} samples from file {}".format(len(dataset),'C:\\Users\\kiit1\\Documents\\steering angle prediction\\dataset_coldivision\\data\\driving_log.csv'))



print("Partioning the dataset:")

shuffle(dataset)

#partitioning data into 80% training, 19% validation and 1% testing

X_train,X_validation=train_test_split(dataset,test_size=0.2)
X_validation,X_test=train_test_split(X_validation,test_size=0.05)

print("X_train has {} elements.".format(len(X_train)))
print("X_validation has {} elements.".format(len(X_validation)))
print("X_test has {} elements.".format(len(X_test)))
print("Partitioning the dataset complete.")


def generate_batch_data(dataset, batch_size = 32):

    global augmented_steering_angles
    global epoch_steering_count
    global epoch_bin_hits
    batch_images = np.zeros((batch_size, INPUT_IMAGE_ROWS, INPUT_IMAGE_COLS, INPUT_IMAGE_CHANNELS))
    batch_steering_angles = np.zeros(batch_size)

    while 1:
        for batch_index in range(batch_size):

            # select a random image from the dataset
            image_index = np.random.randint(len(dataset))
            image_data = dataset[image_index]

            while 1:
                try:
                    image, steering_angle = load_and_augment_image(image_data)

                except:
                    continue


                bin_idx = int (steering_angle * AUGMENTATION_NUM_BINS / 2)

                if( epoch_bin_hits[bin_idx] < epoch_steering_count/AUGMENTATION_NUM_BINS*AUGMENTATION_BIN_MAX_PERC
                    or epoch_steering_count<500 ):
                    batch_images[batch_index] = image
                    batch_steering_angles[batch_index] = steering_angle
                    augmented_steering_angles.append(steering_angle)

                    epoch_bin_hits[bin_idx] = epoch_bin_hits[bin_idx] + 1
                    epoch_steering_count = epoch_steering_count + 1
                    break

        yield batch_images, batch_steering_angles

print("\nTraining the model ...")

class LifecycleCallback(keras.callbacks.Callback):

    def on_epoch_begin(self, epoch, logs={}):
        pass

    def on_epoch_end(self, epoch, logs={}):
        global epoch_steering_count
        global epoch_bin_hits
        global bin_range
        epoch_steering_count = 0
        epoch_bin_hits = {k:0 for k in range(-bin_range, bin_range)}

    def on_batch_begin(self, batch, logs={}):
        pass

    def on_batch_end(self, batch, logs={}):
        self.losses.append(logs.get('loss'))

    def on_train_begin(self, logs={}):
        print('Beginning training')
        self.losses = []

    def on_train_end(self, logs={}):
        print('Ending training')

# Compute the correct number of samples per epoch based on batch size
def compute_samples_per_epoch(array_size, batch_size):
    num_batches = array_size / batch_size
    samples_per_epoch = math.ceil(num_batches)
    samples_per_epoch = samples_per_epoch * batch_size
    return samples_per_epoch


def load_and_augment_image(image_data, side_camera_offset=0.2):

    # select a value between 0 and 2 to switch between the center, left and right image
    index = np.random.randint(3)
    if (index==0):
        image_file = image_data['left'].strip()
        angle_offset = side_camera_offset
    elif (index==1):
        image_file = image_data['center'].strip()
        angle_offset = 0.
    elif (index==2):
        image_file = image_data['right'].strip()
        angle_offset = - side_camera_offset

    steering_angle = image_data['steering'] + angle_offset

    image = cv2.imread(image_file)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # apply a mixture of several augmentation methods
    image, steering_angle = random_transform(image, steering_angle)

    return image, steering_angle


augmented_steering_angles = []

epoch_steering_count = 0
bin_range = int(AUGMENTATION_NUM_BINS / 4 * 3)
epoch_bin_hits = {k:0 for k in range(-bin_range, bin_range)}



#flips image about y-axis
def horizontal_flip(image,steering_angle):

    flipped_image=cv2.flip(image,1);
    steering_angle=-steering_angle
    return flipped_image,steering_angle

def translate(image,steering_angle,width_shift_range=50.0,height_shift_range=5.0):

    tx = width_shift_range * np.random.uniform() - width_shift_range / 2
    ty = height_shift_range * np.random.uniform() - height_shift_range / 2

     # new steering angle
    steering_angle += tx / width_shift_range * 2 * 0.2

    transformed_matrix=np.float32([[1,0,tx],[0,1,ty]])
    rows,cols=(image.shape[0],image.shape[1])

    translated_image=cv2.warpAffine(image,transformed_matrix,(cols,rows))
    return translated_image,steering_angle

def brightness(image,bright_increase=None):

    if(image.shape[2]>1):
        image_hsv=cv2.cvtColor(image,cv2.COLOR_RGB2HSV)
    else:
        image_hsv=image

    if bright_increase:
        image_hsv[:,:,2] += bright_increase
    else:
        bright_increase = int(30 * np.random.uniform(-0.3,1))
        image_hsv[:,:,2] = image[:,:,2] + bright_increase

    image = cv2.cvtColor(image_hsv, cv2.COLOR_HSV2RGB)
    return image

def rotation(image,rotation_range=5):

    image=random_rotation(image,rotation_range);
    return image


# Randomly shift each channel within the given intensity range
def channel_shift(image, intensity=30, channel_axis=2):

    image = random_channel_shift(image, intensity, channel_axis)
    return image

# Crop and resize the image
def crop_resize_image(image, cols=INPUT_IMAGE_COLS, rows=INPUT_IMAGE_ROWS, top_crop_perc=0.1, bottom_crop_perc=0.2):

    height = image.shape[0]
    width = image.shape[1]

    # crop top and bottom
    top_rows = int(height*top_crop_perc)
    bottom_rows = int(height*bottom_crop_perc)
    image = image[top_rows:height-bottom_rows, 0:width]

    # resize to the final size even if the aspect ratio is changed
    image = cv2.resize(image, (cols, rows), interpolation=cv2.INTER_LINEAR)
    return image


# Apply a sequence of random transformations for better generalization and to prevent overfitting
def random_transform(image, steering_angle):

    # all further transformations are done on the smaller image to reduce the processing time
    image = crop_resize_image(image)

    # every second image is flipped horizontally
    if np.random.random() < 0.5:
        image, steering_angle = horizontal_flip(image, steering_angle)

    image, steering_angle = translate(image, steering_angle)
    image = rotation(image)
    image = brightness(image)
    image = channel_shift(image)

    return img_to_array(image), steering_angle



from keras.models import Sequential, Model
from keras.layers.core import Lambda, Dense, Activation, Flatten, Dropout
from keras.layers.convolutional import Cropping2D, Convolution2D
from keras.layers.advanced_activations import ELU
from keras.layers.noise import GaussianNoise
from keras.optimizers import Adam


print("\nBuilding and compiling the model ...")

model = Sequential()
model.add(Lambda(lambda x: (x / 127.5) - 1.0, input_shape=(INPUT_IMAGE_ROWS, INPUT_IMAGE_COLS, INPUT_IMAGE_CHANNELS)))
# Conv layer 1: 16 filters of size (8, 8) with strides (4, 4)
model.add(Convolution2D(16, 8, 8, subsample=(4, 4), border_mode="same"))
model.add(ELU())
# Conv layer 2: 32 filters of size (5, 5) with strides (2, 2)
model.add(Convolution2D(32, 5, 5, subsample=(2, 2), border_mode="same"))
model.add(ELU())
# Conv layer 3: 64 filters of size (5, 5) with strides (2, 2)
model.add(Convolution2D(64, 5, 5, subsample=(2, 2), border_mode="same"))
model.add(Flatten())
model.add(Dropout(.5))
model.add(ELU())
model.add(Dense(512))
model.add(Dropout(.5))
model.add(ELU())
model.add(Dense(1))

model.summary()
adam = Adam(lr=0.0001)
model.compile(loss='mse', optimizer=adam)



lifecycle_callback = LifecycleCallback()

train_generator = generate_batch_data(X_train, BATCH_SIZE)
validation_generator = generate_batch_data(X_validation, BATCH_SIZE)

samples_per_epoch = compute_samples_per_epoch((len(X_train)*AUGMENTATION_FACTOR), BATCH_SIZE)
nb_val_samples = compute_samples_per_epoch((len(X_validation)*AUGMENTATION_FACTOR), BATCH_SIZE)

history = model.fit_generator(train_generator,
                              validation_data = validation_generator,
                              samples_per_epoch = ((len(X_train) // BATCH_SIZE ) * BATCH_SIZE) * 2,
                              nb_val_samples = ((len(X_validation) // BATCH_SIZE ) * BATCH_SIZE) * 2,
                              nb_epoch = NUM_EPOCHS, verbose=1,
                              )

print("\nTraining the model ended.")][1]
Do any of the posted answers work for you? - Autonomous

2 Answers

0
votes

You have a weird structure for the data generator and that is most likely causing this issue, though I cannot be completely sure.

Your structure is as follows:

while 1:
    ....
    for _ in range(batch_size):
        randomly select an image  # this is inefficient, see below for comments
        while 1:
            process image
            if epoch is not done:
                collect images in a list
                break
    yield ...

Now,

  1. Do not choose images randomly at each iteration. Instead, shuffle your dataset once at the start of each epoch and then pick images sequentially.

  2. As far as I understand, "if epoch is not done, then break" looks like a typo. Did you mean: if the epoch is not done then collect images, otherwise break? Your break is inside the if, which means that the first time the condition holds it will exit the innermost while 1 loop. Surely that is not what you intend to do, right?

  3. The yield is outside the for loop. You should yield each batch, so if the for loop is iterating over batches, then the yield should be inside that for loop.

The structure of a basic data generator should be:

while 1:
    shuffle entire dataset once  # not applicable for massive datasets
    for _ in range(n_batches_per_epoch):
        get a data batch

        Optionally, do some preprocessing  # preferably on the entire batch
        # rather than one sample at a time; you could also preprocess the
        # entire dataset up front if it's simple enough, e.g. mean subtraction

        yield batches, labels

I would suggest you rewrite the data generator. You could look at the myGenerator() function on this page for a basic data generator. Once you have written the generator, test it as a stand-alone function to make sure it outputs data indefinitely and keeps track of epochs. A sketch of such a generator is given below.
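
For illustration only, here is a minimal sketch of such a generator following the structure above. It assumes the dataset is the list of dicts built by load_dataset() and that load_and_augment_image(), X_train and BATCH_SIZE from the question are in scope; the names simply mirror the question's code.

import numpy as np
from random import shuffle

def generate_batch_data(dataset, batch_size=32):
    n_batches_per_epoch = len(dataset) // batch_size
    while 1:
        # reshuffle once per epoch, then walk through the data sequentially
        shuffle(dataset)
        for batch_idx in range(n_batches_per_epoch):
            batch_samples = dataset[batch_idx * batch_size:(batch_idx + 1) * batch_size]
            batch_images = []
            batch_steering_angles = []
            for sample in batch_samples:
                image, steering_angle = load_and_augment_image(sample)
                batch_images.append(image)
                batch_steering_angles.append(steering_angle)
            # one yield per batch, inside the for loop
            yield np.array(batch_images), np.array(batch_steering_angles)

# test it stand-alone before handing it to fit_generator()
gen = generate_batch_data(X_train, BATCH_SIZE)
images, angles = next(gen)
print(images.shape, angles.shape)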

0
votes

In short, it is hard to say which part is problematic: maybe the data, maybe the model, or something else. So please be patient, and you will resolve the issue eventually.

First of all, you can train a baseLine model without data augmentation. If your data augmentation is helpful, you should expect a performance improvement after applying data augmentation in the new augmLine model.

If baseLine behaves similarly to augmLine, you may consider changing your network design. For example, in your current design: 1) Conv2D layers without any activation are very rare, and you may want to use relu or tanh (see the sketch below); and 2) ELU(alpha) is known to be sensitive to the alpha value.
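
As an illustration only, adding an explicit activation to the question's Keras 1-style conv layers could look like the sketch below; relu is just one possible choice, not necessarily the best one here.

# sketch: explicit activations on the conv layers (Keras 1 style API, matching the question)
model.add(Convolution2D(16, 8, 8, subsample=(4, 4), border_mode="same", activation='relu'))
model.add(Convolution2D(32, 5, 5, subsample=(2, 2), border_mode="same", activation='relu'))
model.add(Convolution2D(64, 5, 5, subsample=(2, 2), border_mode="same", activation='relu'))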

If baseLine actually works fine, that is an indicator that augmLine's data is problematic. To ensure the correctness of the augmented data, plot both the image data and the target values and verify them manually (a small sketch follows below). One common mistake in image data augmentation is that, if the target values depend on the input image, you have to generate new target values according to the augmented image. Sometimes this task is not trivial.
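
For example, a quick visual sanity check could look like this sketch, assuming the question's load_and_augment_image() and X_train are in scope; matplotlib is only used here for inspection.

import numpy as np
import matplotlib.pyplot as plt

# sketch: show a few augmented images together with their adjusted steering angles
fig, axes = plt.subplots(1, 4, figsize=(16, 4))
for ax in axes:
    sample = X_train[np.random.randint(len(X_train))]
    image, steering_angle = load_and_augment_image(sample)
    ax.imshow(image.astype('uint8'))  # img_to_array() returns floats
    ax.set_title("steering = {:.3f}".format(steering_angle))
    ax.axis('off')
plt.show()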

Note that, to have a fair comparison, you need to keep the validation data unchanged for both experiments.