My input is of shape (1, 12000, 250, 150, 3) with labels as (1, 12000, 2) for the CNN. In other words, I am training a CNN on 250x150x3 images with 2 classes; [1,0] or [0,1].
This is ultimately to create a bot to play flappy bird. I have been told that adding LSTMs to classify a few frame concurrently is the way to go. So far I got to 0.984 val_acc with the following purely CNN architecture.
model.add(Conv2D(32, 3, 3, border_mode='same', input_shape=(250,150,3), activation='relu'))
model.add(Conv2D(32, 3, 3, border_mode='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, 3, 3, border_mode='same', activation='relu'))
model.add(Conv2D(64, 3, 3, border_mode='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, 3, 3, border_mode='same', activation='relu'))
model.add(Conv2D(128, 3, 3, border_mode='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(256, 3, 3, border_mode='same', activation='relu'))
model.add(Conv2D(256, 3, 3, border_mode='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
#model.add(LSTM(100, input_shape=(32, 32, 19), return_sequences=True))
model.add(Dense(2))
model.add(Activation('sigmoid'))
model.summary()
The accuracy:
Epoch 15/100
12800/12800 [==============================] - 89s 7ms/step - loss: 0.0390 - acc: 0.9889 - val_loss: 0.1422 - val_acc: 0.9717
Epoch 16/100
12800/12800 [==============================] - 89s 7ms/step - loss: 0.0395 - acc: 0.9883 - val_loss: 0.0917 - val_acc: 0.9821ss: - ETA: 1s - loss: 0.0399 - acc:
Epoch 17/100
12800/12800 [==============================] - 89s 7ms/step - loss: 0.0357 - acc: 0.9902 - val_loss: 0.1383 - val_acc: 0.9816
Epoch 18/100
12800/12800 [==============================] - 89s 7ms/step - loss: 0.0452 - acc: 0.9871 - val_loss: 0.1153 - val_acc: 0.9750
Epoch 19/100
12800/12800 [==============================] - 90s 7ms/step - loss: 0.0417 - acc: 0.9892 - val_loss: 0.1641 - val_acc: 0.9668
Epoch 20/100
12800/12800 [==============================] - 90s 7ms/step - loss: 0.0339 - acc: 0.9904 - val_loss: 0.0927 - val_acc: 0.9840
I have tried add a LSTM layer but I'm not sure what is going wrong:
ValueError Traceback (most recent call last)
<ipython-input-6-59e402ac3b8a> in <module>
26 model.add(Dropout(0.5))
27
---> 28 model.add(LSTM(100, input_shape=(32, 19), return_sequences=True))
29
30 model.add(Dense(2))
E:\Applications\Anaconda3\envs\pygpu\lib\site-packages\keras\engine\sequential.py in add(self, layer)
179 self.inputs = network.get_source_inputs(self.outputs[0])
180 elif self.outputs:
--> 181 output_tensor = layer(self.outputs[0])
182 if isinstance(output_tensor, list):
183 raise TypeError('All layers in a Sequential model '
E:\Applications\Anaconda3\envs\pygpu\lib\site-packages\keras\layers\recurrent.py in __call__(self, inputs, initial_state, constants, **kwargs)
530
531 if initial_state is None and constants is None:
--> 532 return super(RNN, self).__call__(inputs, **kwargs)
533
534 # If any of `initial_state` or `constants` are specified and are Keras
E:\Applications\Anaconda3\envs\pygpu\lib\site-packages\keras\engine\base_layer.py in __call__(self, inputs, **kwargs)
412 # Raise exceptions in case the input is not compatible
413 # with the input_spec specified in the layer constructor.
--> 414 self.assert_input_compatibility(inputs)
415
416 # Collect input shapes to build layer.
E:\Applications\Anaconda3\envs\pygpu\lib\site-packages\keras\engine\base_layer.py in assert_input_compatibility(self, inputs)
309 self.name + ': expected ndim=' +
310 str(spec.ndim) + ', found ndim=' +
--> 311 str(K.ndim(x)))
312 if spec.max_ndim is not None:
313 ndim = K.ndim(x)
ValueError: Input 0 is incompatible with layer lstm_2: expected ndim=3, found ndim=2
Keras docs says the arguments for LSTM are (units, input shape) and so on. I also read somewhere that TimeDistributed() is no longer needed so I didnt include it. Did I make a mistake in calculating the input shape for LSTM or am I missing something else completely?
Edit 1: I have removed flatten() layer and moved LSTM layer to right after conv layers, before fc layers. I have also added a reshape() so as to reshape the 4 dim output of the 4th conv layer to 3 dim which can then be input to the LSTM layer.
model.add(Conv2D(32, 3, 3, border_mode='same', input_shape=(250,150,3), activation='relu'))
model.add(Conv2D(32, 3, 3, border_mode='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
output_1 = model.output_shape
model.add(Conv2D(64, 3, 3, border_mode='same', activation='relu'))
model.add(Conv2D(64, 3, 3, border_mode='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
output_2 = model.output_shape
model.add(Conv2D(128, 3, 3, border_mode='same', activation='relu'))
model.add(Conv2D(128, 3, 3, border_mode='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
output_3 = model.output_shape
model.add(Conv2D(256, 3, 3, border_mode='same', activation='relu'))
model.add(Conv2D(256, 3, 3, border_mode='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
output_4 = model.output_shape
model.add(Reshape((15, 9)))
output_5 = model.output_shape
model.add(LSTM(100, input_shape=(15, 9, 256), return_sequences=True))
These are the shapes of every output:
Conv_1: (None, 125, 75, 32)
Conv_2: (None, 62, 37, 64)
Conv_3: (None, 31, 18, 128)
Conv_4: (None, 15, 9, 256)
The following occurs when I tried to reshape the conv_4 so as to get a 3 dim input into LSTM:
ValueError Traceback (most recent call last)
<ipython-input-21-7f5240e41ae4> in <module>
22 output_4 = model.output_shape
23
---> 24 model.add(Reshape((15, 9)))
25 output_5 = model.output_shape
26 model.add(LSTM(100, input_shape=(15, 9, 256), return_sequences=True))
E:\Applications\Anaconda3\envs\pygpu\lib\site-packages\keras\engine\sequential.py in add(self, layer)
179 self.inputs = network.get_source_inputs(self.outputs[0])
180 elif self.outputs:
--> 181 output_tensor = layer(self.outputs[0])
182 if isinstance(output_tensor, list):
183 raise TypeError('All layers in a Sequential model '
E:\Applications\Anaconda3\envs\pygpu\lib\site-packages\keras\engine\base_layer.py in __call__(self, inputs, **kwargs)
472 if all([s is not None
473 for s in to_list(input_shape)]):
--> 474 output_shape = self.compute_output_shape(input_shape)
475 else:
476 if isinstance(input_shape, list):
E:\Applications\Anaconda3\envs\pygpu\lib\site-packages\keras\layers\core.py in compute_output_shape(self, input_shape)
396 # input shape known? then we can compute the output shape
397 return (input_shape[0],) + self._fix_unknown_dimension(
--> 398 input_shape[1:], self.target_shape)
399
400 def call(self, inputs):
E:\Applications\Anaconda3\envs\pygpu\lib\site-packages\keras\layers\core.py in _fix_unknown_dimension(self, input_shape, output_shape)
384 output_shape[unknown] = original // known
385 elif original != known:
--> 386 raise ValueError(msg)
387
388 return tuple(output_shape)
ValueError: total size of new array must be unchanged
Any help is greatly appreciated.