I'm trying to implement a 3D facial recognition algorithm using CNNs with multiple classes. I have one image generator for RGB images and another for depth images (grayscale). Since I have two distinct inputs, I built two separate CNN models, one with shape=(height, width, 3) and another with shape=(height, width, 1). I can fit each model independently with its respective image generator, but after concatenating the two branches and merging both image generators, I get this warning and error:
WARNING:tensorflow:Model was constructed with shape (None, 400, 400, 1) for input KerasTensor(type_spec=TensorSpec(shape=(None, 400, 400, 1), dtype=tf.float32, name='Depth_Input_input'), name='Depth_Input_input', description="created by layer 'Depth_Input_input'"), but it was called on an input with incompatible shape (None, None)
"ValueError: Input 0 of layer Depth_Input is incompatible with the layer: : expected min_ndim=4, found ndim=2. Full shape received: (None, None)"
What can I do to solve this? Thanks.
Here is my code:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

height = 400
width = 400
shape = (height, width)
# ########################### RGB ############################
model_rgb = tf.keras.models.Sequential()
model_rgb.add(Conv2D(filters=16, kernel_size=3, activation='relu', name="RGB_Input", input_shape=(height,width, 3)))
model_rgb.add(MaxPooling2D(pool_size=2))
model_rgb.add(Dropout(0.3))
model_rgb.add(Conv2D(filters=32, kernel_size=3, activation='relu'))
model_rgb.add(MaxPooling2D(pool_size=2))
model_rgb.add(Conv2D(filters=32, kernel_size=3, activation='relu'))
model_rgb.add(MaxPooling2D(pool_size=2))
model_rgb.add(Conv2D(filters=64, kernel_size=3, activation='relu'))
model_rgb.add(MaxPooling2D(pool_size=2))
model_rgb.add(Conv2D(filters=64, kernel_size=3, activation='relu'))
model_rgb.add(MaxPooling2D(pool_size=2))
#model_rgb.add(Dropout(0.2))
model_rgb.add(Conv2D(filters=128, kernel_size=3, activation='relu'))
model_rgb.add(MaxPooling2D(pool_size=2))
#model_rgb.add(Dropout(0.2))
model_rgb.add(Flatten())
model_rgb.add(Dense(units=512, activation='relu'))
model_rgb.add(Dropout(0.3))
model_rgb.add(Dense(units=128, activation='relu'))
model_rgb.add(Dropout(0.3))
# ########################### DEPTH ###########################
model_depth = tf.keras.models.Sequential()
model_depth.add(Conv2D(filters=16, kernel_size=3, activation='relu', name="Depth_Input", input_shape=(height, width, 1)))
model_depth.add(MaxPooling2D(pool_size=2))
model_depth.add(Dropout(0.3))
model_depth.add(Conv2D(filters=16, kernel_size=3, activation='relu'))
model_depth.add(MaxPooling2D(pool_size=2))
model_depth.add(Conv2D(filters=32, kernel_size=3, activation='relu'))
model_depth.add(MaxPooling2D(pool_size=2))
model_depth.add(Conv2D(filters=32, kernel_size=3, activation='relu'))
model_depth.add(MaxPooling2D(pool_size=2))
model_depth.add(Conv2D(filters=64, kernel_size=3, activation='relu'))
model_depth.add(MaxPooling2D(pool_size=2))
model_depth.add(Conv2D(filters=64, kernel_size=3, activation='relu'))
model_depth.add(MaxPooling2D(pool_size=2))
model_depth.add(Flatten())
model_depth.add(Dense(units=512, activation='relu'))
model_depth.add(Dropout(0.3))
model_depth.add(Dense(units=128, activation='relu'))
model_depth.add(Dropout(0.3))
#### Concatenating branches ####
merge = Concatenate()([model_rgb.output, model_depth.output])
merged_out = Dense(units=16, activation='relu')(merge)
merged_out = Dense(units=2, activation='softmax')(merged_out)
merged_model = Model([model_rgb.input, model_depth.input], merged_out)
merged_model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
history_merged = merged_model.fit(gen_flow,
                                  epochs=70,
                                  shuffle=True)
Here is the code for the generators:
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=20,
                                   width_shift_range=0.4,
                                   height_shift_range=0.4,
                                   shear_range=0.4,
                                   zoom_range=0.4,
                                   horizontal_flip=True,
                                   fill_mode='nearest')
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)
# ########################### RGB ###########################
print("RGB Generators: \n")
train_generator_rgb = train_datagen.flow_from_directory(directory=train_data_rgb,
                                                        target_size=shape,
                                                        class_mode='categorical',
                                                        batch_size=16)
val_generator_rgb = val_datagen.flow_from_directory(directory=val_data_rgb,
                                                    target_size=shape,
                                                    class_mode='categorical',
                                                    batch_size=12)
# ########################### --- ###########################
# ########################### DEPTH ###########################
print("\n\nDepth Generators: \n")
train_generator_depth = train_datagen.flow_from_directory(directory=train_data_depth,
                                                          target_size=shape,
                                                          color_mode="grayscale",
                                                          class_mode='categorical',
                                                          batch_size=16)
val_generator_depth = val_datagen.flow_from_directory(directory=val_data_depth,
                                                      target_size=shape,
                                                      color_mode="grayscale",
                                                      class_mode='categorical',
                                                      batch_size=12)
# ########################### ----- ###########################
def gen_flow_for_two_inputs(X1, X2):
    while True:
        X1i = train_generator_rgb.next()
        X2i = train_generator_depth.next()
        yield [X1i[0], X2i[1]], X1i[1]
# Create generator
gen_flow = gen_flow_for_two_inputs(train_data_rgb, train_data_depth)
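To see what the combined generator actually yields, one can pull a single batch and print the shapes (a debugging sketch, assuming gen_flow as defined above):

# Inspect one batch from the combined generator (diagnostic only);
# the model expects inputs of shape (batch, 400, 400, 3) and (batch, 400, 400, 1)
(x_rgb, x_depth), y = next(gen_flow)
print(x_rgb.shape)    # expected (16, 400, 400, 3)
print(x_depth.shape)  # expected (16, 400, 400, 1)
print(y.shape)        # expected (16, num_classes)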
Comment: The problem may be with the target_size=shape parameter. You declared the same one, shape = (height, width), for both the depth and RGB inputs, but the RGB input should also have a channel dimension. This would explain the error you're getting. TF can handle the batch dimension, but the rest should fit. See this post for reference: stackoverflow.com/questions/64138041/… – Ophir S
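Following up on the comment, here is one way to check what each base generator produces on its own (a sketch; note that target_size is (height, width) only, while the channel count comes from color_mode):

# Check each branch's generator separately (diagnostic sketch)
xb, yb = train_generator_rgb.next()
print(xb.shape)   # should be (16, 400, 400, 3) with the default color_mode="rgb"
xb, yb = train_generator_depth.next()
print(xb.shape)   # should be (16, 400, 400, 1) with color_mode="grayscale"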