We tried transfer learning with the Keras ResNet50 application (TensorFlow backend) on our own dataset of 2000 classes, with 14000 images in the training set and 5261 images in the validation set. The loss and accuracy we got for training were very different from those for validation. We then used the same images for both training and validation, i.e. we trained on the 14000 images and validated on the same 14000 images, and the outcome was similar: high training accuracy and low validation accuracy.
Keras version: 2.1.6
Tensorflow version: 1.8.0
The code (using the same dataset for both training and validation) is below:
from __future__ import print_function
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input, decode_predictions
from keras.models import *
from keras.layers import *
from keras.callbacks import *
from keras.preprocessing.image import ImageDataGenerator
from datetime import datetime
from keras.optimizers import SGD
import numpy as np
# --- Run configuration -------------------------------------------------------
# Mini-batch size; lower it if the GPU runs out of memory.
batch_size = 28
# Square input resolution. ResNet50 and VGG are typically used at 224,
# InceptionResNetV2 and Xception at 299.
img_height = img_width = 224
channels = 3
input_shape = (img_height, img_width, channels)
# Path the checkpoint callback below writes the model to.
best_model = 'best_model.h5'

# --- Training data -----------------------------------------------------------
# Every image is passed through the ResNet50 `preprocess_input` function
# before it reaches the network.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
train_generator = train_datagen.flow_from_directory(
    'data/train',  # root directory of the training images
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
)

# Number of classes and of images, as discovered by the generator.
classes = len(train_generator.class_indices)
n_of_train_samples = train_generator.samples

# --- Callbacks ---------------------------------------------------------------
# Checkpoint with save_best_only=True, plus early stopping on 'val_acc'
# with a patience of 3 epochs.
checkpoint_cb = ModelCheckpoint(filepath=best_model, verbose=0, save_best_only=True)
early_stop_cb = EarlyStopping(monitor='val_acc', patience=3, verbose=0)
callbacks = [checkpoint_cb, early_stop_cb]
# Pre-trained ResNet50 base (ImageNet weights) without its classifier head.
base_model = ResNet50(input_shape=input_shape, weights='imagenet', include_top=False)

# First: train only the top layers (which are randomly initialized),
# i.e. freeze all layers that belong to the pre-trained ResNet50 base.
# NOTE(review): the frozen base contains BatchNormalization layers. In
# Keras 2.1.x such layers reportedly still normalise with mini-batch
# statistics while training but with their stored moving averages during
# validation/inference. That mismatch is a likely cause of a large
# train/validation gap even on identical data -- confirm against the Keras
# documentation / issue tracker before trusting the reported metrics.
for layer in base_model.layers:
    layer.trainable = False

# Cut the base just before its built-in 'avg_pool' layer so that the final
# feature map can be fed to the custom pooling head below.
pool_layer = base_model.get_layer('avg_pool')
base_model = Model(base_model.input, pool_layer.input)
# The former `base_model.layers.pop()` call was removed: popping from the
# `layers` list after the Model has been built does not change the graph.

# Head hyper-parameters. Only dropout[0] is used below.
dropout = [.25, .25]
dense = 1024

last = base_model.output
# Max- and average-pool the feature map and use both as features.
# pool_size=(7, 7) presumably covers the whole map for a 224x224 input --
# TODO confirm if img_height is changed.
max_pooled = MaxPooling2D(pool_size=(7, 7), name='maxpool')(last)
avg_pooled = AveragePooling2D(pool_size=(7, 7), name='avgpool')(last)
x = concatenate([max_pooled, avg_pooled], axis=1)
x = Flatten()(x)
# Keras 2 spelling of the initializer argument (was the Keras 1 `init=`).
x = Dense(dense, kernel_initializer='uniform', activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(dropout[0])(x)
# One softmax output per class found by the training generator.
x = Dense(classes, activation='softmax')(x)
model = Model(base_model.input, outputs=x)
print("Start time: %s" % datetime.now())

# Compile the model (should be done *after* setting layers to non-trainable).
sgd = SGD(lr=1e-2, momentum=0.9)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

# Number of whole batches in the training set; a final partial batch is
# not counted.
steps = n_of_train_samples // batch_size

# Train the model on the new data for a few epochs.
# Validation deliberately reuses the training generator in this experiment.
model.fit_generator(
    train_generator,
    steps_per_epoch=steps,
    epochs=3,
    validation_data=train_generator,
    validation_steps=steps,
    callbacks=callbacks,
)

print("End time: %s" % datetime.now())
The training results are below:
Found 14306 images belonging to 2000 classes.
Start time: 2018-05-21 10:51:34.459545
Epoch 1/3
510/510 [==============================] - 10459s 21s/step - loss: 5.6433 - acc: 0.1538 - val_loss: 9.8465 - val_acc: 0.0024
Epoch 2/3
510/510 [==============================] - 10258s 20s/step - loss: 1.3632 - acc: 0.8550 - val_loss: 10.3264 - val_acc: 0.0044
Epoch 3/3
510/510 [==============================] - 63640s 125s/step - loss: 0.2367 - acc: 0.9886 - val_loss: 10.4537 - val_acc: 0.0034
End time: 2018-05-22 10:17:42.028052
We understand that the same dataset should not be used for both training and validation, but we cannot understand why Keras reports such large differences in both loss and accuracy between training and validation when the dataset is the same for both.
P.S. We also tried the same dataset (2000 classes, with 14000 images as the training set and 5261 images as the validation set) with the fast.ai library's ResNet50, and there the training loss and validation loss do not differ much. The code and results with the fast.ai library are below:
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *
from datetime import datetime
# --- Settings for the fast.ai comparison run ---------------------------------
arch = resnet50   # network architecture
sz = 224          # image size passed to the transforms
bs = 28           # batch size
PATH = "data/"    # dataset root handed to ImageClassifierData.from_paths

# Transforms derived from the architecture and image size, then the data
# object and the learner built from them.
tfms = tfms_from_model(arch, sz)
data = ImageClassifierData.from_paths(PATH, tfms=tfms, bs=bs)
learn = ConvLearner.pretrained(arch, data, precompute=False)

print("Start time: %s" % datetime.now())
# Presumably learning rate 1e-2 for 5 cycles -- verify against the
# fast.ai `fit` signature.
learn.fit(1e-2, 5)
print("End time: %s" % datetime.now())
Start time: 2018-05-02 18:08:51.644750
0%| | 1/487 [00:14<2:00:00, 14.81s/it, loss=tensor(7.5704)]
[0. 6.13229 5.2504 0.26458]
[1. 3.70098 2.74378 0.6752 ]
[2. 1.80197 1.08414 0.88106]
[3. 0.83221 0.50391 0.9424 ]
[4. 0.45565 0.31056 0.95554]
End time: 2018-05-03 00:27:13.147758