I am trying to train a TensorFlow Object Detection model (pretrained on COCO) on my own dataset. I followed this guide: https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/running_pets.md and I think I have correctly converted my data to TFRecord files. Since my work computer runs Windows and is not powerful enough, I have to run the training in the cloud, so I decided to use Google's Cloud ML Engine. I have managed to get the job running, but all I get is a repeated warning throughout training and evaluation:
I should be seeing the result of each step, but there are only those messages until the end of the job. I have tried with both mobilenet and faster_rcnn. I have no idea where this comes from: whether it's a problem with my TFRecords, with Cloud ML Engine, or something else. Would anyone have an idea?
Here is the code that creates my TFRecord files:
import json
import os

import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util

flags = tf.app.flags
flags.DEFINE_string('output_path', 'data/tfrecords/train.record', 'Path to output TFRecord')
flags.DEFINE_string('dataset_path', 'data/train_test/', 'Path to dataset')
flags.DEFINE_string('images_directory', 'formatted_train/', 'directory of images inside dataset directory')
flags.DEFINE_string('boxes_filepath', 'formatted_train.json', 'json filepath inside dataset directory')
FLAGS = flags.FLAGS

IMAGES_DIRECTORY = os.path.join(FLAGS.dataset_path, FLAGS.images_directory)
BOXES_PATH = os.path.join(FLAGS.dataset_path, FLAGS.boxes_filepath)

label_map = {
    "signature": 1,
    "paraphe": 2,
    "coche": 3
}


def create_tf_example(frame, image_name):
    with Image.open(os.path.join(IMAGES_DIRECTORY, image_name)) as img:
        img_width, img_height = img.size
    height = img_height  # Image height
    width = img_width  # Image width
    filename = image_name  # Filename of the image. Empty if image is not from file
    encoded_image_data = tf.gfile.FastGFile(os.path.join(IMAGES_DIRECTORY, filename), 'rb').read()
    image_format = b'png'  # b'jpeg' or b'png'
    print("Width: {}".format(width))
    print("Height: {}".format(height))

    xmins = []  # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = []  # List of normalized right x coordinates in bounding box (1 per box)
    ymins = []  # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = []  # List of normalized bottom y coordinates in bounding box (1 per box)
    classes_text = []  # List of string class name of bounding box (1 per box)
    classes = []  # List of integer class id of bounding box (1 per box)

    for box in frame:
        # box["width"] / box["height"] are expected to hold the dimensions of the
        # full frame, so the coordinates below end up normalized to [0, 1]
        current_xmin = box["x1"] / box["width"]
        current_xmax = box["x2"] / box["width"]
        current_ymin = box["y1"] / box["height"]
        current_ymax = box["y2"] / box["height"]
        current_class_text = box["tags"][0]
        current_class = label_map[current_class_text]
        print("Processing bounding box...")
        print("Xmin: {}".format(current_xmin))
        print("Xmax: {}".format(current_xmax))
        print("Ymin: {}".format(current_ymin))
        print("Ymax: {}".format(current_ymax))
        print("Class text: {}".format(current_class_text))
        print("Class: {}".format(current_class))
        print()
        xmins.append(current_xmin)
        xmaxs.append(current_xmax)
        ymins.append(current_ymin)
        ymaxs.append(current_ymax)
        classes_text.append(current_class_text.encode("utf8"))
        classes.append(current_class)

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename.encode("utf8")),
        'image/source_id': dataset_util.bytes_feature(filename.encode("utf8")),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example


def main(_):
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    image_list = os.listdir(IMAGES_DIRECTORY)
    with open(BOXES_PATH, "r") as f:
        full_frame_data = json.load(f)
    for img_index, img_name in enumerate(image_list):
        str_img_index = str(img_index)
        if str_img_index in full_frame_data["frames"]:
            print("Processing image {} with index {}...".format(img_name, img_index))
            frame_data = full_frame_data["frames"][str_img_index]
            print(frame_data)
            current_tf_example = create_tf_example(frame_data, img_name)
            writer.write(current_tf_example.SerializeToString())
    writer.close()


if __name__ == '__main__':
    tf.app.run()
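To check the records themselves, I can read the file back and inspect one parsed example. A minimal sketch, assuming TF 1.x (the same API level as tf.python_io above) and the default output path:

import tensorflow as tf

for record in tf.python_io.tf_record_iterator('data/tfrecords/train.record'):
    example = tf.train.Example.FromString(record)
    feat = example.features.feature
    # Box coordinates should be floats in [0, 1]; the label list should be non-empty
    print(feat['image/object/bbox/xmin'].float_list.value)
    print(feat['image/object/class/label'].int64_list.value)
    break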
And here is my config file (for the MobileNet). I have tried num_classes with both 3 and the original 90 (I'm not sure whether I should change it; I want to predict 3 new classes):
model {
  ssd {
    num_classes: 3
    image_resizer {
      fixed_shape_resizer {
        height: 300
        width: 300
      }
    }
    feature_extractor {
      type: "ssd_mobilenet_v2"
      depth_multiplier: 1.0
      min_depth: 16
      conv_hyperparams {
        regularizer {
          l2_regularizer {
            weight: 3.99999989895e-05
          }
        }
        initializer {
          truncated_normal_initializer {
            mean: 0.0
            stddev: 0.0299999993294
          }
        }
        activation: RELU_6
        batch_norm {
          decay: 0.999700009823
          center: true
          scale: true
          epsilon: 0.0010000000475
          train: true
        }
      }
      use_depthwise: true
    }
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    box_predictor {
      convolutional_box_predictor {
        conv_hyperparams {
          regularizer {
            l2_regularizer {
              weight: 3.99999989895e-05
            }
          }
          initializer {
            truncated_normal_initializer {
              mean: 0.0
              stddev: 0.0299999993294
            }
          }
          activation: RELU_6
          batch_norm {
            decay: 0.999700009823
            center: true
            scale: true
            epsilon: 0.0010000000475
            train: true
          }
        }
        min_depth: 0
        max_depth: 0
        num_layers_before_predictor: 0
        use_dropout: false
        dropout_keep_probability: 0.800000011921
        kernel_size: 3
        box_code_size: 4
        apply_sigmoid_to_scores: false
      }
    }
    anchor_generator {
      ssd_anchor_generator {
        num_layers: 6
        min_scale: 0.20000000298
        max_scale: 0.949999988079
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        aspect_ratios: 3.0
        aspect_ratios: 0.333299994469
      }
    }
    post_processing {
      batch_non_max_suppression {
        score_threshold: 0.300000011921
        iou_threshold: 0.600000023842
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
    normalize_loss_by_num_matches: true
    loss {
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_loss {
        weighted_sigmoid {
        }
      }
      hard_example_miner {
        num_hard_examples: 3000
        iou_threshold: 0.990000009537
        loss_type: CLASSIFICATION
        max_negatives_per_positive: 3
        min_negatives_per_image: 3
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
  }
}
train_config {
  batch_size: 24
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    ssd_random_crop {
    }
  }
  optimizer {
    rms_prop_optimizer {
      learning_rate {
        exponential_decay_learning_rate {
          initial_learning_rate: 0.00400000018999
          decay_steps: 800720
          decay_factor: 0.949999988079
        }
      }
      momentum_optimizer_value: 0.899999976158
      decay: 0.899999976158
      epsilon: 1.0
    }
  }
  fine_tune_checkpoint: "gs://my-bucket/data/model.ckpt"
  num_steps: 200000
  fine_tune_checkpoint_type: "detection"
}
train_input_reader {
  label_map_path: "gs://my-bucket/data/label_map.pbtxt"
  tf_record_input_reader {
    input_path: "gs://my-bucket/data/train.record"
  }
}
eval_config {
  num_examples: 8000
  max_evals: 10
  use_moving_averages: false
}
eval_input_reader {
  label_map_path: "gs://my-bucket/data/label_map.pbtxt"
  shuffle: false
  num_readers: 1
  tf_record_input_reader {
    input_path: "gs://my-bucket/data/val.record"
  }
}
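For reference, the label_map.pbtxt referenced by label_map_path above needs to use the same name-to-id mapping as the label_map dict in my script; in the standard Object Detection API format that would be:

item {
  id: 1
  name: 'signature'
}
item {
  id: 2
  name: 'paraphe'
}
item {
  id: 3
  name: 'coche'
}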
Thank you
Comment: It looks like you have configured num_examples in the eval_config section to be larger than the total number of examples in your tf record files. – tombstone
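A quick way to check that, assuming TF 1.x and a local copy of the eval record (the path below is a placeholder):

import tensorflow as tf

# eval_config.num_examples should not exceed the count printed here
num_records = sum(1 for _ in tf.python_io.tf_record_iterator('data/tfrecords/val.record'))
print(num_records)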