I'm training a model with the TensorFlow Object Detection API on my own data. The model I'm using is ssd_mobilenet_v1,
initialized from the pretrained COCO checkpoint.
My dataset consists of 12 classes, and each class has 110 images, so there are 1320 images in total.
Training runs fine, but the classification loss starts to increase at some point.
I don't think the small size of the dataset matters during the training phase, because the images are all similar; I actually extracted them from video.
So what should I do? Should I stop training at around 10k iterations? Or is there any parameter tuning or data augmentation that could help?
This is my config file, in which I only adjusted the directories, the data_augmentation_options,
and the hard_example_miner
settings:
# Detector definition: an SSD model with its box coder, anchor matcher,
# anchor generator, input resizer, box predictor, feature extractor, loss,
# and post-processing settings.
model {
ssd {
# 12 object classes, matching the custom dataset described in the question.
num_classes: 12
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
# matched_threshold and unmatched_threshold are both 0.5.
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
# num_layers is 6, scales run from 0.2 to 0.95, and five aspect ratios are listed.
num_layers: 6
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
}
}
image_resizer {
# Fixed input shape: height 320, width 180 (a portrait aspect ratio).
fixed_shape_resizer {
height: 320
width: 180
}
}
box_predictor {
convolutional_box_predictor {
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
# NOTE(review): use_dropout is false, so dropout_keep_probability
# presumably has no effect here -- confirm.
use_dropout: false
dropout_keep_probability: 0.8
kernel_size: 1
box_code_size: 4
apply_sigmoid_to_scores: false
# These conv_hyperparams carry the same values as the ones under
# feature_extractor below.
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.9997,
epsilon: 0.001,
}
}
}
}
feature_extractor {
# MobileNet v1 backbone with depth_multiplier 1.0.
type: 'ssd_mobilenet_v1'
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.9997,
epsilon: 0.001,
}
}
}
loss {
# Sigmoid classification loss plus smooth-L1 localization loss, both
# weighted 1.0 (see classification_weight / localization_weight).
classification_loss {
weighted_sigmoid {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
# Hard example mining is driven by the classification loss
# (loss_type: CLASSIFICATION), with max_negatives_per_positive set to 3.
hard_example_miner {
num_hard_examples: 600
iou_threshold: 0.99
loss_type: CLASSIFICATION
max_negatives_per_positive: 3
min_negatives_per_image: 0
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
post_processing {
# Non-max suppression settings; max_detections_per_class and
# max_total_detections are both 100.
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
# SIGMOID score conversion, in line with the weighted_sigmoid
# classification loss configured above.
score_converter: SIGMOID
}
}
}
# Training settings: batch size, optimizer and learning-rate schedule,
# fine-tuning checkpoint, step budget, and data augmentation.
train_config: {
batch_size: 96
optimizer {
rms_prop_optimizer: {
learning_rate: {
exponential_decay_learning_rate {
initial_learning_rate: 0.004
# NOTE(review): decay_steps (800720) is far larger than num_steps
# (100000) below, so the learning rate presumably stays close to
# initial_learning_rate for the whole run -- confirm, and consider a
# value sized to this dataset.
decay_steps: 800720
decay_factor: 0.95
}
}
momentum_optimizer_value: 0.9
decay: 0.9
epsilon: 1.0
}
}
fine_tune_checkpoint: "/home/dev1/tensorflow/training/data/checkpoint/ssd_mobilenet_v1_coco_2018_01_28/model.ckpt"
# NOTE(review): fine_tune_checkpoint_type and from_detection_checkpoint both
# ask for a detection checkpoint; one of them looks redundant -- confirm
# against the installed Object Detection API version.
fine_tune_checkpoint_type: "detection"
from_detection_checkpoint: true
num_steps: 100000
# Three augmentations are enabled: horizontal flip, SSD random crop, and
# random brightness adjustment, each with no explicit parameters.
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
ssd_random_crop {
}
}
data_augmentation_options {
random_adjust_brightness {
}
}
}
# Training data source: a TFRecord file plus the label map that names the classes.
train_input_reader: {
tf_record_input_reader {
input_path: "/home/dev1/tensorflow/training/data/train.record"
}
# Same label map file as the one used by eval_input_reader.
label_map_path: "/home/dev1/tensorflow/training/data/config/label.pbtxt"
}
# Evaluation settings.
eval_config: {
# NOTE(review): 132 is presumably the number of images in test.record
# (10% of the 1320-image dataset) -- confirm.
num_examples: 132
max_evals: 20
}
# Evaluation data source: a separate TFRecord file, read without shuffling
# by a single reader.
eval_input_reader: {
tf_record_input_reader {
input_path: "/home/dev1/tensorflow/training/data/test.record"
}
# Same label map file as the one used by train_input_reader.
label_map_path: "/home/dev1/tensorflow/training/data/config/label.pbtxt"
shuffle: false
num_readers: 1
}
Set decay_steps
to 5000 so that the learning rate is reduced at step 5000. – Shayan Tabatabaee