I'm working on a convolutional neural network in TensorFlow and am having trouble with the dropout layers. As recommended, I pass a keep_probability placeholder to the graph and set its value to 0.5 during training and to 1.0 during validation and testing. During training, the results on the validation set are good. However, when I test the network after training, it fails on the test set. Why does the network fail on the test set when it works on the validation set?
I've added the code for training, testing and for the graph itself.
Code for training the network:
import time  # used to time each training step (see `duration` below)

# Training script: builds the graph, then alternates training steps with
# periodic accuracy checks on the training batch and validation passes.
with tf.Graph().as_default():
    # Probability that a neuron's output will be kept during dropout.
    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    global_step = tf.Variable(0, trainable=False)

    # Batched input tensors for the training and validation sets.
    # NOTE(review): if Inputs.datasetInputs is queue-based, the queue runners
    # must be started before the first sess.run below -- not visible here.
    images, labels = Inputs.datasetInputs(image_filenames, label_filenames, FLAGS.batch_size)
    val_images, val_labels = Inputs.datasetInputs(val_image_filenames, val_label_filenames, FLAGS.batch_size)

    # Placeholders shared by the training and validation passes.
    train_data_node = tf.placeholder(tf.float32, shape=[FLAGS.batch_size, FLAGS.image_h, FLAGS.image_w, 3])
    train_labels_node = tf.placeholder(tf.int64, shape=[FLAGS.batch_size, FLAGS.image_h, FLAGS.image_w, 1])
    phase_train = tf.placeholder(tf.bool, name='phase_train')

    # Symbolic tensor only; nothing is calculated until sess.run.
    logits = model.inference(train_data_node, phase_train, FLAGS.batch_size, keep_probability)
    loss = model.cal_loss(logits, train_labels_node)
    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = model.train(loss, global_step)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        # The loop runs steps startstep .. startstep + FLAGS.max_steps - 1.
        last_step = startstep + FLAGS.max_steps
        for step in range(startstep, last_step):
            image_batch, label_batch = sess.run(fetches=[images, labels])
            feed_dict = {
                train_data_node: image_batch,
                train_labels_node: label_batch,
                phase_train: True,
                keep_probability: 0.5
            }

            # Time the optimisation step; `duration` feeds the throughput
            # figures computed below (it was previously never assigned).
            start_time = time.time()
            _, loss_value = sess.run(fetches=[train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                # Evaluate per-class accuracy on the current training batch.
                # This re-runs the forward pass with the training feed, so
                # dropout is still active (keep_probability 0.5).
                pred = sess.run(fetches=logits, feed_dict=feed_dict)
                Utils.per_class_acc(pred, label_batch)

            # Validate every 100 steps and on the final step. The final-step
            # test is relative to startstep so it also fires when resuming.
            if step % 100 == 0 or (step + 1) == last_step:
                # Validate training by running the validation dataset.
                total_val_loss = 0.0
                hist = np.zeros((FLAGS.num_class, FLAGS.num_class))
                for test_step in range(TEST_ITER):
                    val_images_batch, val_labels_batch = sess.run(fetches=[val_images, val_labels])
                    # Original author's note: since we still use mini-batches
                    # in eval, still set bn-layer phase_train = True.
                    # NOTE(review): the test script feeds phase_train=False, so
                    # this validation pass does not exercise the same
                    # (inference-mode) path as testing. A validation/test gap
                    # may originate in how conv_layer_with_bn handles
                    # phase_train -- confirm against that helper.
                    feed_dict = {
                        train_data_node: val_images_batch,
                        train_labels_node: val_labels_batch,
                        phase_train: True,
                        # Dropout is turned off for evaluation: every
                        # activation is kept.
                        keep_probability: 1.0
                    }
                    _val_loss, _val_pred = sess.run(fetches=[loss, logits], feed_dict=feed_dict)
(...)
Code for testing the network:
# Test script: rebuilds the inference graph, restores a checkpoint and runs
# the network over the test data with dropout disabled.
keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
image_filenames, label_filenames = Inputs.get_filename_list(FLAGS.test_dir)

# Both placeholders use testing_batch_size (the value also handed to
# model.inference) so image, label and logit batch dimensions cannot disagree.
# Image shape per the original note: 360, 480, 3.
test_data_node = tf.placeholder(tf.float32, shape=[testing_batch_size, FLAGS.image_h, FLAGS.image_w, FLAGS.image_c])
test_labels_node = tf.placeholder(tf.int64, shape=[testing_batch_size, FLAGS.image_h, FLAGS.image_w, 1])
phase_train = tf.placeholder(tf.bool, name='phase_train')

logits = model.inference(test_data_node, phase_train, testing_batch_size, keep_probability)
loss = model.cal_loss(logits, test_labels_node)
# Class index with the highest score along axis 3 of the logits.
# (`axis` replaces the deprecated `dimension` argument.)
pred = tf.argmax(logits, axis=3)

with tf.Session() as sess:
    # Load checkpoint.
    # NOTE(review): `saver` is not defined in the visible fragment; it must be
    # created after the graph above is built -- confirm which variables it
    # restores match what training saved.
    saver.restore(sess, FLAGS.model_ckpt_dir)

    images, labels = Inputs.get_all_test_data(image_filenames, label_filenames)
    threads = tf.train.start_queue_runners(sess=sess)
    hist = np.zeros((FLAGS.num_class, FLAGS.num_class))
    step = 0
    for image_batch, label_batch in zip(images, labels):
        # Maps graph elements to values.
        feed_dict = {
            test_data_node: image_batch,
            test_labels_node: label_batch,
            phase_train: False,
            # Dropout is turned off for testing: every activation is kept.
            keep_probability: 1.0
        }
        dense_prediction, im = sess.run(fetches=[logits, pred], feed_dict=feed_dict)
(...)
The graph:
def inference(images, phase_train, batch_size, keep_prob):
    """Assemble the encoder/decoder graph and return the classifier output.

    The encoder is five stages of 7x7 conv+BN layers, each followed by
    dropout and a 2x2 max-pool. The decoder mirrors it with five stages of
    dropout, a stride-2 deconvolution and 7x7 conv+BN layers. A final 1x1
    convolution plus bias produces FLAGS.num_class output channels.

    Args:
        images: input tensor; its dimension at index 3 is used as the
            input channel count of the first convolution.
        phase_train: forwarded to every conv_layer_with_bn call and used as
            the `training` argument of every dropout layer.
        batch_size: leading dimension of each deconvolution output shape.
        keep_prob: keep probability; each dropout uses rate 1 - keep_prob.

    Returns:
        The bias-added output of the 1x1 classifier convolution (no
        activation is applied to it here).
    """
    # (conv layer names, dropout name, pool name) for each encoder stage.
    encoder_stages = (
        (("conv1_1", "conv1_2"), "dropout1", "pool1"),
        (("conv2_1", "conv2_2"), "dropout2", "pool2"),
        (("conv3_1", "conv3_2", "conv3_3"), "dropout3", "pool3"),
        (("conv4_1", "conv4_2", "conv4_3"), "dropout4", "pool4"),
        (("conv5_1", "conv5_2", "conv5_3"), "dropout5", "pool5"),
    )
    # (dropout name, deconv name, spatial divisor, conv layer names) for each
    # decoder stage. A divisor of None means full image resolution.
    # The last dropout really is named "dropout1_deconv" (not "_decode").
    decoder_stages = (
        ("dropout5_decode", "up5", 16,
         ("conv_decode5_1", "conv_decode5_2", "conv_decode5_3")),
        ("dropout4_decode", "up4", 8,
         ("conv_decode4_1", "conv_decode4_2", "conv_decode4_3")),
        ("dropout3_decode", "up3", 4,
         ("conv_decode3_1", "conv_decode3_2", "conv_decode3_3")),
        ("dropout2_decode", "up2", 2,
         ("conv_decode2_1", "conv_decode2_2")),
        ("dropout1_deconv", "up1", None,
         ("conv_decode1_1", "conv_decode1_2")),
    )

    # ---- Encoder ----
    net = images
    # Only the very first convolution sees the raw image channels.
    in_channels = images.get_shape().as_list()[3]
    for conv_names, dropout_name, pool_name in encoder_stages:
        for conv_name in conv_names:
            net = conv_layer_with_bn(net, [7, 7, in_channels, 64], phase_train, name=conv_name)
            in_channels = 64
        net = tf.layers.dropout(net, rate=(1 - keep_prob), training=phase_train, name=dropout_name)
        # The argmax indices returned by the pooling op are not used.
        net, _ = tf.nn.max_pool_with_argmax(
            net,
            ksize=[1, 2, 2, 1],
            strides=[1, 2, 2, 1],
            padding='SAME',
            name=pool_name,
        )

    # ---- Decoder ----
    for dropout_name, deconv_name, divisor, conv_names in decoder_stages:
        if divisor is None:
            out_h, out_w = FLAGS.image_h, FLAGS.image_w
        else:
            out_h, out_w = FLAGS.image_h // divisor, FLAGS.image_w // divisor
        net = tf.layers.dropout(net, rate=(1 - keep_prob), training=phase_train, name=dropout_name)
        net = deconv_layer(net, [2, 2, 64, 64], [batch_size, out_h, out_w, 64], 2, deconv_name)
        for conv_name in conv_names:
            net = conv_layer_with_bn(net, [7, 7, 64, 64], phase_train, True, name=conv_name)

    # ---- Classifier ----
    # 1x1 convolution mapping the 64 feature channels to FLAGS.num_class
    # output channels.
    with tf.variable_scope('conv_classifier') as scope:
        weights = _variable_with_weight_decay(
            'weights',
            shape=[1, 1, 64, FLAGS.num_class],
            initializer=tf.contrib.layers.variance_scaling_initializer(),
            wd=None,
        )
        projected = tf.nn.conv2d(net, weights, [1, 1, 1, 1], padding='SAME')
        offsets = _variable_on_cpu('biases', [FLAGS.num_class], tf.constant_initializer(0.0))
        # bias_add simply adds the 1-D bias to the last dimension; it is not
        # an activation function, so the result is the raw classifier output.
        conv_classifier = tf.nn.bias_add(projected, offsets, name=scope.name)

    return conv_classifier
Aucun commentaire:
Enregistrer un commentaire