I'm training a stereo matching network built as a Siamese CNN. Training works fine, but as soon as I set is_training = False, I get extremely high test errors, higher than those of a randomly initialized network. What I've tried so far:
- printed the individual layers before and after batch_norm --> batch norm is definitely applied during both training and testing. The normalized layers in testing mode also look similar to those in training mode, so the running averages do seem to be applied!
- printed the running averages after each iteration --> they definitely change (see the sketch right after this list)
- saved the model and restored it --> the batch_norm weights are definitely saved, and they are not just 1s and 0s but seem to make sense
- removed the normalization layer --> training works fine, but the error converges at 0.3, so batch_norm gives better results (at least in training mode) and converges about three times faster
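For reference, this is roughly how I read out the running averages (a minimal sketch; it assumes the statistics are the moving_mean / moving_variance variables that tf.contrib.layers.batch_norm creates, and that sess is the session from the training code below):

# Collect the batch-norm statistics variables by name.
bn_stats = [v for v in tf.global_variables()
            if 'moving_mean' in v.name or 'moving_variance' in v.name]
# Print their current values inside the training loop to see whether they move.
for var, value in zip(bn_stats, sess.run(bn_stats)):
    print(var.name, value.ravel()[:5])  # a few entries are enough to spot movement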
The training error starts around 3.2 and drops to 0.15 after 2000 iterations. The test error also starts around 3.2, goes down to 2.6, and then rises to 7. I should also mention that I train on a randomly generated set and test on that same set, so when I say "test error" I am evaluating on the training data with is_training = False. The difference between training and test error therefore comes purely from the training/testing mode of batch_norm, which is why I don't see how this could be an overfitting problem.
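To make the difference between the two modes concrete, here is a tiny numpy sketch of what I understand batch_norm to compute (decay=0.9 as in my code; this is only an illustration, not the actual TF implementation):

import numpy as np

x = np.random.randn(32, 8) * 3.0 + 5.0   # one batch of activations
moving_mean, moving_var = 0.0, 1.0       # running statistics start at 0 / 1
decay, eps = 0.9, 1e-3

# Training mode: normalize with the current batch statistics and
# update the running averages as a side effect.
batch_mean, batch_var = x.mean(axis=0), x.var(axis=0)
x_train = (x - batch_mean) / np.sqrt(batch_var + eps)
moving_mean = decay * moving_mean + (1 - decay) * batch_mean
moving_var = decay * moving_var + (1 - decay) * batch_var

# Testing mode (is_training=False): normalize with the running averages only.
x_test = (x - moving_mean) / np.sqrt(moving_var + eps)

print(x_train.mean(), x_train.std())  # ~0, ~1
print(x_test.mean(), x_test.std())    # far from 0 / 1 until the averages have converged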
I suspect it has something to do with the fact that I share the weights between the two branches of the Siamese network. But then it doesn't make sense that training works fine and that the test error decreases at the beginning.
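To see what the two branches actually end up sharing, I list the variables created under the siamese scope (a small sketch; the scope names follow from the code below):

# Because the right branch is built with reuse=True, it resolves to the exact same
# variables as the left branch, including the 'bn' moving statistics, so there is
# only one set of running averages that is updated from both left and right images.
for v in tf.global_variables():
    if v.name.startswith('siamese_network/conv1/'):
        print(v.name, v.shape)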
I would appreciate any kind of hint that could help. Thanks!
By the way, I'm using Python 3.5 and TensorFlow 1.2.0.
import numpy as np
import tensorflow as tf

def conv_relu(input, kernel_shape, bias_shape, phase, reuse, scope):
    with tf.variable_scope(scope, reuse=reuse):
        weights = tf.get_variable("weights", kernel_shape,
                                  initializer=tf.contrib.layers.xavier_initializer_conv2d())  # Xavier init
        biases = tf.get_variable("biases", bias_shape,
                                 initializer=tf.contrib.layers.xavier_initializer())
        conv = conv2d(input, weights)
        # Batch norm with learnable scale/offset; `phase` switches between the batch
        # statistics (training) and the moving averages (testing).
        normal = tf.contrib.layers.batch_norm(tf.nn.bias_add(conv, biases),
                                              center=True, scale=True,
                                              is_training=phase, decay=0.9,
                                              scope='bn')
        return tf.nn.relu(normal)
def network(input, reuse, disp):
    h1_ = conv_relu(input, [3, 3, n_channels, n_units], [n_units], phase, reuse, 'conv1')
    h2_ = conv_relu_pool(h1_, [3, 3, n_units, n_units], [n_units], phase, reuse, 'conv2')
    h3_ = conv_relu(h2_, [3, 3, n_units, n_units], [n_units], phase, reuse, 'conv3')
    h4_ = conv_relu_pool(h3_, [3, 3, n_units, n_units], [n_units], phase, reuse, 'conv4')
    h5_ = conv_relu(h4_, [3, 3, n_units, n_units], [n_units], phase, reuse, 'conv5')
    h6_ = conv_relu(h5_, [3, 3, n_units, n_units], [n_units], phase, reuse, 'conv6')
    h7_ = conv(h6_, [3, 3, n_units, n_units], [n_units], reuse, 'conv7')
    h8_ = deconv(h7_, [3, 3, n_units, n_units], [n_units],
                 [batch, HimSize, tf.cast(imSize/2 + (disp/2), tf.int32), n_units], reuse, 'conv8')
    h9_ = deconv(h8_, [3, 3, n_units, n_units], [n_units],
                 [batch, imSize, imSize + disp, n_units], reuse, 'conv9')
    return h9_
with tf.name_scope('ImLeft'):
    x_left = tf.placeholder(tf.float32, shape=[None, imSize, imSize, n_channels])
with tf.name_scope('ImRight'):
    x_right = tf.placeholder(tf.float32, shape=[None, imSize, imSize + MaxDisp, n_channels])
with tf.name_scope('Labels'):
    y_ = tf.placeholder(tf.float32, shape=[None, imSize, imSize])
    y_onehot = tf.one_hot(tf.cast(y_, tf.int32), n_classes, axis=3)

phase = tf.placeholder(tf.bool)  # True = training mode, False = testing mode for batch_norm

with tf.variable_scope("siamese_network") as scope:
    h9_left = network(input=x_left, reuse=False, disp=0)
    h9_right = network(input=x_right, reuse=True, disp=MaxDisp)  # shares all variables with the left branch
I then do some transformations and get the output layer:
output = tf.reshape(output_layer_t, [batch * imSize * imSize, n_classes])

with tf.name_scope('Loss'):
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.reshape(y_onehot, (batch * imSize * imSize, n_classes)), logits=output))

# Make sure the batch_norm moving-average updates run together with each training step.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_step = tf.train.AdamOptimizer(learn_rate).minimize(cross_entropy)
# Run the session
saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

for i in range(1000):
    # print(tf.get_collection(tf.GraphKeys.VARIABLES, scope="siamese_network"))
    idx = np.random.randint(160, size=batch)
    _, loss_, summary = sess.run([train_step, cross_entropy, merged_summary_op],
                                 feed_dict={x_left: trainImsLeft[idx, :, :].reshape(-1, imSize, imSize, 1),
                                            x_right: trainImsRight[idx, :, :].reshape(-1, imSize, imSize + MaxDisp, 1),
                                            y_: trainLabs[idx, :, :].reshape(-1, imSize, imSize),
                                            phase: True})
    summary_writer.add_summary(summary, i)

    if (i + 1) % 2 == 0:
        print('------', i)
        # Same data, training mode (batch statistics)
        loss1 = sess.run([cross_entropy],
                         feed_dict={x_left: trainImsLeft.reshape(-1, imSize, imSize, 1),
                                    x_right: trainImsRight.reshape(-1, imSize, imSize + MaxDisp, 1),
                                    y_: trainLabs.reshape(-1, imSize, imSize),
                                    phase: True})
        # Same data, testing mode (moving averages)
        loss0 = sess.run([cross_entropy],
                         feed_dict={x_left: trainImsLeft.reshape(-1, imSize, imSize, 1),
                                    x_right: trainImsRight.reshape(-1, imSize, imSize + MaxDisp, 1),
                                    y_: trainLabs.reshape(-1, imSize, imSize),
                                    phase: False})
        print('training loss: ', loss1)
        print('testing loss: ', loss0)

saver.save(sess, MODEL_FILENAME)