diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index 6e41f7f4d6c..de3d8cefaaa 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -173,9 +173,10 @@ def _fused_batch_norm(
       `data_format` is `NHWC` and the second dimension if `data_format` is
       `NCHW`.
     decay: decay for the moving average. Reasonable values for `decay` are close
-      to 1.0, typically in the multiple-nines range: 0.999, 0.99, 0.9, etc. Lower
-      `decay` value (recommend trying `decay`=0.9) if model experiences reasonably
-      good training performance but poor validation and/or test performance.
+      to 1.0, typically in the multiple-nines range: 0.999, 0.99, 0.9, etc.
+      Lower `decay` value (recommend trying `decay`=0.9) if model experiences
+      reasonably good training performance but poor validation and/or test
+      performance.
     center: If True, subtract `beta`. If False, `beta` is ignored.
     scale: If True, multiply by `gamma`. If False, `gamma` is
       not used. When the next layer is linear (also e.g. `nn.relu`), this can be
@@ -630,16 +631,12 @@ def batch_norm(
     if need_moments:
       # Calculate the moments based on the individual batch.
       if batch_weights is None:
-        # Use a copy of moving_mean as a shift to compute more reliable moments.
-        shift = math_ops.add(moving_mean, 0)
         if data_format == DATA_FORMAT_NCHW:
-          shift = array_ops.reshape(shift, params_shape_broadcast)
-          mean, variance = nn.moments(inputs, moments_axes, shift=shift,
-                                      keep_dims=True)
+          mean, variance = nn.moments(inputs, moments_axes, keep_dims=True)
           mean = array_ops.reshape(mean, [-1])
           variance = array_ops.reshape(variance, [-1])
         else:
-          mean, variance = nn.moments(inputs, moments_axes, shift=shift)
+          mean, variance = nn.moments(inputs, moments_axes)
       else:
         if data_format == DATA_FORMAT_NCHW:
           mean, variance = nn.weighted_moments(inputs, moments_axes,
@@ -1383,7 +1380,7 @@ def fully_connected(inputs,
   Raises:
     ValueError: if x has rank less than 2 or if its last dimension is not set.
   """
-  if not (isinstance(num_outputs, six.integer_types)):
+  if not isinstance(num_outputs, six.integer_types):
     raise ValueError('num_outputs should be int or long, got %s.',
                      num_outputs)
   layer_variable_getter = _build_variable_getter({'bias': 'biases'})
diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index 1b0a8b12728..d1b35e33c26 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -2356,7 +2356,7 @@ class BatchNormTest(test.TestCase):
       else:
         image_shape = (batch_size, channels, height, width)
         axis = (0, 2, 3)
-      image_values = np.random.rand(*image_shape) + 2
+      image_values = np.random.rand(*image_shape) + 256
       expected_mean = np.mean(image_values, axis=axis)
       expected_var = np.var(image_values, axis=axis)
       if fused:
@@ -2393,9 +2393,9 @@ class BatchNormTest(test.TestCase):
       # The outputs should be close to 0.0 mean and 1.0 variance
       self.assertAllClose(
           np.mean(
-              np_output, axis=axis), [0] * channels, rtol=0.1, atol=0.1)
+              np_output, axis=axis), [0] * channels, rtol=0.001, atol=0.001)
       self.assertAllClose(
-          np.var(np_output, axis=axis), [1] * channels, rtol=0.1, atol=0.1)
+          np.var(np_output, axis=axis), [1] * channels, rtol=0.01, atol=0.01)
       # The gradients should change slowly while updating moving_mean.
       max_diff = np.max(np.abs(images_gradients_value - new_images_gradients))
       self.assertGreaterEqual(max_diff, 0.0)
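Background on the larger test offsets (+256 above, +100 in the python/layers tests below), not part of the patch itself: a one-pass variance of the form E[x^2] - E[x]^2 cancels catastrophically in float32 once the mean dominates the spread, which is exactly the regime these offsets push the tests into. A minimal NumPy sketch of that effect, with illustrative values only:

import numpy as np

x = (np.random.rand(10000) + 256).astype(np.float32)  # large mean, small spread
naive_var = np.mean(x * x) - np.mean(x) ** 2           # one-pass E[x^2] - E[x]^2
two_pass_var = np.mean((x - np.mean(x)) ** 2)          # subtract the mean first
reference = np.var(x.astype(np.float64))               # float64 reference, about 1/12
print(naive_var, two_pass_var, reference)              # the naive value is typically visibly off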
@@ -2558,25 +2558,29 @@ class LayerNormTest(test.TestCase):
       # output_train and output_eval should be the same.
       self.assertAllClose(sess.run([output_train]), sess.run([output_eval]))

-  def doOutputTest(self, input_shape):
-    with self.test_session() as sess:
-      input_values = np.random.rand(*input_shape)
-      inputs = constant_op.constant(
-          input_values, shape=input_shape, dtype=dtypes.float32)
-      output_op = _layers.layer_norm(inputs, scope='LN')
-      # Initialize all variables
-      sess.run(variables_lib.global_variables_initializer())
-      # The mean and variance of the output should be close to 0 and 1
-      # respectively.
-      moments_axis = tuple([i for i in range(1, len(input_shape))])
-      outputs = sess.run(output_op)
-      expected_mean = np.zeros(input_shape[0])
-      expected_var = np.ones(input_shape[0])
-      mean = np.mean(outputs, axis=moments_axis)
-      var = np.var(outputs, axis=moments_axis)
-      tol = 1e-5
-      self.assertAllClose(mean, expected_mean, rtol=tol, atol=tol)
-      self.assertAllClose(var, expected_var, rtol=tol, atol=tol)
+  def doOutputTest(self, input_shape, tol=1e-3):
+    for mu in [0.0, 1e2]:
+      for sigma in [1.0, 0.1]:
+        input_values = np.random.rand(*input_shape) * sigma + mu
+        expected_mean = np.zeros(input_shape[0])
+        expected_var = np.ones(input_shape[0])
+        with ops.Graph().as_default() as g:
+          with self.test_session(graph=g) as sess:
+            inputs = constant_op.constant(input_values, shape=input_shape,
+                                          dtype=dtypes.float32)
+            output_op = _layers.layer_norm(inputs, scope='LN')
+            # Initialize all variables
+            sess.run(variables_lib.global_variables_initializer())
+            # The mean and variance of the output should be close to 0 and 1
+            # respectively.
+            moments_axis = tuple([i for i in range(1, len(input_shape))])
+            outputs = sess.run(output_op)
+            # Make sure that there are no NaNs
+            self.assertFalse(np.isnan(outputs).any())
+            mean = np.mean(outputs, axis=moments_axis)
+            var = np.var(outputs, axis=moments_axis)
+            self.assertAllClose(mean, expected_mean, rtol=tol, atol=tol)
+            self.assertAllClose(var, expected_var, rtol=tol, atol=tol)

   def testOutput2DInput(self):
     self.doOutputTest((10, 300))
@@ -2584,6 +2588,12 @@ class LayerNormTest(test.TestCase):
   def testOutput4DInput(self):
     self.doOutputTest((100, 10, 10, 3))

+  def testOutputSmallInput(self):
+    self.doOutputTest((10, 10, 10, 30))
+
+  def testOutputBigInput(self):
+    self.doOutputTest((1, 100, 100, 1))
+

 class MaxPool2DTest(test.TestCase):

diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py
index 1daf765ab9d..ac5aef7de95 100644
--- a/tensorflow/python/layers/normalization.py
+++ b/tensorflow/python/layers/normalization.py
@@ -178,16 +178,13 @@ class BatchNormalization(base._Layer):  # pylint: disable=protected-access
         broadcast_gamma = None

     if training_value is not False:
-      # Use a copy of moving_mean as a shift to compute more reliable moments.
-      shift = math_ops.add(self.moving_mean, 0)
       if needs_broadcasting:
-        shift = array_ops.reshape(shift, broadcast_shape)
         broadcast_mean, broadcast_variance = nn.moments(
-            inputs, reduction_axes, shift=shift, keep_dims=True)
+            inputs, reduction_axes, keep_dims=True)
         mean = array_ops.reshape(broadcast_mean, [-1])
         variance = array_ops.reshape(broadcast_variance, [-1])
       else:
-        mean, variance = nn.moments(inputs, reduction_axes, shift=shift)
+        mean, variance = nn.moments(inputs, reduction_axes)

       # Prepare updates if necessary.
       if not self.updates:
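For reference, the behavioral change in both batch-norm implementations above is confined to how nn.moments is invoked: the copy of the moving mean that used to be passed as the shift is simply dropped. A minimal sketch of the before/after call, assuming the TF 1.x public API; the tensors below are illustrative stand-ins for the layers' real inputs, reduction axes and moving mean, not code from this patch:

import tensorflow as tf

inputs = tf.placeholder(tf.float32, [None, 28, 28, 64])
reduction_axes = [0, 1, 2]
moving_mean = tf.Variable(tf.zeros([64]), trainable=False)

# Before this change: a copy of the moving mean was passed as the `shift`.
shift = tf.add(moving_mean, 0)
mean_before, variance_before = tf.nn.moments(inputs, reduction_axes, shift=shift)

# After this change: no shift argument is passed to nn.moments.
mean_after, variance_after = tf.nn.moments(inputs, reduction_axes)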
diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py
index 00c392e299b..93efc09ca06 100644
--- a/tensorflow/python/layers/normalization_test.py
+++ b/tensorflow/python/layers/normalization_test.py
@@ -63,16 +63,25 @@ class BNTest(test.TestCase):
     bn = normalization_layers.BatchNormalization(
         axis=1, epsilon=epsilon, momentum=0.9)
     inputs = variables.Variable(
-        np.random.random((5, 4, 3)), dtype=dtypes.float32)
+        np.random.random((5, 4, 3)) + 100, dtype=dtypes.float32)
     training = array_ops.placeholder(dtype='bool')
     outputs = bn.apply(inputs, training=training)

     with self.test_session() as sess:
       # Test training with placeholder learning phase.
       sess.run(variables.global_variables_initializer())
+
+      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
+      np_gamma = np.reshape(np_gamma, (1, 4, 1))
+      np_beta = np.reshape(np_beta, (1, 4, 1))
+
       for _ in range(100):
         np_output, _, _ = sess.run([outputs] + bn.updates,
                                    feed_dict={training: True})
+        # Verify that the axis is normalized during training.
+        normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
+        self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
+        self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)

       # Verify that the statistics are updated during training.
       moving_mean, moving_var = sess.run([bn.moving_mean, bn.moving_variance])
@@ -83,14 +92,6 @@ class BNTest(test.TestCase):
       self.assertAllClose(mean, moving_mean, atol=1e-2)
       self.assertAllClose(variance, moving_var, atol=1e-2)

-      # Verify that the axis is normalized during training.
-      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
-      np_gamma = np.reshape(np_gamma, (1, 4, 1))
-      np_beta = np.reshape(np_beta, (1, 4, 1))
-      normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
-      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
-      self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
-
       # Test inference with placeholder learning phase.
       np_output = sess.run(outputs, feed_dict={training: False})

@@ -104,16 +105,23 @@ class BNTest(test.TestCase):
     bn = normalization_layers.BatchNormalization(
         axis=2, epsilon=epsilon, momentum=0.9)
     inputs = variables.Variable(
-        np.random.random((5, 4, 3)), dtype=dtypes.float32)
+        np.random.random((5, 4, 3)) + 100, dtype=dtypes.float32)
     training = array_ops.placeholder(dtype='bool')
     outputs = bn.apply(inputs, training=training)

     with self.test_session() as sess:
       # Test training with placeholder learning phase.
       sess.run(variables.global_variables_initializer())
+      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
+      np_gamma = np.reshape(np_gamma, (1, 1, 3))
+      np_beta = np.reshape(np_beta, (1, 1, 3))
       for _ in range(100):
         np_output, _, _ = sess.run([outputs] + bn.updates,
                                    feed_dict={training: True})
+        # Verify that the axis is normalized during training.
+        normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
+        self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
+        self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)

       # Verify that the statistics are updated during training.
       moving_mean, moving_var = sess.run([bn.moving_mean, bn.moving_variance])
@@ -124,14 +132,6 @@ class BNTest(test.TestCase):
       self.assertAllClose(mean, moving_mean, atol=1e-2)
       self.assertAllClose(variance, moving_var, atol=1e-2)

-      # Verify that the axis is normalized during training.
-      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
-      np_gamma = np.reshape(np_gamma, (1, 1, 3))
-      np_beta = np.reshape(np_beta, (1, 1, 3))
-      normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
-      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
-      self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
-
       # Test inference with placeholder learning phase.
       np_output = sess.run(outputs, feed_dict={training: False})

@@ -145,16 +145,23 @@ class BNTest(test.TestCase):
     bn = normalization_layers.BatchNormalization(
         axis=1, epsilon=epsilon, momentum=0.9)
     inputs = variables.Variable(
-        np.random.random((5, 4, 3, 6)), dtype=dtypes.float32)
+        np.random.random((5, 4, 3, 6)) + 100, dtype=dtypes.float32)
     training = array_ops.placeholder(dtype='bool')
     outputs = bn.apply(inputs, training=training)

     with self.test_session() as sess:
       # Test training with placeholder learning phase.
       sess.run(variables.global_variables_initializer())
+      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
+      np_gamma = np.reshape(np_gamma, (1, 4, 1, 1))
+      np_beta = np.reshape(np_beta, (1, 4, 1, 1))
       for _ in range(100):
         np_output, _, _ = sess.run([outputs] + bn.updates,
                                    feed_dict={training: True})
+        # Verify that the axis is normalized during training.
+        normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
+        self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
+        self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)

       # Verify that the statistics are updated during training.
       moving_mean, moving_var = sess.run([bn.moving_mean, bn.moving_variance])
@@ -165,14 +172,6 @@ class BNTest(test.TestCase):
       self.assertAllClose(mean, moving_mean, atol=1e-2)
       self.assertAllClose(variance, moving_var, atol=1e-2)

-      # Verify that the axis is normalized during training.
-      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
-      np_gamma = np.reshape(np_gamma, (1, 4, 1, 1))
-      np_beta = np.reshape(np_beta, (1, 4, 1, 1))
-      normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
-      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
-      self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
-
       # Test inference with placeholder learning phase.
       np_output = sess.run(outputs, feed_dict={training: False})

@@ -186,16 +185,23 @@ class BNTest(test.TestCase):
     bn = normalization_layers.BatchNormalization(
         axis=2, epsilon=epsilon, momentum=0.9)
     inputs = variables.Variable(
-        np.random.random((5, 4, 3, 6)), dtype=dtypes.float32)
+        np.random.random((5, 4, 3, 6)) + 100, dtype=dtypes.float32)
     training = array_ops.placeholder(dtype='bool')
     outputs = bn.apply(inputs, training=training)

     with self.test_session() as sess:
       # Test training with placeholder learning phase.
       sess.run(variables.global_variables_initializer())
+      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
+      np_gamma = np.reshape(np_gamma, (1, 1, 3, 1))
+      np_beta = np.reshape(np_beta, (1, 1, 3, 1))
       for _ in range(100):
         np_output, _, _ = sess.run([outputs] + bn.updates,
                                    feed_dict={training: True})
+        # Verify that the axis is normalized during training.
+        normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
+        self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
+        self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)

       # Verify that the statistics are updated during training.
       moving_mean, moving_var = sess.run([bn.moving_mean, bn.moving_variance])
@@ -206,14 +212,6 @@ class BNTest(test.TestCase):
       self.assertAllClose(mean, moving_mean, atol=1e-2)
       self.assertAllClose(variance, moving_var, atol=1e-2)

-      # Verify that the axis is normalized during training.
-      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
-      np_gamma = np.reshape(np_gamma, (1, 1, 3, 1))
-      np_beta = np.reshape(np_beta, (1, 1, 3, 1))
-      normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
-      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
-      self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
-
       # Test inference with placeholder learning phase.
       np_output = sess.run(outputs, feed_dict={training: False})

@@ -227,16 +225,23 @@ class BNTest(test.TestCase):
     bn = normalization_layers.BatchNormalization(
         axis=3, epsilon=epsilon, momentum=0.9)
     inputs = variables.Variable(
-        np.random.random((5, 4, 3, 6)), dtype=dtypes.float32)
+        np.random.random((5, 4, 3, 6)) + 100, dtype=dtypes.float32)
     training = array_ops.placeholder(dtype='bool')
     outputs = bn.apply(inputs, training=training)

     with self.test_session() as sess:
       # Test training with placeholder learning phase.
       sess.run(variables.global_variables_initializer())
+      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
+      np_gamma = np.reshape(np_gamma, (1, 1, 1, 6))
+      np_beta = np.reshape(np_beta, (1, 1, 1, 6))
       for _ in range(100):
         np_output, _, _ = sess.run([outputs] + bn.updates,
                                    feed_dict={training: True})
+        # Verify that the axis is normalized during training.
+        normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
+        self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
+        self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)

       # Verify that the statistics are updated during training.
       moving_mean, moving_var = sess.run([bn.moving_mean, bn.moving_variance])
@@ -247,14 +252,6 @@ class BNTest(test.TestCase):
       self.assertAllClose(mean, moving_mean, atol=1e-2)
       self.assertAllClose(variance, moving_var, atol=1e-2)

-      # Verify that the axis is normalized during training.
-      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
-      np_gamma = np.reshape(np_gamma, (1, 1, 1, 6))
-      np_beta = np.reshape(np_beta, (1, 1, 1, 6))
-      normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
-      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
-      self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
-
       # Test inference with placeholder learning phase.
       np_output = sess.run(outputs, feed_dict={training: False})

@@ -268,17 +265,25 @@ class BNTest(test.TestCase):
     bn = normalization_layers.BatchNormalization(
         axis=-1, epsilon=epsilon, momentum=0.9)
     inputs = variables.Variable(
-        np.random.random((5, 4, 3, 6)), dtype=dtypes.float32)
+        np.random.random((5, 4, 3, 6)) + 100, dtype=dtypes.float32)
     training = array_ops.placeholder(dtype='bool')
     outputs = bn.apply(inputs, training=training)

     with self.test_session() as sess:
       # Test training with placeholder learning phase.
       sess.run(variables.global_variables_initializer())
+      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
+      np_gamma = np.reshape(np_gamma, (1, 1, 1, 6))
+      np_beta = np.reshape(np_beta, (1, 1, 1, 6))

       for _ in range(100):
         np_output, _, _ = sess.run([outputs] + bn.updates,
                                    feed_dict={training: True})
+        # Verify that the axis is normalized during training.
+        normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
+        self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
+        self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
+
       # Verify that the statistics are updated during training.
       moving_mean, moving_var = sess.run([bn.moving_mean, bn.moving_variance])
       np_inputs = sess.run(inputs)
@@ -288,14 +293,6 @@ class BNTest(test.TestCase):
       self.assertAllClose(mean, moving_mean, atol=1e-2)
       self.assertAllClose(variance, moving_var, atol=1e-2)

-      # Verify that the axis is normalized during training.
-      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
-      np_gamma = np.reshape(np_gamma, (1, 1, 1, 6))
-      np_beta = np.reshape(np_beta, (1, 1, 1, 6))
-      normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
-      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
-      self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
-
       # Test inference with placeholder learning phase.
       np_output = sess.run(outputs, feed_dict={training: False})

@@ -309,15 +306,22 @@ class BNTest(test.TestCase):
     bn = normalization_layers.BatchNormalization(
         axis=-1, epsilon=epsilon, momentum=0.9)
     inputs = variables.Variable(
-        np.random.random((5, 4, 3, 6)), dtype=dtypes.float32)
+        np.random.random((5, 4, 3, 6)) + 100, dtype=dtypes.float32)
     outputs_training = bn.apply(inputs, training=True)
     outputs_infer = bn.apply(inputs, training=False)

     with self.test_session() as sess:
       # Test training with placeholder learning phase.
       sess.run(variables.global_variables_initializer())
+      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
+      np_gamma = np.reshape(np_gamma, (1, 1, 1, 6))
+      np_beta = np.reshape(np_beta, (1, 1, 1, 6))
       for _ in range(100):
         np_output, _, _ = sess.run([outputs_training] + bn.updates)
+        # Verify that the axis is normalized during training.
+        normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
+        self.assertAlmostEqual(np.mean(normed_np_output), 0., places=2)
+        self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)

       # Verify that the statistics are updated during training.
       moving_mean, moving_var = sess.run([bn.moving_mean, bn.moving_variance])
@@ -328,14 +332,6 @@ class BNTest(test.TestCase):
       self.assertAllClose(mean, moving_mean, atol=1e-2)
       self.assertAllClose(variance, moving_var, atol=1e-2)

-      # Verify that the axis is normalized during training.
-      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
-      np_gamma = np.reshape(np_gamma, (1, 1, 1, 6))
-      np_beta = np.reshape(np_beta, (1, 1, 1, 6))
-      normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
-      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
-      self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
-
       # Test inference with placeholder learning phase.
       np_output = sess.run(outputs_infer)

@@ -367,9 +363,16 @@ class BNTest(test.TestCase):
     with self.test_session() as sess:
       # Test training with placeholder learning phase.
       sess.run(variables.global_variables_initializer())
+      np_gamma, np_beta = sess.run([gamma, beta])
+      np_gamma = np.reshape(np_gamma, (1, 1, 1, 6))
+      np_beta = np.reshape(np_beta, (1, 1, 1, 6))
       for _ in range(100):
         np_output, _, _ = sess.run([outputs] + updates,
                                    feed_dict={training: True})
+        # Verify that the axis is normalized during training.
+        normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
+        self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
+        self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)

       # Verify that the statistics are updated during training.
       np_moving_mean, np_moving_var = sess.run([moving_mean, moving_variance])
@@ -380,14 +383,6 @@ class BNTest(test.TestCase):
       self.assertAllClose(np_mean, np_moving_mean, atol=1e-2)
       self.assertAllClose(np_variance, np_moving_var, atol=1e-2)

-      # Verify that the axis is normalized during training.
-      np_gamma, np_beta = sess.run([gamma, beta])
-      np_gamma = np.reshape(np_gamma, (1, 1, 1, 6))
-      np_beta = np.reshape(np_beta, (1, 1, 1, 6))
-      normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
-      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
-      self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
-
       # Test inference with placeholder learning phase.
       np_output = sess.run(outputs, feed_dict={training: False})

@@ -448,7 +443,7 @@ class BNTest(test.TestCase):
       np_gamma = np.reshape(np_gamma, (1, 1, 1, 6))
       np_beta = np.reshape(np_beta, (1, 1, 1, 6))
       normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
-      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
+      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=2)
       self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)

       # Test inference with placeholder learning phase.
@@ -456,7 +451,7 @@ class BNTest(test.TestCase):

       # Verify that the axis is normalized during inference.
       normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
-      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
+      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=2)
       self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)

   def testNoCenter(self):
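As a plain-NumPy companion to the assertions repeated in the tests above (illustrative only; the epsilon value, shapes and parameter values are stand-ins rather than values taken from the test file): training-mode batch normalization over all dimensions except the normalized axis should leave the scaled-and-shifted output with mean close to 0 and standard deviation close to 1, even when the raw inputs sit far from zero, which is what the per-axis checks exercise.

import numpy as np

epsilon = 1e-3                                              # illustrative value
x = np.random.random((5, 4, 3)).astype(np.float32) + 100    # large offset, as in the tests
gamma = np.ones((1, 4, 1), dtype=np.float32)                # scale, broadcast along axis=1
beta = np.zeros((1, 4, 1), dtype=np.float32)                # offset, broadcast along axis=1

mean = np.mean(x, axis=(0, 2), keepdims=True)               # batch statistics over all but axis=1
var = np.var(x, axis=(0, 2), keepdims=True)
out = gamma * (x - mean) / np.sqrt(var + epsilon) + beta

print(np.mean(out), np.std(out))                            # approximately 0 and 1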