Merge pull request #33803 from duncanriach:bias_add_test_eager_mode
PiperOrigin-RevId: 294705769
Change-Id: I96e27068e2a1d770e49439950c6043d353ca4962
commit 3c88f5b302
Changes to the bias_add test base (class BiasAddTestBase):

@@ -20,17 +20,22 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradient_checker
+from tensorflow.python.ops import gradient_checker_v2
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import nn_ops
 import tensorflow.python.ops.nn_grad  # pylint: disable=unused-import
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class BiasAddTestBase(test.TestCase):
 
   def _npBias(self, inputs, bias):
@@ -48,7 +53,7 @@ class BiasAddTestBase(test.TestCase):
   def _testBias(self, np_inputs, np_bias, use_gpu=False):
     np_val = self._npBias(np_inputs, np_bias)
     with self.cached_session(use_gpu=use_gpu):
-      tf_val = nn_ops.bias_add(np_inputs, np_bias).eval()
+      tf_val = self.evaluate(nn_ops.bias_add(np_inputs, np_bias))
     self.assertAllCloseAccordingToType(np_val, tf_val)
 
   def _AtLeast3d(self, np_value):
@@ -76,7 +81,8 @@ class BiasAddTestBase(test.TestCase):
     np_val = self._npBias(np_inputs, np_bias)
     np_inputs = self._NHWCToNCHW(np_inputs)
     with self.cached_session(use_gpu=use_gpu):
-      tf_val = nn_ops.bias_add(np_inputs, np_bias, data_format="NCHW").eval()
+      tf_val = self.evaluate(
+          nn_ops.bias_add(np_inputs, np_bias, data_format="NCHW"))
     tf_val = self._NCHWToNHWC(tf_val)
     self.assertAllCloseAccordingToType(self._AtLeast3d(np_val), tf_val)
 
@@ -87,40 +93,40 @@ class BiasAddTestBase(test.TestCase):
     self._testBias(np_inputs, np_bias, use_gpu=True)
     self._testBiasNCHW(np_inputs, np_bias, use_gpu=True)
 
-  @test_util.run_deprecated_v1
+  def _expectedException(self):
+    if context.executing_eagerly():
+      return errors_impl.InvalidArgumentError
+    else:
+      return ValueError
+
   def testInputDims(self):
-    with self.assertRaises(ValueError):
+    with self.assertRaises(self._expectedException()):
       nn_ops.bias_add([1, 2], [1])
 
-  @test_util.run_deprecated_v1
   def testBiasVec(self):
-    with self.assertRaises(ValueError):
+    with self.assertRaises(self._expectedException()):
       nn_ops.bias_add(
           array_ops.reshape([1, 2], shape=[1, 2]),
           array_ops.reshape([1, 2], shape=[1, 2]))
 
-  @test_util.run_deprecated_v1
   def testBiasInputsMatch(self):
-    with self.assertRaises(ValueError):
+    with self.assertRaises(self._expectedException()):
       nn_ops.bias_add(
           array_ops.reshape([1, 2], shape=[1, 2]),
           array_ops.reshape([1], shape=[1]))
 
-  @test_util.run_deprecated_v1
   def testIntTypes(self):
     for t in [np.int8, np.int16, np.int32, np.int64]:
       self._testAll(
           np.array([[10, 20, 30], [40, 50, 60]]).astype(t),
           np.array([1, 2, 3]).astype(t))
 
-  @test_util.run_deprecated_v1
   def testFloatTypes(self):
     for t in [np.float16, np.float32, np.float64]:
       self._testAll(
          np.random.rand(4, 3, 3).astype(t),
          np.random.rand(3).astype(t))
 
-  @test_util.run_deprecated_v1
   def test4DFloatTypes(self):
     for t in [np.float16, np.float32, np.float64]:
       self._testAll(
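Reviewer note (not part of the diff): the new _expectedException helper exists because the same bad call fails differently in the two execution modes. In graph mode, shape validation raises ValueError while the op is being built; in eager mode the kernel runs immediately and the failure surfaces as errors_impl.InvalidArgumentError. A minimal sketch of the same idea using public tf.* names (these are assumptions standing in for the internal modules the test imports):

import tensorflow as tf

def expected_bias_add_error():
  # Assumption: TF 2.x, where tf.executing_eagerly() is True by default.
  if tf.executing_eagerly():
    return tf.errors.InvalidArgumentError
  return ValueError

try:
  tf.nn.bias_add([1, 2], [1])  # rank-1 input is invalid for bias_add
except expected_bias_add_error():
  print("raised the mode-appropriate exception")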
@@ -133,32 +139,78 @@ class BiasAddTestBase(test.TestCase):
           np.random.rand(4, 4, 4, 2048).astype(t),
           np.random.rand(2048).astype(t))
 
-  @test_util.run_deprecated_v1
   def test5DFloatTypes(self):
     for t in [np.float16, np.float32, np.float64]:
       self._testAll(
           np.random.rand(4, 3, 2, 3, 4).astype(t),
           np.random.rand(4).astype(t))
 
+  def _random_tensor(self, shape, dtype):
+    return constant_op.constant(2 * np.random.rand(*shape) - 1, dtype=dtype)
+
+  def _computeGradient(self, np_input, bias, dtype, data_format):
+    input_shape = output_shape = np_input.shape
+    bias_shape = bias.shape
+    input_tensor = constant_op.constant(
+        np_input, shape=input_shape, dtype=dtype)
+    bias_tensor = constant_op.constant(bias, shape=bias_shape, dtype=dtype)
+
+    if context.executing_eagerly():
+
+      def bias_add(input_tensor, bias_tensor):
+        return nn_ops.bias_add(
+            input_tensor, bias_tensor, data_format=data_format)
+
+      # The following is a work-around for TF issue 33660. Instead of
+      # calculating the analytical and numerical gradients for both
+      # inputs in a single call to compute_gradient, compute_gradient
+      # is called for each input separately.
+      def bias_add_1(input_tensor):
+        return bias_add(input_tensor, bias_tensor)
+
+      def bias_add_2(bias_tensor):
+        return bias_add(input_tensor, bias_tensor)
+
+      input_jacob_a, input_jacob_n = gradient_checker_v2.compute_gradient(
+          bias_add_1, [input_tensor])
+      bias_jacob_a, bias_jacob_n = gradient_checker_v2.compute_gradient(
+          bias_add_2, [bias_tensor])
+
+      # Test gradient of BiasAddGrad
+      def bias_add_grad_function(upstream_gradients):
+        with backprop.GradientTape() as tape:
+          tape.watch(bias_tensor)
+          bias_add_output = bias_add(input_tensor, bias_tensor)
+          gradient_injector_output = bias_add_output * upstream_gradients
+          return tape.gradient(gradient_injector_output, bias_tensor)
+
+      upstream_tensor = self._random_tensor(output_shape, dtype)
+      grad_jacob_a, grad_jacob_n = gradient_checker_v2.compute_gradient(
+          bias_add_grad_function, [upstream_tensor])
+    else:
+      output_tensor = nn_ops.bias_add(
+          input_tensor, bias_tensor, data_format=data_format)
+      jacobians = gradient_checker.compute_gradient([input_tensor, bias_tensor],
+                                                    [input_shape, bias_shape],
+                                                    output_tensor, output_shape)
+      (input_jacob_a, input_jacob_n), (bias_jacob_a, bias_jacob_n) = jacobians
+      # Test gradient of BiasAddGrad
+      bias_add_grad = gradients_impl.gradients(
+          nn_ops.l2_loss(output_tensor), bias_tensor)[0]
+      grad_jacob_a, grad_jacob_n = gradient_checker.compute_gradient(
+          output_tensor, output_shape, bias_add_grad, bias_shape)
+
+    return ((input_jacob_a, bias_jacob_a, grad_jacob_a),
+            (input_jacob_n, bias_jacob_n, grad_jacob_n))
+
   def _testGradient(self, np_input, bias, dtype, data_format, use_gpu):
     with self.cached_session(use_gpu=use_gpu):
       if data_format == "NCHW":
         np_input = self._NHWCToNCHW(np_input)
-      input_tensor = constant_op.constant(
-          np_input, shape=np_input.shape, dtype=dtype)
-      bias_tensor = constant_op.constant(bias, shape=bias.shape, dtype=dtype)
-      output_tensor = nn_ops.bias_add(
-          input_tensor, bias_tensor, data_format=data_format)
-      tensor_jacob_t, tensor_jacob_n = gradient_checker.compute_gradient(
-          input_tensor, np_input.shape, output_tensor, np_input.shape)
-      bias_jacob_t, bias_jacob_n = gradient_checker.compute_gradient(
-          bias_tensor, bias.shape, output_tensor, np_input.shape)
-
-      # Test gradient of BiasAddGrad
-      bias_add_grad = gradients_impl.gradients(
-          nn_ops.l2_loss(output_tensor), bias_tensor)[0]
-      grad_jacob_t, grad_jacob_n = gradient_checker.compute_gradient(
-          output_tensor, np_input.shape, bias_add_grad, bias.shape)
+      jacob_a, jacob_n = self._computeGradient(np_input, bias, dtype,
+                                               data_format)
+      input_jacob_a, bias_jacob_a, grad_jacob_a = jacob_a
+      input_jacob_n, bias_jacob_n, grad_jacob_n = jacob_n
 
       if dtype == np.float16:
         # Compare fp16 analytical gradients to fp32 numerical gradients,
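Reviewer note (not part of the diff): in the eager branch above, _computeGradient calls gradient_checker_v2.compute_gradient once per input, closing over the other input, as a work-around for TF issue 33660, and then exercises the BiasAddGrad gradient by multiplying the op output by injected upstream gradients inside a GradientTape. A hedged sketch of the per-input pattern using the public tf.test.compute_gradient wrapper (shapes and tolerances here are illustrative, not taken from the diff):

import numpy as np
import tensorflow as tf

x = tf.constant(np.random.rand(2, 3), dtype=tf.float32)
b = tf.constant(np.random.rand(3), dtype=tf.float32)

def bias_add_wrt_input(x_arg):
  return tf.nn.bias_add(x_arg, b)  # bias held fixed in the closure

def bias_add_wrt_bias(b_arg):
  return tf.nn.bias_add(x, b_arg)  # input held fixed in the closure

# One compute_gradient call per input instead of a single call for both inputs.
for fn, arg in [(bias_add_wrt_input, x), (bias_add_wrt_bias, b)]:
  theoretical, numerical = tf.test.compute_gradient(fn, [arg])
  np.testing.assert_allclose(theoretical[0], numerical[0], rtol=5e-3, atol=5e-3)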
@@ -166,30 +218,22 @@ class BiasAddTestBase(test.TestCase):
         # care is taken with choosing the inputs and the delta. This is
         # a weaker, but pragmatic, check (in particular, it does not test
         # the op itself, only its gradient).
-        input_tensor = constant_op.constant(
-            np_input, shape=np_input.shape, dtype=np.float32)
-        bias_tensor = constant_op.constant(
-            bias, shape=bias.shape, dtype=np.float32)
-        output_tensor = nn_ops.bias_add(
-            input_tensor, bias_tensor, data_format=data_format)
-        _, tensor_jacob_n = gradient_checker.compute_gradient(
-            input_tensor, np_input.shape, output_tensor, np_input.shape)
-        _, bias_jacob_n = gradient_checker.compute_gradient(
-            bias_tensor, bias.shape, output_tensor, np_input.shape)
-
-        bias_add_grad = gradients_impl.gradients(
-            nn_ops.l2_loss(output_tensor), bias_tensor)[0]
-        _, grad_jacob_n = gradient_checker.compute_gradient(
-            output_tensor, np_input.shape, bias_add_grad, bias.shape)
+        _, jacob_n = self._computeGradient(np_input, bias, np.float32,
+                                           data_format)
+        input_jacob_n, bias_jacob_n, grad_jacob_n = jacob_n
 
-      threshold = 5e-3
       if dtype == dtypes.float64:
         threshold = 1e-10
-      self.assertAllClose(tensor_jacob_t, tensor_jacob_n, threshold, threshold)
-      self.assertAllClose(bias_jacob_t, bias_jacob_n, threshold, threshold)
-      self.assertAllClose(grad_jacob_t, grad_jacob_n, threshold, threshold)
+      elif np_input.size >= 512:
+        # The 5e-3 threshold seems to have been marginal in these cases, and
+        # small changes in the test were pushing it over the limit.
+        threshold = 5e-2
+      else:
+        threshold = 5e-3
+      self.assertAllClose(input_jacob_a, input_jacob_n, threshold, threshold)
+      self.assertAllClose(bias_jacob_a, bias_jacob_n, threshold, threshold)
+      self.assertAllClose(grad_jacob_a, grad_jacob_n, threshold, threshold)
 
-  @test_util.run_deprecated_v1
   def testGradientTensor2D(self):
     for (data_format, use_gpu) in ("NHWC", False), ("NHWC", True):
       for dtype in (dtypes.float16, dtypes.float32, dtypes.float64):
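Reviewer note (not part of the diff): for float16 the test compares fp16 analytical Jacobians against fp32 numerical Jacobians, since finite differences in half precision are too noisy at any practical delta. A rough public-API sketch of that comparison (shapes, names, and tolerances are illustrative assumptions, not taken from the diff):

import numpy as np
import tensorflow as tf

x16 = tf.constant(np.random.rand(2, 3), dtype=tf.float16)
b16 = tf.constant(np.random.rand(3), dtype=tf.float16)

# Analytical (theoretical) Jacobian computed in fp16.
theoretical_fp16, _ = tf.test.compute_gradient(
    lambda x: tf.nn.bias_add(x, b16), [x16])

# Numerical Jacobian of the same function evaluated in fp32.
x32, b32 = tf.cast(x16, tf.float32), tf.cast(b16, tf.float32)
_, numerical_fp32 = tf.test.compute_gradient(
    lambda x: tf.nn.bias_add(x, b32), [x32])

np.testing.assert_allclose(
    theoretical_fp16[0].astype(np.float32), numerical_fp32[0],
    rtol=5e-3, atol=5e-3)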
@@ -198,7 +242,6 @@ class BiasAddTestBase(test.TestCase):
         bias = np.array([1.3, 2.4], dtype=dtype.as_numpy_dtype)
         self._testGradient(np_input, bias, dtype, data_format, use_gpu)
 
-  @test_util.run_deprecated_v1
   def testGradientTensor3D(self):
     for (data_format, use_gpu) in [("NHWC", False), ("NHWC", True),
                                    ("NCHW", False), ("NCHW", True)]:
@@ -208,7 +251,6 @@ class BiasAddTestBase(test.TestCase):
         bias = np.array([1.3, 2.4], dtype=dtype.as_numpy_dtype)
         self._testGradient(np_input, bias, dtype, data_format, use_gpu)
 
-  @test_util.run_deprecated_v1
   def testGradientTensor4D(self):
     for (data_format, use_gpu) in [("NHWC", False)]:
       for dtype in (dtypes.float16, dtypes.float32, dtypes.float64):
@@ -230,7 +272,6 @@ class BiasAddTestBase(test.TestCase):
           np.random.rand(64).astype(dtype.as_numpy_dtype),
           dtype, data_format, use_gpu)
 
-  @test_util.run_deprecated_v1
   def testGradientTensor5D(self):
     for (data_format, use_gpu) in [("NHWC", False), ("NHWC", True),
                                    ("NCHW", False), ("NCHW", True)]:
@@ -242,13 +283,11 @@ class BiasAddTestBase(test.TestCase):
         bias = np.array([1.3, 2.4], dtype=dtype.as_numpy_dtype)
         self._testGradient(np_input, bias, dtype, data_format, use_gpu)
 
-  @test_util.run_deprecated_v1
   def testEmpty(self):
     np.random.seed(7)
     for shape in (0, 0), (2, 0), (0, 2), (4, 3, 0), (4, 0, 3), (0, 4, 3):
       self._testAll(np.random.randn(*shape), np.random.randn(shape[-1]))
 
-  @test_util.run_deprecated_v1
   def testEmptyGradient(self):
     for (data_format, use_gpu) in ("NHWC", False), ("NHWC", True):
       for shape in (0, 0), (2, 0), (0, 2):
The commit also updates the deterministic bias_add test (class BiasAddDeterministicTest):

@@ -24,6 +24,8 @@ import numpy as np
 
 from absl.testing import parameterized
 
+from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
@@ -37,8 +39,8 @@ from tensorflow.python.platform import test
 class BiasAddDeterministicTest(bias_op_base.BiasAddTestBase,
                                parameterized.TestCase):
 
-  def _make_shape_tuple(self, batch_size, channel_count, data_rank, data_dim,
-                        data_layout):
+  def _makeShapeTuple(self, batch_size, channel_count, data_rank, data_dim,
+                      data_layout):
     data_dims = data_rank * (data_dim,)
     if data_layout == 'channels_first':
       shape = (batch_size,) + (channel_count,) + data_dims
@@ -48,7 +50,7 @@ class BiasAddDeterministicTest(bias_op_base.BiasAddTestBase,
       raise ValueError('Unknown data format')
     return shape
 
-  def _data_format_from_data_layout(self, data_layout=None):
+  def _dataFormatFromDataLayout(self, data_layout=None):
     if data_layout == 'channels_first':
       return 'NCHW'
     elif data_layout == 'channels_last':
@@ -56,59 +58,82 @@ class BiasAddDeterministicTest(bias_op_base.BiasAddTestBase,
     else:
       raise ValueError('Unknown data_layout')
 
-  def _random_data_op(self, shape, data_type):
-    return constant_op.constant(
-        2 * np.random.random_sample(shape) - 1, dtype=data_type)
-
-  def _random_ndarray(self, shape):
+  def _randomNDArray(self, shape):
     return 2 * np.random.random_sample(shape) - 1
 
-  def _assert_reproducible(self, operation, feed_dict={}):
-    with self.cached_session(force_gpu=True):
-      result_a = operation[0].eval(feed_dict=feed_dict)
-      result_b = operation[0].eval(feed_dict=feed_dict)
-      self.assertAllEqual(result_a, result_b)
+  def _randomDataOp(self, shape, data_type):
+    return constant_op.constant(self._randomNDArray(shape), dtype=data_type)
 
-  # TODO(duncanriach): add test coverage for deterministic gradients
-  # in eager mode
   @parameterized.named_parameters(
       *test_util.generate_combinations_with_testcase_name(
+          # With the selected layer configuration, at least in TensorFlow
+          # version 2.0, when data_layout='channels_last', bias_add operates
+          # deterministically by default. I don't know if this is true for
+          # all layer configurations. These cases are still being tested here,
+          # for completeness.
          data_layout=['channels_first', 'channels_last'],
          data_rank=[1, 2, 3],
          data_type=[dtypes.float16, dtypes.float32, dtypes.float64]))
-  @test_util.run_deprecated_v1
+  @test_util.run_in_graph_and_eager_modes
   @test_util.run_cuda_only
   def testDeterministicGradients(self, data_layout, data_rank, data_type):
-    seed = (
-        hash(data_layout) % 256 + hash(data_rank) % 256 + hash(data_type) % 256)
-    np.random.seed(seed)
-    batch_size = 10
-    channel_count = 8
-    data_dim = 14
-    in_shape = self._make_shape_tuple(batch_size, channel_count, data_rank,
-                                      data_dim, data_layout)
-    bias_shape = (channel_count,)
-    out_shape = in_shape
-    in_op = self._random_data_op(in_shape, data_type)
-    bias_op = self._random_data_op(bias_shape, data_type)
-    data_format = self._data_format_from_data_layout(data_layout)
-    bias_add_op = nn_ops.bias_add(in_op, bias_op, data_format=data_format)
-    upstream_gradients = array_ops.placeholder(
-        data_type, shape=out_shape, name='upstream_gradients')
-    gradient_injector_op = bias_add_op * upstream_gradients
-    # The gradient function behaves as if grad_ys is multiplied by the op
-    # gradient result, not passing the upstram gradients through the op's
-    # gradient generation graph. This is the reason for using the
-    # gradient_injector_op
-    grad_ys = None
-    bias_gradients_op = gradients_impl.gradients(
-        gradient_injector_op,
-        bias_op,
-        grad_ys=grad_ys,
-        colocate_gradients_with_ops=True)
-    for i in range(5):
-      feed_dict = {upstream_gradients: self._random_ndarray(out_shape)}
-      self._assert_reproducible(bias_gradients_op, feed_dict=feed_dict)
+    with self.session(force_gpu=True):
+      # Using a cached_session with force_gpu=True does not work at the time
+      # of writing (2019-12-10). Before the @parameterized.named_parameters
+      # decorator was added, this non-cached session context was set outside
+      # the iteration loops for the parameter combinations, and so was re-used.
+      seed = (
+          hash(data_layout) % 256 + hash(data_rank) % 256 +
+          hash(data_type) % 256)
+      np.random.seed(seed)
+      batch_size = 10
+      channel_count = 8
+      data_dim = 14
+      input_shape = self._makeShapeTuple(batch_size, channel_count, data_rank,
+                                         data_dim, data_layout)
+      bias_shape = (channel_count,)
+      output_shape = input_shape
+      input_val = self._randomDataOp(input_shape, data_type)
+      bias_val = self._randomDataOp(bias_shape, data_type)
+      data_format = self._dataFormatFromDataLayout(data_layout)
+      repeat_count = 5
+      if context.executing_eagerly():
+
+        def bias_gradients(local_seed):
+          np.random.seed(local_seed)
+          upstream_gradients = self._randomDataOp(output_shape, data_type)
+          with backprop.GradientTape(persistent=True) as tape:
+            tape.watch(bias_val)
+            bias_add_output = nn_ops.bias_add(
+                input_val, bias_val, data_format=data_format)
+            gradient_injector_output = bias_add_output * upstream_gradients
+          return tape.gradient(gradient_injector_output, bias_val)
+
+        for i in range(repeat_count):
+          local_seed = seed + i  # select different upstream gradients
+          result_a = bias_gradients(local_seed)
+          result_b = bias_gradients(local_seed)
+          self.assertAllEqual(result_a, result_b)
+      else:  # graph mode
+        upstream_gradients = array_ops.placeholder(
+            data_type, shape=output_shape, name='upstream_gradients')
+        bias_add_output = nn_ops.bias_add(
+            input_val, bias_val, data_format=data_format)
+        gradient_injector_output = bias_add_output * upstream_gradients
+        # The gradient function behaves as if grad_ys is multiplied by the op
+        # gradient result, not passing the upstram gradients through the op's
+        # gradient generation graph. This is the reason for using the
+        # gradient injector
+        bias_gradients = gradients_impl.gradients(
+            gradient_injector_output,
+            bias_val,
+            grad_ys=None,
+            colocate_gradients_with_ops=True)[0]
+        for i in range(repeat_count):
+          feed_dict = {upstream_gradients: self._randomNDArray(output_shape)}
+          result_a = bias_gradients.eval(feed_dict=feed_dict)
+          result_b = bias_gradients.eval(feed_dict=feed_dict)
+          self.assertAllEqual(result_a, result_b)
 
   # TODO(duncanriach): Re-enable the following three tests for the error checks
   # after deterministic functionality is implemented at the CUDA kernel level.
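Reviewer note (not part of the diff): the eager branch of testDeterministicGradients boils down to computing the injected bias gradient twice with identical inputs and requiring bitwise equality. A standalone sketch of that check using public APIs (shapes and the seed are illustrative; whether it passes on GPU depends on deterministic-op support being enabled, e.g. via TF_DETERMINISTIC_OPS=1 in the TF version this PR targets):

import numpy as np
import tensorflow as tf

def bias_gradient(seed, input_val, bias_val):
  np.random.seed(seed)
  upstream = tf.constant(  # injected upstream gradients
      2 * np.random.random_sample(input_val.shape.as_list()) - 1,
      dtype=input_val.dtype)
  with tf.GradientTape() as tape:
    tape.watch(bias_val)
    injected = tf.nn.bias_add(input_val, bias_val) * upstream
  return tape.gradient(injected, bias_val)

input_val = tf.constant(np.random.random_sample((10, 14, 14, 8)), tf.float32)
bias_val = tf.constant(np.random.random_sample(8), tf.float32)
for i in range(5):
  result_a = bias_gradient(123 + i, input_val, bias_val)
  result_b = bias_gradient(123 + i, input_val, bias_val)
  np.testing.assert_array_equal(result_a.numpy(), result_b.numpy())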