diff --git a/RELEASE.md b/RELEASE.md index e8ec9067fb4..8da2b277a0f 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -106,6 +106,8 @@ * Added `tf.config.experimental.get_memory_info`, returning a dict with the current and peak memory usage. Deprecated `tf.config.experimental.get_memory_usage` in favor of this new function. + * Extended `tf.config.experimental.enable_tensor_float_32_execution` to + control TensorFloat-32 evaluation in RNNs. * `tf.summary`: * New `tf.summary.graph` allows manual write of TensorFlow graph diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 700912f3eff..cf135b03273 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -428,6 +428,7 @@ tf_cuda_cc_test( "//tensorflow/c/experimental/ops", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core/platform:tensor_float_32_utils", "@com_google_absl//absl/types:span", ], ) diff --git a/tensorflow/c/eager/gradient_checker_test.cc b/tensorflow/c/eager/gradient_checker_test.cc index 068051585d5..0882172dd48 100644 --- a/tensorflow/c/eager/gradient_checker_test.cc +++ b/tensorflow/c/eager/gradient_checker_test.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/c/experimental/ops/math_ops.h" #include "tensorflow/c/tf_status_helper.h" #include "tensorflow/c/tf_tensor.h" +#include "tensorflow/core/platform/tensor_float_32_utils.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -96,6 +97,11 @@ class GradientCheckerTest ASSERT_EQ(errors::OK, s.code()) << s.error_message(); ctx_.reset(ctx_raw); } + + // Computing numerical gradients with TensorFloat-32 is numerically + // unstable. 
Some forward pass tests also fail with TensorFloat-32 due to + // low tolerances + enable_tensor_float_32_execution(false); } AbstractContextPtr ctx_; diff --git a/tensorflow/compiler/xla/client/lib/BUILD b/tensorflow/compiler/xla/client/lib/BUILD index 01d74fae1c3..2c14a39c4ae 100644 --- a/tensorflow/compiler/xla/client/lib/BUILD +++ b/tensorflow/compiler/xla/client/lib/BUILD @@ -538,6 +538,7 @@ xla_test( "//tensorflow/compiler/xla/tests:test_macros_header", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", + "//tensorflow/core/platform:tensor_float_32_utils", ], ) diff --git a/tensorflow/compiler/xla/client/lib/svd_test.cc b/tensorflow/compiler/xla/client/lib/svd_test.cc index a39238548fc..2b00735a3d0 100644 --- a/tensorflow/compiler/xla/client/lib/svd_test.cc +++ b/tensorflow/compiler/xla/client/lib/svd_test.cc @@ -33,6 +33,7 @@ limitations under the License. #include "tensorflow/compiler/xla/tests/test_macros.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/tensor_float_32_utils.h" namespace xla { @@ -54,6 +55,9 @@ class SVDTest : public ClientLibraryTestBase { {12, 48, 6, 62, 3}, }, }; + + // Test fails with TensorFloat-32 enabled + tensorflow::enable_tensor_float_32_execution(false); } void TearDown() override { ClientLibraryTestBase::TearDown(); } diff --git a/tensorflow/python/keras/layers/gru_v2_test.py b/tensorflow/python/keras/layers/gru_v2_test.py index 80776fab40c..589a3d581b1 100644 --- a/tensorflow/python/keras/layers/gru_v2_test.py +++ b/tensorflow/python/keras/layers/gru_v2_test.py @@ -59,6 +59,7 @@ _graph_options = config_pb2.GraphOptions(rewrite_options=_rewrites) _config = config_pb2.ConfigProto(graph_options=_graph_options) +@testing_utils.run_all_without_tensor_float_32('RNN GRU can use TF32 on GPU') @keras_parameterized.run_all_keras_modes(config=_config) class GRUV2Test(keras_parameterized.TestCase): @@ -667,6 
+668,7 @@ class GRUV2Test(keras_parameterized.TestCase): self.assertAllClose(self.evaluate(outputs), self.evaluate(copied_outputs)) +@testing_utils.run_all_without_tensor_float_32('RNN GRU can use TF32 on GPU') class GRULayerGradientTapeTest(keras_parameterized.TestCase): @combinations.generate(combinations.combine(mode=['eager'])) @@ -694,6 +696,7 @@ class GRULayerGradientTapeTest(keras_parameterized.TestCase): tape.gradient(loss, gru.variables) +@testing_utils.run_all_without_tensor_float_32('RNN GRU can use TF32 on GPU') @keras_parameterized.run_all_keras_modes(config=_config) class GRUGraphRewriteTest(keras_parameterized.TestCase): diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index d3703e5ef67..01113f89f5e 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -1123,8 +1123,13 @@ class CudnnRnnDescriptor : public dnn::RnnDescriptor { "Algo requests disallowed tensor op evaluation."); } +#if CUDNN_VERSION >= 8000 + cudnnMathType_t math_type = + use_tensor_ops ? CUDNN_TENSOR_OP_MATH : CUDNN_FMA_MATH; +#else cudnnMathType_t math_type = use_tensor_ops ? CUDNN_TENSOR_OP_MATH : CUDNN_DEFAULT_MATH; +#endif #if CUDNN_VERSION >= 8000 cudnnRNNBiasMode_t bias_mode = CUDNN_RNN_DOUBLE_BIAS;