Merge pull request #46300 from nluehr:tf32-fixes

PiperOrigin-RevId: 356348412 Change-Id: I7df8f2d191fede578d54db4ec38b7a6a6ae11865
2021-02-08 14:08:55 -08:00 · 2021-02-08 14:08:55 -08:00 · 0092ebe4c7
commit 0092ebe4c7
parent bdd28bfb0c 89ddedbb40
7 changed files with 22 additions and 0 deletions
--- a/RELEASE.md
+++ b/RELEASE.md
@ -106,6 +106,8 @@
    * Added `tf.config.experimental.get_memory_info`, returning a dict with the
      current and peak memory usage. Deprecated 
      `tf.config.experimental.get_memory_usage` in favor of this new function.
+    *   Extended `tf.config.experimental.enable_tensor_float_32_execution` to
+        control TensorFloat-32 evaluation in RNNs.

 *   `tf.summary`:
  *   New `tf.summary.graph` allows manual write of TensorFlow graph
--- a/tensorflow/c/eager/BUILD
+++ b/tensorflow/c/eager/BUILD
@ -428,6 +428,7 @@ tf_cuda_cc_test(
        "//tensorflow/c/experimental/ops",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
+        "//tensorflow/core/platform:tensor_float_32_utils",
        "@com_google_absl//absl/types:span",
    ],
 )
--- a/tensorflow/c/eager/gradient_checker_test.cc
+++ b/tensorflow/c/eager/gradient_checker_test.cc
@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/c/experimental/ops/math_ops.h"
 #include "tensorflow/c/tf_status_helper.h"
 #include "tensorflow/c/tf_tensor.h"
+#include "tensorflow/core/platform/tensor_float_32_utils.h"
 #include "tensorflow/core/platform/test.h"

 namespace tensorflow {
@ -96,6 +97,11 @@ class GradientCheckerTest
      ASSERT_EQ(errors::OK, s.code()) << s.error_message();
      ctx_.reset(ctx_raw);
    }
+
+    // Computing numerical gradients with TensorFloat-32 is numerically
+    // unstable. Some forward-pass tests also fail with TensorFloat-32 due to
+    // tight tolerances.
+    enable_tensor_float_32_execution(false);
  }

  AbstractContextPtr ctx_;
--- a/tensorflow/compiler/xla/client/lib/BUILD
+++ b/tensorflow/compiler/xla/client/lib/BUILD
@ -538,6 +538,7 @@ xla_test(
        "//tensorflow/compiler/xla/tests:test_macros_header",
        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
        "//tensorflow/core:test",
+        "//tensorflow/core/platform:tensor_float_32_utils",
    ],
 )

--- a/tensorflow/compiler/xla/client/lib/svd_test.cc
+++ b/tensorflow/compiler/xla/client/lib/svd_test.cc
@ -33,6 +33,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/tests/test_macros.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/tensor_float_32_utils.h"

 namespace xla {

@ -54,6 +55,9 @@ class SVDTest : public ClientLibraryTestBase {
            {12, 48, 6, 62, 3},
        },
    };
+
+    // The tests in this fixture fail with TensorFloat-32 enabled.
+    tensorflow::enable_tensor_float_32_execution(false);
  }
  void TearDown() override { ClientLibraryTestBase::TearDown(); }

--- a/tensorflow/python/keras/layers/gru_v2_test.py
+++ b/tensorflow/python/keras/layers/gru_v2_test.py
@ -59,6 +59,7 @@ _graph_options = config_pb2.GraphOptions(rewrite_options=_rewrites)
 _config = config_pb2.ConfigProto(graph_options=_graph_options)


+@testing_utils.run_all_without_tensor_float_32('RNN GRU can use TF32 on GPU')
@keras_parameterized.run_all_keras_modes(config=_config)
 class GRUV2Test(keras_parameterized.TestCase):

@ -667,6 +668,7 @@ class GRUV2Test(keras_parameterized.TestCase):
    self.assertAllClose(self.evaluate(outputs), self.evaluate(copied_outputs))


+@testing_utils.run_all_without_tensor_float_32('RNN GRU can use TF32 on GPU')
 class GRULayerGradientTapeTest(keras_parameterized.TestCase):

  @combinations.generate(combinations.combine(mode=['eager']))
@ -694,6 +696,7 @@ class GRULayerGradientTapeTest(keras_parameterized.TestCase):
      tape.gradient(loss, gru.variables)


+@testing_utils.run_all_without_tensor_float_32('RNN GRU can use TF32 on GPU')
@keras_parameterized.run_all_keras_modes(config=_config)
 class GRUGraphRewriteTest(keras_parameterized.TestCase):

--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@ -1123,8 +1123,13 @@ class CudnnRnnDescriptor : public dnn::RnnDescriptor {
                          "Algo requests disallowed tensor op evaluation.");
    }

+#if CUDNN_VERSION >= 8000
+    cudnnMathType_t math_type =
+        use_tensor_ops ? CUDNN_TENSOR_OP_MATH : CUDNN_FMA_MATH;
+#else
    cudnnMathType_t math_type =
        use_tensor_ops ? CUDNN_TENSOR_OP_MATH : CUDNN_DEFAULT_MATH;
+#endif

 #if CUDNN_VERSION >= 8000
    cudnnRNNBiasMode_t bias_mode = CUDNN_RNN_DOUBLE_BIAS;