Flush denormals to zero in eager mode.

PiperOrigin-RevId: 311251058
Change-Id: I6ddca2fabc904e8e7400735aaddef361ba0b8778
This commit is contained in:
A. Unique TensorFlower 2020-05-12 19:45:42 -07:00 committed by TensorFlower Gardener
parent 7bffd6c498
commit f690a054c5
2 changed files with 18 additions and 19 deletions

View File

@@ -35,10 +35,8 @@ limitations under the License.
#include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/gtl/cleanup.h"
#include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/map_util.h"
#include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/lib/random/random.h"
#include "tensorflow/core/platform/denormal.h"
#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/errors.h"
#include "tensorflow/core/platform/fingerprint.h" #include "tensorflow/core/platform/fingerprint.h"
#include "tensorflow/core/platform/setround.h"
#include "tensorflow/core/profiler/lib/annotated_traceme.h" #include "tensorflow/core/profiler/lib/annotated_traceme.h"
#include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/profiler/lib/traceme.h"
#include "tensorflow/core/public/version.h" #include "tensorflow/core/public/version.h"
@@ -283,8 +281,6 @@ Status KernelAndDeviceOp::Run(
OpKernelContext context(&params); OpKernelContext context(&params);
{ {
port::ScopedFlushDenormal flush;
port::ScopedSetRound round(FE_TONEAREST);
// 'AnnotatedTraceMe' will trace both scheduling time on host and execution // 'AnnotatedTraceMe' will trace both scheduling time on host and execution
// time on device of the OpKernel. // time on device of the OpKernel.
profiler::AnnotatedTraceMe activity( profiler::AnnotatedTraceMe activity(

View File

@@ -23,6 +23,7 @@ import platform
from tensorflow.python.framework import constant_op from tensorflow.python.framework import constant_op
from tensorflow.python.framework import test_util from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import test from tensorflow.python.platform import test
@@ -34,30 +35,32 @@ class DenormalTest(test.TestCase):
tiny = np.finfo(dtype).tiny tiny = np.finfo(dtype).tiny
self.assertEqual(tiny, tiny / 16 * 16) self.assertEqual(tiny, tiny / 16 * 16)
def _flushDenormalsTest(self, dtypes): def _flushDenormalsTest(self, use_gpu, dtypes):
if (platform.machine() == "ppc64le" or platform.machine() == "s390x" or if platform.machine() == "ppc64le" or platform.machine(
platform.machine() == "aarch64"): ) == "s390x" or platform.machine() == "aarch64":
# Disabled denormal_test on power/s390x/aarch64 platform # Disabled denormal_test on power/s390x/aarch64 platform
# Check relevant discussion - https://github.com/tensorflow/tensorflow/issues/11902 # Check relevant discussion - https://github.com/tensorflow/tensorflow/issues/11902
return return
for dtype in dtypes: with self.cached_session(use_gpu=use_gpu):
tiny = np.finfo(dtype).tiny array_ops.identity(7).eval()
# Small shape to test main thread, large shape to test thread pool for dtype in dtypes:
for shape in (), (1 << 20,): tiny = np.finfo(dtype).tiny
flush = 0.1 * constant_op.constant(tiny, shape=shape) # Small shape to test main thread, large shape to test thread pool
self.assertAllEqual(self.evaluate(flush), np.zeros(shape)) for shape in (), (1 << 20,):
# Make sure the flags don't leak out flush = 0.1 * constant_op.constant(tiny, shape=shape)
self.testPythonHasDenormals() self.assertAllEqual(flush.eval(), np.zeros(shape))
# Make sure the flags don't leak out
self.testPythonHasDenormals()
@test_util.run_in_graph_and_eager_modes(use_gpu=False) @test_util.run_deprecated_v1
def testFlushDenormalsCPU(self): def testFlushDenormalsCPU(self):
# On CPUs, the processor flags flush for both single and double precision. # On CPUs, the processor flags flush for both single and double precision.
self._flushDenormalsTest(dtypes=(np.float32, np.float64)) self._flushDenormalsTest(use_gpu=False, dtypes=(np.float32, np.float64))
@test_util.run_in_graph_and_eager_modes(use_gpu=True) @test_util.run_deprecated_v1
def testFlushDenormalsGPU(self): def testFlushDenormalsGPU(self):
# On GPUs, only single precision can flush to zero. # On GPUs, only single precision can flush to zero.
self._flushDenormalsTest(dtypes=(np.float32,)) self._flushDenormalsTest(use_gpu=True, dtypes=(np.float32,))
if __name__ == "__main__": if __name__ == "__main__":