Flush denormals to zero in eager mode.

PiperOrigin-RevId: 311364546
Change-Id: I42efa6b19b8193c49bc581879b04ce3d05a13607
2020-05-13 11:06:08 -07:00 · 2020-05-13 11:06:08 -07:00 · b97bf5ae0b
commit b97bf5ae0b
parent 88c4ee0102
2 changed files with 19 additions and 18 deletions
--- a/tensorflow/core/common_runtime/eager/kernel_and_device.cc
+++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc
@@ -35,8 +35,10 @@ limitations under the License.
 #include "tensorflow/core/lib/gtl/cleanup.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/platform/denormal.h"
 #include "tensorflow/core/platform/errors.h"
 #include "tensorflow/core/platform/fingerprint.h"
+#include "tensorflow/core/platform/setround.h"
 #include "tensorflow/core/profiler/lib/annotated_traceme.h"
 #include "tensorflow/core/profiler/lib/traceme.h"
 #include "tensorflow/core/public/version.h"
@@ -281,6 +283,8 @@ Status KernelAndDeviceOp::Run(
  OpKernelContext context(&params);

  {
+    port::ScopedFlushDenormal flush;
+    port::ScopedSetRound round(FE_TONEAREST);
    // 'AnnotatedTraceMe' will trace both scheduling time on host and execution
    // time on device of the OpKernel.
    profiler::AnnotatedTraceMe activity(
--- a/tensorflow/python/kernel_tests/denormal_test.py
+++ b/tensorflow/python/kernel_tests/denormal_test.py
@@ -23,7 +23,6 @@ import platform

 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test


@@ -35,32 +34,30 @@ class DenormalTest(test.TestCase):
      tiny = np.finfo(dtype).tiny
      self.assertEqual(tiny, tiny / 16 * 16)

-  def _flushDenormalsTest(self, use_gpu, dtypes):
-    if platform.machine() == "ppc64le" or platform.machine(
-    ) == "s390x" or platform.machine() == "aarch64":
+  def _flushDenormalsTest(self, dtypes):
+    if (platform.machine() == "ppc64le" or platform.machine() == "s390x" or
+        platform.machine() == "aarch64"):
      # Disabled denormal_test on power/s390x/aarch64 platform
      # Check relevant discussion - https://github.com/tensorflow/tensorflow/issues/11902
      return
-    with self.cached_session(use_gpu=use_gpu):
-      array_ops.identity(7).eval()
-      for dtype in dtypes:
-        tiny = np.finfo(dtype).tiny
-        # Small shape to test main thread, large shape to test thread pool
-        for shape in (), (1 << 20,):
-          flush = 0.1 * constant_op.constant(tiny, shape=shape)
-          self.assertAllEqual(flush.eval(), np.zeros(shape))
-          # Make sure the flags don't leak out
-          self.testPythonHasDenormals()
+    for dtype in dtypes:
+      tiny = np.finfo(dtype).tiny
+      # Small shape to test main thread, large shape to test thread pool
+      for shape in (), (1 << 20,):
+        flush = 0.1 * constant_op.constant(tiny, shape=shape)
+        self.assertAllEqual(self.evaluate(flush), np.zeros(shape))
+        # Make sure the flags don't leak out
+        self.testPythonHasDenormals()

-  @test_util.run_deprecated_v1
+  @test_util.run_in_graph_and_eager_modes(use_gpu=False)
  def testFlushDenormalsCPU(self):
    # On CPUs, the processor flags flush for both single and double precision.
-    self._flushDenormalsTest(use_gpu=False, dtypes=(np.float32, np.float64))
+    self._flushDenormalsTest(dtypes=(np.float32, np.float64))

-  @test_util.run_deprecated_v1
+  @test_util.run_in_graph_and_eager_modes(use_gpu=True)
  def testFlushDenormalsGPU(self):
    # On GPUs, only single precision can flush to zero.
-    self._flushDenormalsTest(use_gpu=True, dtypes=(np.float32,))
+    self._flushDenormalsTest(dtypes=(np.float32,))


 if __name__ == "__main__":