diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.cc b/tensorflow/core/common_runtime/eager/kernel_and_device.cc index 3c586e8188a..bf7c083f24b 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.cc +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc @@ -35,8 +35,10 @@ limitations under the License. #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/fingerprint.h" +#include "tensorflow/core/platform/setround.h" #include "tensorflow/core/profiler/lib/annotated_traceme.h" #include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/public/version.h" @@ -281,6 +283,8 @@ Status KernelAndDeviceOp::Run( OpKernelContext context(&params); { + port::ScopedFlushDenormal flush; + port::ScopedSetRound round(FE_TONEAREST); // 'AnnotatedTraceMe' will trace both scheduling time on host and execution // time on device of the OpKernel. 
profiler::AnnotatedTraceMe activity( diff --git a/tensorflow/python/kernel_tests/denormal_test.py b/tensorflow/python/kernel_tests/denormal_test.py index d824e95f213..6e073f0d526 100644 --- a/tensorflow/python/kernel_tests/denormal_test.py +++ b/tensorflow/python/kernel_tests/denormal_test.py @@ -23,7 +23,6 @@ import platform from tensorflow.python.framework import constant_op from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops from tensorflow.python.platform import test @@ -35,32 +34,30 @@ class DenormalTest(test.TestCase): tiny = np.finfo(dtype).tiny self.assertEqual(tiny, tiny / 16 * 16) - def _flushDenormalsTest(self, use_gpu, dtypes): - if platform.machine() == "ppc64le" or platform.machine( - ) == "s390x" or platform.machine() == "aarch64": + def _flushDenormalsTest(self, dtypes): + if (platform.machine() == "ppc64le" or platform.machine() == "s390x" or + platform.machine() == "aarch64"): # Disabled denormal_test on power/s390x/aarch64 platform # Check relevant discussion - https://github.com/tensorflow/tensorflow/issues/11902 return - with self.cached_session(use_gpu=use_gpu): - array_ops.identity(7).eval() - for dtype in dtypes: - tiny = np.finfo(dtype).tiny - # Small shape to test main thread, large shape to test thread pool - for shape in (), (1 << 20,): - flush = 0.1 * constant_op.constant(tiny, shape=shape) - self.assertAllEqual(flush.eval(), np.zeros(shape)) - # Make sure the flags don't leak out - self.testPythonHasDenormals() + for dtype in dtypes: + tiny = np.finfo(dtype).tiny + # Small shape to test main thread, large shape to test thread pool + for shape in (), (1 << 20,): + flush = 0.1 * constant_op.constant(tiny, shape=shape) + self.assertAllEqual(self.evaluate(flush), np.zeros(shape)) + # Make sure the flags don't leak out + self.testPythonHasDenormals() - @test_util.run_deprecated_v1 + @test_util.run_in_graph_and_eager_modes(use_gpu=False) def testFlushDenormalsCPU(self): # On CPUs, the processor 
flags flush for both single and double precision. - self._flushDenormalsTest(use_gpu=False, dtypes=(np.float32, np.float64)) + self._flushDenormalsTest(dtypes=(np.float32, np.float64)) - @test_util.run_deprecated_v1 + @test_util.run_in_graph_and_eager_modes(use_gpu=True) def testFlushDenormalsGPU(self): # On GPUs, only single precision can flush to zero. - self._flushDenormalsTest(use_gpu=True, dtypes=(np.float32,)) + self._flushDenormalsTest(dtypes=(np.float32,)) if __name__ == "__main__":