Flush denormals to zero in eager mode.

PiperOrigin-RevId: 311251058
Change-Id: I6ddca2fabc904e8e7400735aaddef361ba0b8778
This commit is contained in:
A. Unique TensorFlower 2020-05-12 19:45:42 -07:00 committed by TensorFlower Gardener
parent 7bffd6c498
commit f690a054c5
2 changed files with 18 additions and 19 deletions

View File

@@ -35,10 +35,8 @@ limitations under the License.
#include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/gtl/cleanup.h"
#include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/map_util.h"
#include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/lib/random/random.h"
#include "tensorflow/core/platform/denormal.h"
#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/errors.h"
#include "tensorflow/core/platform/fingerprint.h" #include "tensorflow/core/platform/fingerprint.h"
#include "tensorflow/core/platform/setround.h"
#include "tensorflow/core/profiler/lib/annotated_traceme.h" #include "tensorflow/core/profiler/lib/annotated_traceme.h"
#include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/profiler/lib/traceme.h"
#include "tensorflow/core/public/version.h" #include "tensorflow/core/public/version.h"
@@ -283,8 +281,6 @@ Status KernelAndDeviceOp::Run(
OpKernelContext context(&params); OpKernelContext context(&params);
{ {
port::ScopedFlushDenormal flush;
port::ScopedSetRound round(FE_TONEAREST);
// 'AnnotatedTraceMe' will trace both scheduling time on host and execution // 'AnnotatedTraceMe' will trace both scheduling time on host and execution
// time on device of the OpKernel. // time on device of the OpKernel.
profiler::AnnotatedTraceMe activity( profiler::AnnotatedTraceMe activity(

View File

@@ -23,6 +23,7 @@ import platform
from tensorflow.python.framework import constant_op from tensorflow.python.framework import constant_op
from tensorflow.python.framework import test_util from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import test from tensorflow.python.platform import test
@@ -34,30 +35,32 @@ class DenormalTest(test.TestCase):
tiny = np.finfo(dtype).tiny tiny = np.finfo(dtype).tiny
self.assertEqual(tiny, tiny / 16 * 16) self.assertEqual(tiny, tiny / 16 * 16)
def _flushDenormalsTest(self, dtypes): def _flushDenormalsTest(self, use_gpu, dtypes):
if (platform.machine() == "ppc64le" or platform.machine() == "s390x" or if platform.machine() == "ppc64le" or platform.machine(
platform.machine() == "aarch64"): ) == "s390x" or platform.machine() == "aarch64":
# Disabled denormal_test on power/s390x/aarch64 platform # Disabled denormal_test on power/s390x/aarch64 platform
# Check relevant discussion - https://github.com/tensorflow/tensorflow/issues/11902 # Check relevant discussion - https://github.com/tensorflow/tensorflow/issues/11902
return return
for dtype in dtypes: with self.cached_session(use_gpu=use_gpu):
tiny = np.finfo(dtype).tiny array_ops.identity(7).eval()
# Small shape to test main thread, large shape to test thread pool for dtype in dtypes:
for shape in (), (1 << 20,): tiny = np.finfo(dtype).tiny
flush = 0.1 * constant_op.constant(tiny, shape=shape) # Small shape to test main thread, large shape to test thread pool
self.assertAllEqual(self.evaluate(flush), np.zeros(shape)) for shape in (), (1 << 20,):
# Make sure the flags don't leak out flush = 0.1 * constant_op.constant(tiny, shape=shape)
self.testPythonHasDenormals() self.assertAllEqual(flush.eval(), np.zeros(shape))
# Make sure the flags don't leak out
self.testPythonHasDenormals()
@test_util.run_in_graph_and_eager_modes(use_gpu=False) @test_util.run_deprecated_v1
def testFlushDenormalsCPU(self): def testFlushDenormalsCPU(self):
# On CPUs, the processor flags flush for both single and double precision. # On CPUs, the processor flags flush for both single and double precision.
self._flushDenormalsTest(dtypes=(np.float32, np.float64)) self._flushDenormalsTest(use_gpu=False, dtypes=(np.float32, np.float64))
@test_util.run_in_graph_and_eager_modes(use_gpu=True) @test_util.run_deprecated_v1
def testFlushDenormalsGPU(self): def testFlushDenormalsGPU(self):
# On GPUs, only single precision can flush to zero. # On GPUs, only single precision can flush to zero.
self._flushDenormalsTest(dtypes=(np.float32,)) self._flushDenormalsTest(use_gpu=True, dtypes=(np.float32,))
if __name__ == "__main__": if __name__ == "__main__":