Flush denormals to zero in eager mode.

PiperOrigin-RevId: 311364546
Change-Id: I42efa6b19b8193c49bc581879b04ce3d05a13607
This commit is contained in:
A. Unique TensorFlower 2020-05-13 11:06:08 -07:00 committed by TensorFlower Gardener
parent 88c4ee0102
commit b97bf5ae0b
2 changed files with 19 additions and 18 deletions

View File

@@ -35,8 +35,10 @@ limitations under the License.
#include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/gtl/cleanup.h"
#include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/map_util.h"
#include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/lib/random/random.h"
#include "tensorflow/core/platform/denormal.h"
#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/errors.h"
#include "tensorflow/core/platform/fingerprint.h" #include "tensorflow/core/platform/fingerprint.h"
#include "tensorflow/core/platform/setround.h"
#include "tensorflow/core/profiler/lib/annotated_traceme.h" #include "tensorflow/core/profiler/lib/annotated_traceme.h"
#include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/profiler/lib/traceme.h"
#include "tensorflow/core/public/version.h" #include "tensorflow/core/public/version.h"
@@ -281,6 +283,8 @@ Status KernelAndDeviceOp::Run(
OpKernelContext context(&params); OpKernelContext context(&params);
{ {
port::ScopedFlushDenormal flush;
port::ScopedSetRound round(FE_TONEAREST);
// 'AnnotatedTraceMe' will trace both scheduling time on host and execution // 'AnnotatedTraceMe' will trace both scheduling time on host and execution
// time on device of the OpKernel. // time on device of the OpKernel.
profiler::AnnotatedTraceMe activity( profiler::AnnotatedTraceMe activity(

View File

@@ -23,7 +23,6 @@ import platform
from tensorflow.python.framework import constant_op from tensorflow.python.framework import constant_op
from tensorflow.python.framework import test_util from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import test from tensorflow.python.platform import test
@@ -35,32 +34,30 @@ class DenormalTest(test.TestCase):
tiny = np.finfo(dtype).tiny tiny = np.finfo(dtype).tiny
self.assertEqual(tiny, tiny / 16 * 16) self.assertEqual(tiny, tiny / 16 * 16)
def _flushDenormalsTest(self, use_gpu, dtypes): def _flushDenormalsTest(self, dtypes):
if platform.machine() == "ppc64le" or platform.machine( if (platform.machine() == "ppc64le" or platform.machine() == "s390x" or
) == "s390x" or platform.machine() == "aarch64": platform.machine() == "aarch64"):
# Disabled denormal_test on power/s390x/aarch64 platform # Disabled denormal_test on power/s390x/aarch64 platform
# Check relevant discussion - https://github.com/tensorflow/tensorflow/issues/11902 # Check relevant discussion - https://github.com/tensorflow/tensorflow/issues/11902
return return
with self.cached_session(use_gpu=use_gpu): for dtype in dtypes:
array_ops.identity(7).eval() tiny = np.finfo(dtype).tiny
for dtype in dtypes: # Small shape to test main thread, large shape to test thread pool
tiny = np.finfo(dtype).tiny for shape in (), (1 << 20,):
# Small shape to test main thread, large shape to test thread pool flush = 0.1 * constant_op.constant(tiny, shape=shape)
for shape in (), (1 << 20,): self.assertAllEqual(self.evaluate(flush), np.zeros(shape))
flush = 0.1 * constant_op.constant(tiny, shape=shape) # Make sure the flags don't leak out
self.assertAllEqual(flush.eval(), np.zeros(shape)) self.testPythonHasDenormals()
# Make sure the flags don't leak out
self.testPythonHasDenormals()
@test_util.run_deprecated_v1 @test_util.run_in_graph_and_eager_modes(use_gpu=False)
def testFlushDenormalsCPU(self): def testFlushDenormalsCPU(self):
# On CPUs, the processor flags flush for both single and double precision. # On CPUs, the processor flags flush for both single and double precision.
self._flushDenormalsTest(use_gpu=False, dtypes=(np.float32, np.float64)) self._flushDenormalsTest(dtypes=(np.float32, np.float64))
@test_util.run_deprecated_v1 @test_util.run_in_graph_and_eager_modes(use_gpu=True)
def testFlushDenormalsGPU(self): def testFlushDenormalsGPU(self):
# On GPUs, only single precision can flush to zero. # On GPUs, only single precision can flush to zero.
self._flushDenormalsTest(use_gpu=True, dtypes=(np.float32,)) self._flushDenormalsTest(dtypes=(np.float32,))
if __name__ == "__main__": if __name__ == "__main__":