Flush denormals to zero in eager mode.
PiperOrigin-RevId: 311364546 Change-Id: I42efa6b19b8193c49bc581879b04ce3d05a13607
This commit is contained in:
parent
88c4ee0102
commit
b97bf5ae0b
|
@ -35,8 +35,10 @@ limitations under the License.
|
|||
#include "tensorflow/core/lib/gtl/cleanup.h"
|
||||
#include "tensorflow/core/lib/gtl/map_util.h"
|
||||
#include "tensorflow/core/lib/random/random.h"
|
||||
#include "tensorflow/core/platform/denormal.h"
|
||||
#include "tensorflow/core/platform/errors.h"
|
||||
#include "tensorflow/core/platform/fingerprint.h"
|
||||
#include "tensorflow/core/platform/setround.h"
|
||||
#include "tensorflow/core/profiler/lib/annotated_traceme.h"
|
||||
#include "tensorflow/core/profiler/lib/traceme.h"
|
||||
#include "tensorflow/core/public/version.h"
|
||||
|
@ -281,6 +283,8 @@ Status KernelAndDeviceOp::Run(
|
|||
OpKernelContext context(&params);
|
||||
|
||||
{
|
||||
port::ScopedFlushDenormal flush;
|
||||
port::ScopedSetRound round(FE_TONEAREST);
|
||||
// 'AnnotatedTraceMe' will trace both scheduling time on host and execution
|
||||
// time on device of the OpKernel.
|
||||
profiler::AnnotatedTraceMe activity(
|
||||
|
|
|
@ -23,7 +23,6 @@ import platform
|
|||
|
||||
from tensorflow.python.framework import constant_op
|
||||
from tensorflow.python.framework import test_util
|
||||
from tensorflow.python.ops import array_ops
|
||||
from tensorflow.python.platform import test
|
||||
|
||||
|
||||
|
@ -35,32 +34,30 @@ class DenormalTest(test.TestCase):
|
|||
tiny = np.finfo(dtype).tiny
|
||||
self.assertEqual(tiny, tiny / 16 * 16)
|
||||
|
||||
def _flushDenormalsTest(self, use_gpu, dtypes):
|
||||
if platform.machine() == "ppc64le" or platform.machine(
|
||||
) == "s390x" or platform.machine() == "aarch64":
|
||||
def _flushDenormalsTest(self, dtypes):
|
||||
if (platform.machine() == "ppc64le" or platform.machine() == "s390x" or
|
||||
platform.machine() == "aarch64"):
|
||||
# Disabled denormal_test on power/s390x/aarch64 platform
|
||||
# Check relevant discussion - https://github.com/tensorflow/tensorflow/issues/11902
|
||||
return
|
||||
with self.cached_session(use_gpu=use_gpu):
|
||||
array_ops.identity(7).eval()
|
||||
for dtype in dtypes:
|
||||
tiny = np.finfo(dtype).tiny
|
||||
# Small shape to test main thread, large shape to test thread pool
|
||||
for shape in (), (1 << 20,):
|
||||
flush = 0.1 * constant_op.constant(tiny, shape=shape)
|
||||
self.assertAllEqual(flush.eval(), np.zeros(shape))
|
||||
# Make sure the flags don't leak out
|
||||
self.testPythonHasDenormals()
|
||||
for dtype in dtypes:
|
||||
tiny = np.finfo(dtype).tiny
|
||||
# Small shape to test main thread, large shape to test thread pool
|
||||
for shape in (), (1 << 20,):
|
||||
flush = 0.1 * constant_op.constant(tiny, shape=shape)
|
||||
self.assertAllEqual(self.evaluate(flush), np.zeros(shape))
|
||||
# Make sure the flags don't leak out
|
||||
self.testPythonHasDenormals()
|
||||
|
||||
@test_util.run_deprecated_v1
|
||||
@test_util.run_in_graph_and_eager_modes(use_gpu=False)
|
||||
def testFlushDenormalsCPU(self):
|
||||
# On CPUs, the processor flags flush for both single and double precision.
|
||||
self._flushDenormalsTest(use_gpu=False, dtypes=(np.float32, np.float64))
|
||||
self._flushDenormalsTest(dtypes=(np.float32, np.float64))
|
||||
|
||||
@test_util.run_deprecated_v1
|
||||
@test_util.run_in_graph_and_eager_modes(use_gpu=True)
|
||||
def testFlushDenormalsGPU(self):
|
||||
# On GPUs, only single precision can flush to zero.
|
||||
self._flushDenormalsTest(use_gpu=True, dtypes=(np.float32,))
|
||||
self._flushDenormalsTest(dtypes=(np.float32,))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Loading…
Reference in New Issue