Flush denormals to zero in eager mode.
PiperOrigin-RevId: 311364546
Change-Id: I42efa6b19b8193c49bc581879b04ce3d05a13607
This commit is contained in:
parent
88c4ee0102
commit
b97bf5ae0b
|
@ -35,8 +35,10 @@ limitations under the License.
|
||||||
#include "tensorflow/core/lib/gtl/cleanup.h"
|
#include "tensorflow/core/lib/gtl/cleanup.h"
|
||||||
#include "tensorflow/core/lib/gtl/map_util.h"
|
#include "tensorflow/core/lib/gtl/map_util.h"
|
||||||
#include "tensorflow/core/lib/random/random.h"
|
#include "tensorflow/core/lib/random/random.h"
|
||||||
|
#include "tensorflow/core/platform/denormal.h"
|
||||||
#include "tensorflow/core/platform/errors.h"
|
#include "tensorflow/core/platform/errors.h"
|
||||||
#include "tensorflow/core/platform/fingerprint.h"
|
#include "tensorflow/core/platform/fingerprint.h"
|
||||||
|
#include "tensorflow/core/platform/setround.h"
|
||||||
#include "tensorflow/core/profiler/lib/annotated_traceme.h"
|
#include "tensorflow/core/profiler/lib/annotated_traceme.h"
|
||||||
#include "tensorflow/core/profiler/lib/traceme.h"
|
#include "tensorflow/core/profiler/lib/traceme.h"
|
||||||
#include "tensorflow/core/public/version.h"
|
#include "tensorflow/core/public/version.h"
|
||||||
|
@ -281,6 +283,8 @@ Status KernelAndDeviceOp::Run(
|
||||||
OpKernelContext context(&params);
|
OpKernelContext context(&params);
|
||||||
|
|
||||||
{
|
{
|
||||||
|
port::ScopedFlushDenormal flush;
|
||||||
|
port::ScopedSetRound round(FE_TONEAREST);
|
||||||
// 'AnnotatedTraceMe' will trace both scheduling time on host and execution
|
// 'AnnotatedTraceMe' will trace both scheduling time on host and execution
|
||||||
// time on device of the OpKernel.
|
// time on device of the OpKernel.
|
||||||
profiler::AnnotatedTraceMe activity(
|
profiler::AnnotatedTraceMe activity(
|
||||||
|
|
|
@ -23,7 +23,6 @@ import platform
|
||||||
|
|
||||||
from tensorflow.python.framework import constant_op
|
from tensorflow.python.framework import constant_op
|
||||||
from tensorflow.python.framework import test_util
|
from tensorflow.python.framework import test_util
|
||||||
from tensorflow.python.ops import array_ops
|
|
||||||
from tensorflow.python.platform import test
|
from tensorflow.python.platform import test
|
||||||
|
|
||||||
|
|
||||||
|
@ -35,32 +34,30 @@ class DenormalTest(test.TestCase):
|
||||||
tiny = np.finfo(dtype).tiny
|
tiny = np.finfo(dtype).tiny
|
||||||
self.assertEqual(tiny, tiny / 16 * 16)
|
self.assertEqual(tiny, tiny / 16 * 16)
|
||||||
|
|
||||||
def _flushDenormalsTest(self, use_gpu, dtypes):
|
def _flushDenormalsTest(self, dtypes):
|
||||||
if platform.machine() == "ppc64le" or platform.machine(
|
if (platform.machine() == "ppc64le" or platform.machine() == "s390x" or
|
||||||
) == "s390x" or platform.machine() == "aarch64":
|
platform.machine() == "aarch64"):
|
||||||
# Disabled denormal_test on power/s390x/aarch64 platform
|
# Disabled denormal_test on power/s390x/aarch64 platform
|
||||||
# Check relevant discussion - https://github.com/tensorflow/tensorflow/issues/11902
|
# Check relevant discussion - https://github.com/tensorflow/tensorflow/issues/11902
|
||||||
return
|
return
|
||||||
with self.cached_session(use_gpu=use_gpu):
|
for dtype in dtypes:
|
||||||
array_ops.identity(7).eval()
|
tiny = np.finfo(dtype).tiny
|
||||||
for dtype in dtypes:
|
# Small shape to test main thread, large shape to test thread pool
|
||||||
tiny = np.finfo(dtype).tiny
|
for shape in (), (1 << 20,):
|
||||||
# Small shape to test main thread, large shape to test thread pool
|
flush = 0.1 * constant_op.constant(tiny, shape=shape)
|
||||||
for shape in (), (1 << 20,):
|
self.assertAllEqual(self.evaluate(flush), np.zeros(shape))
|
||||||
flush = 0.1 * constant_op.constant(tiny, shape=shape)
|
# Make sure the flags don't leak out
|
||||||
self.assertAllEqual(flush.eval(), np.zeros(shape))
|
self.testPythonHasDenormals()
|
||||||
# Make sure the flags don't leak out
|
|
||||||
self.testPythonHasDenormals()
|
|
||||||
|
|
||||||
@test_util.run_deprecated_v1
|
@test_util.run_in_graph_and_eager_modes(use_gpu=False)
|
||||||
def testFlushDenormalsCPU(self):
|
def testFlushDenormalsCPU(self):
|
||||||
# On CPUs, the processor flags flush for both single and double precision.
|
# On CPUs, the processor flags flush for both single and double precision.
|
||||||
self._flushDenormalsTest(use_gpu=False, dtypes=(np.float32, np.float64))
|
self._flushDenormalsTest(dtypes=(np.float32, np.float64))
|
||||||
|
|
||||||
@test_util.run_deprecated_v1
|
@test_util.run_in_graph_and_eager_modes(use_gpu=True)
|
||||||
def testFlushDenormalsGPU(self):
|
def testFlushDenormalsGPU(self):
|
||||||
# On GPUs, only single precision can flush to zero.
|
# On GPUs, only single precision can flush to zero.
|
||||||
self._flushDenormalsTest(use_gpu=True, dtypes=(np.float32,))
|
self._flushDenormalsTest(dtypes=(np.float32,))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
Loading…
Reference in New Issue