diff --git a/tensorflow/core/kernels/cwise_op_ceil.cc b/tensorflow/core/kernels/cwise_op_ceil.cc index 4b1847d758c..f8907ff1baa 100644 --- a/tensorflow/core/kernels/cwise_op_ceil.cc +++ b/tensorflow/core/kernels/cwise_op_ceil.cc @@ -16,7 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER3(UnaryOp, CPU, "Ceil", functor::ceil, float, Eigen::half, double); +REGISTER4(UnaryOp, CPU, "Ceil", functor::ceil, float, Eigen::half, bfloat16, + double); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(UnaryOp, GPU, "Ceil", functor::ceil, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc index c0c71c5f638..3d43cf147b1 100644 --- a/tensorflow/core/kernels/cwise_op_clip.cc +++ b/tensorflow/core/kernels/cwise_op_clip.cc @@ -156,6 +156,7 @@ struct TernaryClipOp { INSTANTIATE_CPU(Eigen::half); INSTANTIATE_CPU(float); INSTANTIATE_CPU(double); +INSTANTIATE_CPU(bfloat16); INSTANTIATE_CPU(int8); INSTANTIATE_CPU(int16); INSTANTIATE_CPU(int32); @@ -173,6 +174,7 @@ INSTANTIATE_CPU(uint16); REGISTER_CPU_KERNEL(Eigen::half); REGISTER_CPU_KERNEL(float); REGISTER_CPU_KERNEL(double); +REGISTER_CPU_KERNEL(bfloat16); REGISTER_CPU_KERNEL(int8); REGISTER_CPU_KERNEL(int16); REGISTER_CPU_KERNEL(int32); diff --git a/tensorflow/core/kernels/cwise_op_cos.cc b/tensorflow/core/kernels/cwise_op_cos.cc index 7b434ce4294..3d406fe040a 100644 --- a/tensorflow/core/kernels/cwise_op_cos.cc +++ b/tensorflow/core/kernels/cwise_op_cos.cc @@ -16,8 +16,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(UnaryOp, CPU, "Cos", functor::cos, float, Eigen::half, double, - complex64, complex128); +REGISTER6(UnaryOp, CPU, "Cos", functor::cos, float, Eigen::half, bfloat16, + double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(UnaryOp, GPU, "Cos", functor::cos, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_cosh.cc b/tensorflow/core/kernels/cwise_op_cosh.cc index 3388df0096a..e6dff0ea317 100644 --- a/tensorflow/core/kernels/cwise_op_cosh.cc +++ b/tensorflow/core/kernels/cwise_op_cosh.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER4(UnaryOp, CPU, "Cosh", functor::cosh, float, double, complex64, - complex128); +REGISTER5(UnaryOp, CPU, "Cosh", functor::cosh, float, double, bfloat16, + complex64, complex128); #if TENSORFLOW_USE_SYCL #define REGISTER_SYCL_KERNEL(TYPE) \ diff --git a/tensorflow/core/kernels/cwise_op_exp.cc b/tensorflow/core/kernels/cwise_op_exp.cc index 2b157f0e7a9..d937dd0c06d 100644 --- a/tensorflow/core/kernels/cwise_op_exp.cc +++ b/tensorflow/core/kernels/cwise_op_exp.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, double, - complex64, complex128); +REGISTER6(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, bfloat16, + double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER5(UnaryOp, GPU, "Exp", functor::exp, float, Eigen::half, double, diff --git a/tensorflow/core/kernels/cwise_op_expm1.cc b/tensorflow/core/kernels/cwise_op_expm1.cc index 55fdc4763d3..0b145d83e5c 100644 --- a/tensorflow/core/kernels/cwise_op_expm1.cc +++ b/tensorflow/core/kernels/cwise_op_expm1.cc @@ -16,8 +16,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(UnaryOp, CPU, "Expm1", functor::expm1, float, Eigen::half, double, - complex64, complex128); +REGISTER6(UnaryOp, CPU, "Expm1", functor::expm1, float, Eigen::half, bfloat16, + double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(UnaryOp, GPU, "Expm1", functor::expm1, float, Eigen::half, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_floor.cc b/tensorflow/core/kernels/cwise_op_floor.cc index 25210a0fa51..1dbd9bf0634 100644 --- a/tensorflow/core/kernels/cwise_op_floor.cc +++ b/tensorflow/core/kernels/cwise_op_floor.cc @@ -16,7 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER3(UnaryOp, CPU, "Floor", functor::floor, float, Eigen::half, double); +REGISTER4(UnaryOp, CPU, "Floor", functor::floor, float, Eigen::half, bfloat16, + double); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(UnaryOp, GPU, "Floor", functor::floor, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_floor_div.cc b/tensorflow/core/kernels/cwise_op_floor_div.cc index 11869e43eaa..d1f6d4c0652 100644 --- a/tensorflow/core/kernels/cwise_op_floor_div.cc +++ b/tensorflow/core/kernels/cwise_op_floor_div.cc @@ -18,8 +18,8 @@ limitations under the License. 
namespace tensorflow { REGISTER6(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16, int8, int16, int32, int64); -REGISTER3(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float, - Eigen::half, double); +REGISTER4(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float, + Eigen::half, bfloat16, double); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER4(BinaryOp, GPU, "FloorDiv", functor::floor_div, uint8, uint16, int16, diff --git a/tensorflow/core/kernels/cwise_op_floor_mod.cc b/tensorflow/core/kernels/cwise_op_floor_mod.cc index 3305f54bcca..599ed1a9318 100644 --- a/tensorflow/core/kernels/cwise_op_floor_mod.cc +++ b/tensorflow/core/kernels/cwise_op_floor_mod.cc @@ -18,7 +18,8 @@ limitations under the License. namespace tensorflow { REGISTER3(BinaryOp, CPU, "FloorMod", functor::safe_floor_mod, int32, int64, uint64); -REGISTER2(BinaryOp, CPU, "FloorMod", functor::floor_fmod, float, double); +REGISTER3(BinaryOp, CPU, "FloorMod", functor::floor_fmod, bfloat16, float, + double); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM // A special GPU kernel for int32. diff --git a/tensorflow/core/kernels/cwise_op_isfinite.cc b/tensorflow/core/kernels/cwise_op_isfinite.cc index 061dc8367e2..42c7cbd4fd7 100644 --- a/tensorflow/core/kernels/cwise_op_isfinite.cc +++ b/tensorflow/core/kernels/cwise_op_isfinite.cc @@ -16,8 +16,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER3(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half, - double); +REGISTER4(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half, + bfloat16, double); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(UnaryOp, GPU, "IsFinite", functor::isfinite, float, Eigen::half, diff --git a/tensorflow/core/kernels/cwise_op_isinf.cc b/tensorflow/core/kernels/cwise_op_isinf.cc index f87a24d2085..68141f4924a 100644 --- a/tensorflow/core/kernels/cwise_op_isinf.cc +++ b/tensorflow/core/kernels/cwise_op_isinf.cc @@ -16,7 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER3(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, double); +REGISTER4(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, bfloat16, + double); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(UnaryOp, GPU, "IsInf", functor::isinf, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_log1p.cc b/tensorflow/core/kernels/cwise_op_log1p.cc index 06fc764fc75..88ddfd6af26 100644 --- a/tensorflow/core/kernels/cwise_op_log1p.cc +++ b/tensorflow/core/kernels/cwise_op_log1p.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(UnaryOp, CPU, "Log1p", functor::log1p, float, Eigen::half, double, - complex64, complex128); +REGISTER6(UnaryOp, CPU, "Log1p", functor::log1p, float, Eigen::half, bfloat16, + double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(UnaryOp, GPU, "Log1p", functor::log1p, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_pow.cc b/tensorflow/core/kernels/cwise_op_pow.cc index 1b1d626aa57..214d083e11b 100644 --- a/tensorflow/core/kernels/cwise_op_pow.cc +++ b/tensorflow/core/kernels/cwise_op_pow.cc @@ -16,8 +16,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(BinaryOp, CPU, "Pow", functor::pow, float, Eigen::half, double, - complex64, complex128); +REGISTER6(BinaryOp, CPU, "Pow", functor::pow, float, Eigen::half, bfloat16, + double, complex64, complex128); REGISTER2(BinaryOp, CPU, "Pow", functor::safe_pow, int32, int64); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/cwise_op_reciprocal.cc b/tensorflow/core/kernels/cwise_op_reciprocal.cc index 8e92691474a..4fe201e9c7b 100644 --- a/tensorflow/core/kernels/cwise_op_reciprocal.cc +++ b/tensorflow/core/kernels/cwise_op_reciprocal.cc @@ -30,15 +30,8 @@ REGISTER3(SimpleBinaryOp, GPU, "InvGrad", functor::inverse_grad, float, Eigen::half, double); #endif -#ifdef ENABLE_INTEL_MKL_BFLOAT16 -// Since Eigen backend does not support bfloat16 ops, we are selectively -// enabling them for MKL backend. REGISTER6(UnaryOp, CPU, "Reciprocal", functor::inverse, float, Eigen::half, - double, complex64, complex128, bfloat16); -#else -REGISTER5(UnaryOp, CPU, "Reciprocal", functor::inverse, float, Eigen::half, - double, complex64, complex128); -#endif // ENABLE_INTEL_MKL_BFLOAT16 + bfloat16, double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER4(UnaryOp, GPU, "Reciprocal", functor::inverse, float, Eigen::half, double, int64); @@ -47,8 +40,8 @@ REGISTER4(UnaryOp, GPU, "Reciprocal", functor::inverse, float, Eigen::half, REGISTER(UnaryOp, SYCL, "Reciprocal", functor::inverse, float); #endif // TENSORFLOW_USE_SYCL -REGISTER5(SimpleBinaryOp, CPU, "ReciprocalGrad", functor::inverse_grad, float, - Eigen::half, double, complex64, complex128); +REGISTER6(SimpleBinaryOp, CPU, "ReciprocalGrad", functor::inverse_grad, float, + Eigen::half, bfloat16, double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(SimpleBinaryOp, GPU, "ReciprocalGrad", functor::inverse_grad, float, Eigen::half, double); diff --git 
a/tensorflow/core/kernels/cwise_op_sign.cc b/tensorflow/core/kernels/cwise_op_sign.cc index 983cee4c944..200a56eb2d2 100644 --- a/tensorflow/core/kernels/cwise_op_sign.cc +++ b/tensorflow/core/kernels/cwise_op_sign.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER7(UnaryOp, CPU, "Sign", functor::sign, float, double, int32, int64, - complex64, Eigen::half, complex128); +REGISTER8(UnaryOp, CPU, "Sign", functor::sign, float, double, int32, int64, + complex64, Eigen::half, bfloat16, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER6(UnaryOp, GPU, "Sign", functor::sign, float, Eigen::half, double, int64, complex64, complex128); diff --git a/tensorflow/core/kernels/cwise_op_sin.cc b/tensorflow/core/kernels/cwise_op_sin.cc index ab6fb1ccd5e..f0fc2af7366 100644 --- a/tensorflow/core/kernels/cwise_op_sin.cc +++ b/tensorflow/core/kernels/cwise_op_sin.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(UnaryOp, CPU, "Sin", functor::sin, float, Eigen::half, double, - complex64, complex128); +REGISTER6(UnaryOp, CPU, "Sin", functor::sin, float, Eigen::half, bfloat16, + double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(UnaryOp, GPU, "Sin", functor::sin, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_sinh.cc b/tensorflow/core/kernels/cwise_op_sinh.cc index 114a6142bdc..4448d2fef76 100644 --- a/tensorflow/core/kernels/cwise_op_sinh.cc +++ b/tensorflow/core/kernels/cwise_op_sinh.cc @@ -16,8 +16,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER4(UnaryOp, CPU, "Sinh", functor::sinh, float, double, complex64, - complex128); +REGISTER5(UnaryOp, CPU, "Sinh", functor::sinh, float, double, bfloat16, + complex64, complex128); #if TENSORFLOW_USE_SYCL #define REGISTER_SYCL_KERNEL(TYPE) \ diff --git a/tensorflow/core/kernels/cwise_op_tan.cc b/tensorflow/core/kernels/cwise_op_tan.cc index d9793501a09..115531213ac 100644 --- a/tensorflow/core/kernels/cwise_op_tan.cc +++ b/tensorflow/core/kernels/cwise_op_tan.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(UnaryOp, CPU, "Tan", functor::tan, Eigen::half, float, double, - complex64, complex128); +REGISTER6(UnaryOp, CPU, "Tan", functor::tan, Eigen::half, bfloat16, float, + double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(UnaryOp, GPU, "Tan", functor::tan, Eigen::half, float, double); diff --git a/tensorflow/core/kernels/cwise_op_tanh.cc b/tensorflow/core/kernels/cwise_op_tanh.cc index 1b6da56e537..de56a5e3e03 100644 --- a/tensorflow/core/kernels/cwise_op_tanh.cc +++ b/tensorflow/core/kernels/cwise_op_tanh.cc @@ -17,8 +17,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/cwise_ops_gradients.h" namespace tensorflow { -REGISTER5(UnaryOp, CPU, "Tanh", functor::tanh, float, Eigen::half, double, - complex64, complex128); +REGISTER6(UnaryOp, CPU, "Tanh", functor::tanh, float, Eigen::half, bfloat16, + double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #ifndef MLIR_GENERATED_GPU_KERNELS_ENABLED @@ -30,8 +30,8 @@ REGISTER3(UnaryOp, GPU, "Tanh", functor::tanh, float, Eigen::half, double); REGISTER2(UnaryOp, SYCL, "Tanh", functor::tanh, float, double); #endif // TENSORFLOW_USE_SYCL -REGISTER5(SimpleBinaryOp, CPU, "TanhGrad", functor::tanh_grad, float, - Eigen::half, double, complex64, complex128); +REGISTER6(SimpleBinaryOp, CPU, "TanhGrad", functor::tanh_grad, float, + Eigen::half, bfloat16, double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(SimpleBinaryOp, GPU, "TanhGrad", functor::tanh_grad, float, Eigen::half, double); diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py index 8d6b475c914..d0c805f96e3 100644 --- a/tensorflow/python/kernel_tests/clip_ops_test.py +++ b/tensorflow/python/kernel_tests/clip_ops_test.py @@ -67,6 +67,7 @@ class ClipTest(test.TestCase): dtypes.float16, dtypes.float32, dtypes.float64, + dtypes.bfloat16, dtypes.int16, dtypes.int32, dtypes.int64, @@ -88,6 +89,7 @@ class ClipTest(test.TestCase): dtypes.float16, dtypes.float32, dtypes.float64, + dtypes.bfloat16, dtypes.int16, dtypes.int32, dtypes.int64, @@ -110,6 +112,7 @@ class ClipTest(test.TestCase): dtypes.float16, dtypes.float32, dtypes.float64, + dtypes.bfloat16, dtypes.int16, dtypes.int32, dtypes.int64, @@ -132,6 +135,7 @@ class ClipTest(test.TestCase): dtypes.float16, dtypes.float32, dtypes.float64, + dtypes.bfloat16, dtypes.int16, dtypes.int32, dtypes.int64, diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py index 78d3af17990..8d628d448db 100644 --- 
a/tensorflow/python/kernel_tests/cwise_ops_test.py +++ b/tensorflow/python/kernel_tests/cwise_ops_test.py @@ -841,6 +841,9 @@ class MathOpsOverloadTest(test.TestCase): def _compareBinary(self, x, y, dtype, np_func, tf_func): np_ans = np_func(x, y).astype(dtype.as_numpy_dtype) + if dtype == dtypes_lib.bfloat16: + # assertAllClose does not properly handle bfloat16 values + np_ans = np_ans.astype(np.float32) self.assertAllClose(np_ans, self._computeTensorAndLiteral(x, y, dtype, tf_func)) self.assertAllClose(np_ans, @@ -857,6 +860,7 @@ class MathOpsOverloadTest(test.TestCase): dtypes_lib.float16, dtypes_lib.float32, dtypes_lib.float64, + dtypes_lib.bfloat16, dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.complex64, @@ -920,12 +924,16 @@ class MathOpsOverloadTest(test.TestCase): class IsFiniteInfNanTest(test.TestCase): def _compare(self, x, use_gpu): - np_finite, np_inf, np_nan = np.isfinite(x), np.isinf(x), np.isnan(x) with test_util.device(use_gpu=use_gpu): inx = ops.convert_to_tensor(x) ofinite, oinf, onan = math_ops.is_finite(inx), math_ops.is_inf( inx), math_ops.is_nan(inx) tf_finite, tf_inf, tf_nan = self.evaluate([ofinite, oinf, onan]) + if x.dtype == dtypes_lib.bfloat16.as_numpy_dtype: + # Numpy will implicitly convert bfloat16 value to float16, so we cast to + # float32 to avoid this. 
+ x = x.astype(np.float32) + np_finite, np_inf, np_nan = np.isfinite(x), np.isinf(x), np.isnan(x) self.assertAllEqual(np_inf, tf_inf) self.assertAllEqual(np_nan, tf_nan) self.assertAllEqual(np_finite, tf_finite) @@ -934,11 +942,18 @@ class IsFiniteInfNanTest(test.TestCase): self.assertShapeEqual(np_finite, ofinite) def _testDtype(self, dtype): - fi = np.finfo(dtype) - data = np.array([ - 0, -1, 1, fi.resolution, -fi.resolution, fi.min, fi.max, -np.inf, - np.inf, np.nan - ]).astype(dtype) + if dtype != dtypes_lib.bfloat16.as_numpy_dtype: + fi = np.finfo(dtype) + data = np.array([ + 0, -1, 1, fi.resolution, -fi.resolution, fi.min, fi.max, -np.inf, + np.inf, np.nan + ]).astype(dtype) + else: + # np.finfo does not support bfloat16 + data = np.array([ + 0, -1, 1, 0.01, -0.01, -3.3895e+38, 3.3895e+38, -np.inf, np.inf, + np.nan + ]).astype(dtype) self._compare(data, use_gpu=False) self._compare(data, use_gpu=True) @@ -951,6 +966,9 @@ class IsFiniteInfNanTest(test.TestCase): def testDouble(self): self._testDtype(np.float64) + def testBfloat16(self): + self._testDtype(dtypes_lib.bfloat16.as_numpy_dtype) + def testSqrt(self): for dtype in [np.float16, np.float32, np.float64]: fi = np.finfo(dtype) @@ -998,8 +1016,8 @@ class RoundingTest(test.TestCase): def _testDtype(self, dtype): data = (np.arange(-3, 3) / 4.).reshape(1, 3, 2).astype(dtype) self._compare(data) - # TODO: rint op is not supported for float16 - if dtype is np.float16: + # TODO(reedwm): rint op is not supported for float16 and bfloat16 + if dtype in (np.float16, dtypes_lib.bfloat16.as_numpy_dtype): return self._compare_values(data) x = [0.5, 0.5000001] @@ -1012,10 +1030,12 @@ class RoundingTest(test.TestCase): self._compare_values(x, y=y) def testTypes(self): - self.skipTest("b/131162241") - for dtype in [np.float16, np.float32, np.float64]: - with self.subTest(dtype=dtype): - self._testDtype(dtype) + # TODO(b/131162241): Enable test for GPU + with ops.device("/CPU:0"): + for dtype in [np.float16, np.float32, 
np.float64, + dtypes_lib.bfloat16.as_numpy_dtype]: + with self.subTest(dtype=dtype): + self._testDtype(dtype) class ComplexMakeRealImagTest(test.TestCase): diff --git a/tensorflow/python/kernel_tests/cwise_ops_unary_test.py b/tensorflow/python/kernel_tests/cwise_ops_unary_test.py index df848a653d4..9d46ed35639 100644 --- a/tensorflow/python/kernel_tests/cwise_ops_unary_test.py +++ b/tensorflow/python/kernel_tests/cwise_ops_unary_test.py @@ -61,6 +61,8 @@ def _default_tolerance(dtype): Args: dtype: A datatype. """ + if dtype == dtypes_lib.bfloat16.as_numpy_dtype: + return 5e-3 if dtype == np.float16: return 5e-3 elif dtype in (np.float32, np.complex64): @@ -81,12 +83,7 @@ class UnaryOpTest(test.TestCase): np_ans = np_func(x) with self.cached_session(use_gpu=False): inx = ops.convert_to_tensor(x) - if x.dtype in (np.float32, np.float64, - dtypes_lib.bfloat16.as_numpy_dtype): - y = 1.1 * tf_func(inx) - np_ans *= 1.1 - else: - y = tf_func(inx) + y = tf_func(inx) tf_cpu = self.evaluate(y) self.assertShapeEqual(np_ans, y) if x.dtype == np.float16: @@ -99,7 +96,7 @@ class UnaryOpTest(test.TestCase): if x.dtype in (np.complex64, np.complex128) and tf_func == math_ops.sign: return # Return early - if x.dtype == np.float16: + if x.dtype in (np.float16, dtypes_lib.bfloat16.as_numpy_dtype): s = list(np.shape(x)) jacob_t, _ = gradient_checker.compute_gradient( inx, s, y, s, x_init_value=x) @@ -108,7 +105,7 @@ class UnaryOpTest(test.TestCase): yf = tf_func(inxf) _, jacob_n = gradient_checker.compute_gradient( inxf, s, yf, s, x_init_value=xf, delta=1e-2) - jacob_n = jacob_n.astype(np.float16) + jacob_n = jacob_n.astype(x.dtype) self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol) elif x.dtype in (np.float32, np.complex64): s = list(np.shape(x)) @@ -384,13 +381,36 @@ class UnaryOpTest(test.TestCase): self._compareBothSparse(y, np.sign, math_ops.sign) self._compareBothSparse(x, np.vectorize(math.erf), math_ops.erf, tol=1e-3) + @test_util.run_deprecated_v1 def 
testBFloat16Basic(self): + def compute_f32(np_func): + """Decorator to compute Numpy function with float32 math.""" + def f(x): + y = np_func(x.astype(np.float32)) + return y.astype(x.dtype) + return f + + bfloat16 = dtypes_lib.bfloat16.as_numpy_dtype x = np.arange(-6, 6, 2).reshape(1, 3, 2).astype(dtypes_lib.bfloat16.as_numpy_dtype) + y = (x + .5).astype(bfloat16) # no zero + z = (x + 15.5).astype(bfloat16) # all positive self._compareCpu(x, np.abs, math_ops.abs) self._compareCpu(x, np.abs, _ABS) self._compareBoth(x, np.negative, math_ops.negative) self._compareBoth(x, np.negative, _NEG) + self._compareCpu(y, compute_f32(self._inv), math_ops.reciprocal) + self._compareCpu(x, np.exp, math_ops.exp) + self._compareCpu(x, np.expm1, math_ops.expm1) + self._compareCpu(z, compute_f32(np.log), math_ops.log) + self._compareCpu(z, compute_f32(np.log1p), math_ops.log1p) + self._compareCpu(y, np.sign, math_ops.sign) + self._compareBoth(x, compute_f32(np.sin), math_ops.sin) + self._compareBoth(x, compute_f32(np.cos), math_ops.cos) + self._compareBoth(x, compute_f32(np.tan), math_ops.tan) + self._compareBoth(x, compute_f32(np.sinh), math_ops.sinh) + self._compareBoth(x, compute_f32(np.cosh), math_ops.cosh) + self._compareBoth(x, compute_f32(np.tanh), math_ops.tanh) def testInt8Basic(self): x = np.arange(-6, 6, 2).reshape(1, 3, 2).astype(np.int8) diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index 296395d034f..dabf4bb9d33 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -476,6 +476,14 @@ class DivAndModTest(test_util.TensorFlowTestCase): # % array_ops.constant(divs)) # self.assertAllEqual(tf2_result, tf_result) + def testFloorModBfloat16(self): + # Inputs are cast to bfloat16; the reference is NumPy's % on the uncast data. + nums, divs = self.floatTestData() + tf_result = math_ops.floormod(math_ops.cast(nums, dtypes.bfloat16), + math_ops.cast(divs, dtypes.bfloat16)) + np_result = nums % divs + self.assertAllEqual(tf_result, np_result) + def 
testTruncateModInt(self): nums, divs = self.intTestData() tf_result = math_ops.truncatemod(nums, divs)