Add bfloat16 support to more cwise CPU ops
PiperOrigin-RevId: 325248465
Change-Id: I68b3be2af4f9acedb76ab6077bf5dac9ac6eeb72
commit 696a4a76ce (parent 3f24d131d7)
@@ -16,7 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER3(UnaryOp, CPU, "Ceil", functor::ceil, float, Eigen::half, double);
+REGISTER4(UnaryOp, CPU, "Ceil", functor::ceil, float, Eigen::half, bfloat16,
+          double);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Ceil", functor::ceil, float, Eigen::half, double);
@@ -156,6 +156,7 @@ struct TernaryClipOp<CPUDevice, T> {
 INSTANTIATE_CPU(Eigen::half);
 INSTANTIATE_CPU(float);
 INSTANTIATE_CPU(double);
+INSTANTIATE_CPU(bfloat16);
 INSTANTIATE_CPU(int8);
 INSTANTIATE_CPU(int16);
 INSTANTIATE_CPU(int32);
@@ -173,6 +174,7 @@ INSTANTIATE_CPU(uint16);
 REGISTER_CPU_KERNEL(Eigen::half);
 REGISTER_CPU_KERNEL(float);
 REGISTER_CPU_KERNEL(double);
+REGISTER_CPU_KERNEL(bfloat16);
 REGISTER_CPU_KERNEL(int8);
 REGISTER_CPU_KERNEL(int16);
 REGISTER_CPU_KERNEL(int32);
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER5(UnaryOp, CPU, "Cos", functor::cos, float, Eigen::half, double,
-          complex64, complex128);
+REGISTER6(UnaryOp, CPU, "Cos", functor::cos, float, Eigen::half, bfloat16,
+          double, complex64, complex128);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Cos", functor::cos, float, Eigen::half, double);
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER4(UnaryOp, CPU, "Cosh", functor::cosh, float, double, complex64,
-          complex128);
+REGISTER5(UnaryOp, CPU, "Cosh", functor::cosh, float, double, bfloat16,
+          complex64, complex128);
 
 #if TENSORFLOW_USE_SYCL
 #define REGISTER_SYCL_KERNEL(TYPE) \
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER5(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, double,
-          complex64, complex128);
+REGISTER6(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, bfloat16,
+          double, complex64, complex128);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER5(UnaryOp, GPU, "Exp", functor::exp, float, Eigen::half, double,
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER5(UnaryOp, CPU, "Expm1", functor::expm1, float, Eigen::half, double,
-          complex64, complex128);
+REGISTER6(UnaryOp, CPU, "Expm1", functor::expm1, float, Eigen::half, bfloat16,
+          double, complex64, complex128);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Expm1", functor::expm1, float, Eigen::half, double);
 #endif
@@ -16,7 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER3(UnaryOp, CPU, "Floor", functor::floor, float, Eigen::half, double);
+REGISTER4(UnaryOp, CPU, "Floor", functor::floor, float, Eigen::half, bfloat16,
+          double);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Floor", functor::floor, float, Eigen::half, double);
@@ -18,8 +18,8 @@ limitations under the License.
 namespace tensorflow {
 REGISTER6(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
           int8, int16, int32, int64);
-REGISTER3(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float,
-          Eigen::half, double);
+REGISTER4(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float,
+          Eigen::half, bfloat16, double);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER4(BinaryOp, GPU, "FloorDiv", functor::floor_div, uint8, uint16, int16,
@@ -18,7 +18,8 @@ limitations under the License.
 namespace tensorflow {
 REGISTER3(BinaryOp, CPU, "FloorMod", functor::safe_floor_mod, int32, int64,
           uint64);
-REGISTER2(BinaryOp, CPU, "FloorMod", functor::floor_fmod, float, double);
+REGISTER3(BinaryOp, CPU, "FloorMod", functor::floor_fmod, bfloat16, float,
+          double);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 // A special GPU kernel for int32.
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER3(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half,
-          double);
+REGISTER4(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half,
+          bfloat16, double);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "IsFinite", functor::isfinite, float, Eigen::half,
@@ -16,7 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER3(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, double);
+REGISTER4(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, bfloat16,
+          double);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "IsInf", functor::isinf, float, Eigen::half, double);
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER5(UnaryOp, CPU, "Log1p", functor::log1p, float, Eigen::half, double,
-          complex64, complex128);
+REGISTER6(UnaryOp, CPU, "Log1p", functor::log1p, float, Eigen::half, bfloat16,
+          double, complex64, complex128);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Log1p", functor::log1p, float, Eigen::half, double);
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER5(BinaryOp, CPU, "Pow", functor::pow, float, Eigen::half, double,
-          complex64, complex128);
+REGISTER6(BinaryOp, CPU, "Pow", functor::pow, float, Eigen::half, bfloat16,
+          double, complex64, complex128);
 REGISTER2(BinaryOp, CPU, "Pow", functor::safe_pow, int32, int64);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
@@ -30,15 +30,8 @@ REGISTER3(SimpleBinaryOp, GPU, "InvGrad", functor::inverse_grad, float,
           Eigen::half, double);
 #endif
 
-#ifdef ENABLE_INTEL_MKL_BFLOAT16
-// Since Eigen backend does not support bfloat16 ops, we are selectively
-// enabling them for MKL backend.
-REGISTER6(UnaryOp, CPU, "Reciprocal", functor::inverse, float, Eigen::half,
-          double, complex64, complex128, bfloat16);
-#else
-REGISTER5(UnaryOp, CPU, "Reciprocal", functor::inverse, float, Eigen::half,
-          double, complex64, complex128);
-#endif  // ENABLE_INTEL_MKL_BFLOAT16
+REGISTER6(UnaryOp, CPU, "Reciprocal", functor::inverse, float, Eigen::half,
+          bfloat16, double, complex64, complex128);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER4(UnaryOp, GPU, "Reciprocal", functor::inverse, float, Eigen::half,
           double, int64);
@@ -47,8 +40,8 @@ REGISTER4(UnaryOp, GPU, "Reciprocal", functor::inverse, float, Eigen::half,
 REGISTER(UnaryOp, SYCL, "Reciprocal", functor::inverse, float);
 #endif  // TENSORFLOW_USE_SYCL
 
-REGISTER5(SimpleBinaryOp, CPU, "ReciprocalGrad", functor::inverse_grad, float,
-          Eigen::half, double, complex64, complex128);
+REGISTER6(SimpleBinaryOp, CPU, "ReciprocalGrad", functor::inverse_grad, float,
+          Eigen::half, bfloat16, double, complex64, complex128);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(SimpleBinaryOp, GPU, "ReciprocalGrad", functor::inverse_grad, float,
           Eigen::half, double);
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER7(UnaryOp, CPU, "Sign", functor::sign, float, double, int32, int64,
-          complex64, Eigen::half, complex128);
+REGISTER8(UnaryOp, CPU, "Sign", functor::sign, float, double, int32, int64,
+          complex64, Eigen::half, bfloat16, complex128);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER6(UnaryOp, GPU, "Sign", functor::sign, float, Eigen::half, double,
           int64, complex64, complex128);
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER5(UnaryOp, CPU, "Sin", functor::sin, float, Eigen::half, double,
-          complex64, complex128);
+REGISTER6(UnaryOp, CPU, "Sin", functor::sin, float, Eigen::half, bfloat16,
+          double, complex64, complex128);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Sin", functor::sin, float, Eigen::half, double);
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER4(UnaryOp, CPU, "Sinh", functor::sinh, float, double, complex64,
-          complex128);
+REGISTER5(UnaryOp, CPU, "Sinh", functor::sinh, float, double, bfloat16,
+          complex64, complex128);
 
 #if TENSORFLOW_USE_SYCL
 #define REGISTER_SYCL_KERNEL(TYPE) \
@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"
 
 namespace tensorflow {
-REGISTER5(UnaryOp, CPU, "Tan", functor::tan, Eigen::half, float, double,
-          complex64, complex128);
+REGISTER6(UnaryOp, CPU, "Tan", functor::tan, Eigen::half, bfloat16, float,
+          double, complex64, complex128);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Tan", functor::tan, Eigen::half, float, double);
@@ -17,8 +17,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_gradients.h"
 
 namespace tensorflow {
-REGISTER5(UnaryOp, CPU, "Tanh", functor::tanh, float, Eigen::half, double,
-          complex64, complex128);
+REGISTER6(UnaryOp, CPU, "Tanh", functor::tanh, float, Eigen::half, bfloat16,
+          double, complex64, complex128);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #ifndef MLIR_GENERATED_GPU_KERNELS_ENABLED
@@ -30,8 +30,8 @@ REGISTER3(UnaryOp, GPU, "Tanh", functor::tanh, float, Eigen::half, double);
 REGISTER2(UnaryOp, SYCL, "Tanh", functor::tanh, float, double);
 #endif  // TENSORFLOW_USE_SYCL
 
-REGISTER5(SimpleBinaryOp, CPU, "TanhGrad", functor::tanh_grad, float,
-          Eigen::half, double, complex64, complex128);
+REGISTER6(SimpleBinaryOp, CPU, "TanhGrad", functor::tanh_grad, float,
+          Eigen::half, bfloat16, double, complex64, complex128);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(SimpleBinaryOp, GPU, "TanhGrad", functor::tanh_grad, float,
           Eigen::half, double);
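Taken together, the registrations above give these element-wise ops bfloat16 CPU kernels. A minimal usage sketch, not part of this change, assuming a TensorFlow 2.x build that includes these registrations (eager mode):

    import tensorflow as tf

    x = tf.constant([0.5, 1.5, 2.5], dtype=tf.bfloat16)
    y = tf.constant([2.0, 3.0, 4.0], dtype=tf.bfloat16)

    with tf.device("/CPU:0"):
        print(tf.math.cos(x))          # "Cos"
        print(tf.math.expm1(x))        # "Expm1"
        print(tf.math.floor(x))        # "Floor"
        print(tf.math.floormod(y, x))  # "FloorMod"
        print(tf.math.pow(x, y))       # "Pow"
        print(tf.math.is_finite(x))    # "IsFinite"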
@@ -67,6 +67,7 @@ class ClipTest(test.TestCase):
         dtypes.float16,
         dtypes.float32,
         dtypes.float64,
+        dtypes.bfloat16,
         dtypes.int16,
         dtypes.int32,
         dtypes.int64,
@@ -88,6 +89,7 @@ class ClipTest(test.TestCase):
         dtypes.float16,
         dtypes.float32,
         dtypes.float64,
+        dtypes.bfloat16,
         dtypes.int16,
         dtypes.int32,
         dtypes.int64,
@@ -110,6 +112,7 @@ class ClipTest(test.TestCase):
         dtypes.float16,
         dtypes.float32,
         dtypes.float64,
+        dtypes.bfloat16,
         dtypes.int16,
         dtypes.int32,
         dtypes.int64,
@@ -132,6 +135,7 @@ class ClipTest(test.TestCase):
         dtypes.float16,
         dtypes.float32,
         dtypes.float64,
+        dtypes.bfloat16,
         dtypes.int16,
         dtypes.int32,
         dtypes.int64,
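These dtype lists now exercise the bfloat16 clip kernel instantiated and registered earlier. A minimal sketch of the corresponding usage, not part of this change, assuming such a build:

    import tensorflow as tf

    t = tf.constant([-3.0, -1.0, 0.0, 2.0, 7.0], dtype=tf.bfloat16)
    with tf.device("/CPU:0"):
        # ClipByValue now has a bfloat16 CPU kernel.
        clipped = tf.clip_by_value(t, clip_value_min=-2.0, clip_value_max=2.0)
    print(clipped)  # [-2, -1, 0, 2, 2], still bfloat16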
@@ -841,6 +841,9 @@ class MathOpsOverloadTest(test.TestCase):
 
   def _compareBinary(self, x, y, dtype, np_func, tf_func):
     np_ans = np_func(x, y).astype(dtype.as_numpy_dtype)
+    if dtype == dtypes_lib.bfloat16:
+      # assertAllClose does not properly handle bfloat16 values
+      np_ans = np_ans.astype(np.float32)
     self.assertAllClose(np_ans,
                         self._computeTensorAndLiteral(x, y, dtype, tf_func))
     self.assertAllClose(np_ans,
@@ -857,6 +860,7 @@ class MathOpsOverloadTest(test.TestCase):
         dtypes_lib.float16,
         dtypes_lib.float32,
         dtypes_lib.float64,
+        dtypes_lib.bfloat16,
         dtypes_lib.int32,
         dtypes_lib.int64,
         dtypes_lib.complex64,
@@ -920,12 +924,16 @@ class MathOpsOverloadTest(test.TestCase):
 class IsFiniteInfNanTest(test.TestCase):
 
   def _compare(self, x, use_gpu):
-    np_finite, np_inf, np_nan = np.isfinite(x), np.isinf(x), np.isnan(x)
     with test_util.device(use_gpu=use_gpu):
       inx = ops.convert_to_tensor(x)
       ofinite, oinf, onan = math_ops.is_finite(inx), math_ops.is_inf(
           inx), math_ops.is_nan(inx)
       tf_finite, tf_inf, tf_nan = self.evaluate([ofinite, oinf, onan])
+    if x.dtype == dtypes_lib.bfloat16.as_numpy_dtype:
+      # Numpy will implicitly convert bfloat16 value to float16, so we cast to
+      # float32 to avoid this.
+      x = x.astype(np.float32)
+    np_finite, np_inf, np_nan = np.isfinite(x), np.isinf(x), np.isnan(x)
     self.assertAllEqual(np_inf, tf_inf)
     self.assertAllEqual(np_nan, tf_nan)
     self.assertAllEqual(np_finite, tf_finite)
@@ -934,11 +942,18 @@ class IsFiniteInfNanTest(test.TestCase):
     self.assertShapeEqual(np_finite, ofinite)
 
   def _testDtype(self, dtype):
-    fi = np.finfo(dtype)
-    data = np.array([
-        0, -1, 1, fi.resolution, -fi.resolution, fi.min, fi.max, -np.inf,
-        np.inf, np.nan
-    ]).astype(dtype)
+    if dtype != dtypes_lib.bfloat16.as_numpy_dtype:
+      fi = np.finfo(dtype)
+      data = np.array([
+          0, -1, 1, fi.resolution, -fi.resolution, fi.min, fi.max, -np.inf,
+          np.inf, np.nan
+      ]).astype(dtype)
+    else:
+      # np.finfo does not support bfloat16
+      data = np.array([
+          0, -1, 1, 0.01, -0.01, -3.3895e+38, 3.3895e+38, -np.inf, np.inf,
+          np.nan
+      ]).astype(dtype)
     self._compare(data, use_gpu=False)
     self._compare(data, use_gpu=True)
 
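For reference, a minimal sketch (not from this commit) of the bfloat16 path this test now covers: np.finfo has no bfloat16 entry, so representative values are hardcoded, and the NumPy reference is computed after casting to float32 because NumPy would otherwise implicitly treat bfloat16 like float16:

    import numpy as np
    import tensorflow as tf

    bfloat16 = tf.bfloat16.as_numpy_dtype
    data = np.array([0, -1, 1, 0.01, -0.01, -3.3895e+38, 3.3895e+38,
                     -np.inf, np.inf, np.nan]).astype(bfloat16)

    with tf.device("/CPU:0"):
        tf_finite = tf.math.is_finite(data).numpy()
        tf_inf = tf.math.is_inf(data).numpy()

    ref = data.astype(np.float32)  # avoid NumPy's bfloat16 -> float16 handling
    np.testing.assert_array_equal(np.isfinite(ref), tf_finite)
    np.testing.assert_array_equal(np.isinf(ref), tf_inf)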
@@ -951,6 +966,9 @@ class IsFiniteInfNanTest(test.TestCase):
   def testDouble(self):
     self._testDtype(np.float64)
 
+  def testBfloat16(self):
+    self._testDtype(dtypes_lib.bfloat16.as_numpy_dtype)
+
   def testSqrt(self):
     for dtype in [np.float16, np.float32, np.float64]:
       fi = np.finfo(dtype)
@@ -998,8 +1016,8 @@ class RoundingTest(test.TestCase):
   def _testDtype(self, dtype):
     data = (np.arange(-3, 3) / 4.).reshape(1, 3, 2).astype(dtype)
     self._compare(data)
-    # TODO: rint op is not supported for float16
-    if dtype is np.float16:
+    # TODO(reedwm): rint op is not supported for float16 and bfloat16
+    if dtype in (np.float16, dtypes_lib.bfloat16.as_numpy_dtype):
       return
     self._compare_values(data)
     x = [0.5, 0.5000001]
@@ -1012,10 +1030,12 @@ class RoundingTest(test.TestCase):
     self._compare_values(x, y=y)
 
   def testTypes(self):
-    self.skipTest("b/131162241")
-    for dtype in [np.float16, np.float32, np.float64]:
-      with self.subTest(dtype=dtype):
-        self._testDtype(dtype)
+    # TODO(b/131162241): Enable test for GPU
+    with ops.device("/CPU:0"):
+      for dtype in [np.float16, np.float32, np.float64,
+                    dtypes_lib.bfloat16.as_numpy_dtype]:
+        with self.subTest(dtype=dtype):
+          self._testDtype(dtype)
 
 
 class ComplexMakeRealImagTest(test.TestCase):
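A minimal sketch, not part of this change, of the CPU/bfloat16 combination RoundingTest now runs: Floor and Ceil have bfloat16 CPU kernels per the registrations above, while rint remains skipped for float16/bfloat16 per the TODO in _testDtype:

    import numpy as np
    import tensorflow as tf

    bfloat16 = tf.bfloat16.as_numpy_dtype
    data = (np.arange(-3, 3) / 4.).reshape(1, 3, 2).astype(bfloat16)

    with tf.device("/CPU:0"):
        print(tf.math.floor(data))  # "Floor" bfloat16 CPU kernel
        print(tf.math.ceil(data))   # "Ceil" bfloat16 CPU kernel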
@@ -61,6 +61,8 @@ def _default_tolerance(dtype):
   Args:
     dtype: A datatype.
   """
+  if dtype == dtypes_lib.bfloat16.as_numpy_dtype:
+    return 5e-3
   if dtype == np.float16:
     return 5e-3
   elif dtype in (np.float32, np.complex64):
@@ -81,12 +83,7 @@ class UnaryOpTest(test.TestCase):
     np_ans = np_func(x)
     with self.cached_session(use_gpu=False):
       inx = ops.convert_to_tensor(x)
-      if x.dtype in (np.float32, np.float64,
-                     dtypes_lib.bfloat16.as_numpy_dtype):
-        y = 1.1 * tf_func(inx)
-        np_ans *= 1.1
-      else:
-        y = tf_func(inx)
+      y = tf_func(inx)
       tf_cpu = self.evaluate(y)
       self.assertShapeEqual(np_ans, y)
       if x.dtype == np.float16:
|
||||
if x.dtype in (np.complex64, np.complex128) and tf_func == math_ops.sign:
|
||||
return # Return early
|
||||
|
||||
if x.dtype == np.float16:
|
||||
if x.dtype in (np.float16, dtypes_lib.bfloat16.as_numpy_dtype):
|
||||
s = list(np.shape(x))
|
||||
jacob_t, _ = gradient_checker.compute_gradient(
|
||||
inx, s, y, s, x_init_value=x)
|
||||
@@ -108,7 +105,7 @@ class UnaryOpTest(test.TestCase):
       yf = tf_func(inxf)
       _, jacob_n = gradient_checker.compute_gradient(
           inxf, s, yf, s, x_init_value=xf, delta=1e-2)
-      jacob_n = jacob_n.astype(np.float16)
+      jacob_n = jacob_n.astype(x.dtype)
       self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol)
     elif x.dtype in (np.float32, np.complex64):
       s = list(np.shape(x))
@@ -384,13 +381,36 @@ class UnaryOpTest(test.TestCase):
     self._compareBothSparse(y, np.sign, math_ops.sign)
     self._compareBothSparse(x, np.vectorize(math.erf), math_ops.erf, tol=1e-3)
 
-  @test_util.run_deprecated_v1
   def testBFloat16Basic(self):
+    def compute_f32(np_func):
+      """Decorator to compute Numpy function with float32 math."""
+      def f(x):
+        y = np_func(x.astype(np.float32))
+        return y.astype(x.dtype)
+      return f
+
     bfloat16 = dtypes_lib.bfloat16.as_numpy_dtype
     x = np.arange(-6, 6,
                   2).reshape(1, 3, 2).astype(dtypes_lib.bfloat16.as_numpy_dtype)
+    y = (x + .5).astype(bfloat16)  # no zero
+    z = (x + 15.5).astype(bfloat16)  # all positive
     self._compareCpu(x, np.abs, math_ops.abs)
     self._compareCpu(x, np.abs, _ABS)
+    self._compareBoth(x, np.negative, math_ops.negative)
+    self._compareBoth(x, np.negative, _NEG)
+    self._compareCpu(y, compute_f32(self._inv), math_ops.reciprocal)
+    self._compareCpu(x, np.exp, math_ops.exp)
+    self._compareCpu(x, np.expm1, math_ops.expm1)
+    self._compareCpu(z, compute_f32(np.log), math_ops.log)
+    self._compareCpu(z, compute_f32(np.log1p), math_ops.log1p)
+    self._compareCpu(y, np.sign, math_ops.sign)
+    self._compareBoth(x, compute_f32(np.sin), math_ops.sin)
+    self._compareBoth(x, compute_f32(np.cos), math_ops.cos)
+    self._compareBoth(x, compute_f32(np.tan), math_ops.tan)
+    self._compareBoth(x, compute_f32(np.sinh), math_ops.sinh)
+    self._compareBoth(x, compute_f32(np.cosh), math_ops.cosh)
+    self._compareBoth(x, compute_f32(np.tanh), math_ops.tanh)
 
   def testInt8Basic(self):
     x = np.arange(-6, 6, 2).reshape(1, 3, 2).astype(np.int8)
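A minimal standalone sketch (not from this commit) of the compute_f32 pattern used in testBFloat16Basic: the NumPy reference is evaluated in float32 and cast back, since computing it directly in bfloat16 would lose too much precision for a meaningful comparison; the 5e-3 tolerance mirrors _default_tolerance for bfloat16 above:

    import numpy as np
    import tensorflow as tf

    bfloat16 = tf.bfloat16.as_numpy_dtype

    def compute_f32(np_func):
        # Evaluate the NumPy reference in float32, then cast back to bfloat16.
        def f(x):
            return np_func(x.astype(np.float32)).astype(x.dtype)
        return f

    z = (np.arange(-6, 6, 2).reshape(1, 3, 2) + 15.5).astype(bfloat16)  # positive
    np_ref = compute_f32(np.log1p)(z)
    with tf.device("/CPU:0"):
        tf_val = tf.math.log1p(tf.constant(z)).numpy()  # bfloat16 "Log1p" CPU kernel
    np.testing.assert_allclose(np_ref.astype(np.float32),
                               tf_val.astype(np.float32), rtol=5e-3)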
@@ -476,6 +476,13 @@ class DivAndModTest(test_util.TensorFlowTestCase):
     #               % array_ops.constant(divs))
     # self.assertAllEqual(tf2_result, tf_result)
 
+  def testFloorModBfloat64(self):
+    nums, divs = self.floatTestData()
+    tf_result = math_ops.floormod(math_ops.cast(nums, dtypes.bfloat16),
+                                  math_ops.cast(divs, dtypes.bfloat16))
+    np_result = nums % divs
+    self.assertAllEqual(tf_result, np_result)
+
   def testTruncateModInt(self):
     nums, divs = self.intTestData()
     tf_result = math_ops.truncatemod(nums, divs)