diff --git a/tensorflow/compiler/xla/python/bfloat16.cc b/tensorflow/compiler/xla/python/bfloat16.cc
index d932a7f9c9a..d4dad44710d 100644
--- a/tensorflow/compiler/xla/python/bfloat16.cc
+++ b/tensorflow/compiler/xla/python/bfloat16.cc
@@ -824,18 +824,44 @@ struct Multiply {
 struct TrueDivide {
   bfloat16 operator()(bfloat16 a, bfloat16 b) { return a / b; }
 };
+
+std::pair<float, float> divmod(float a, float b) {
+  if (b == 0.0f) {
+    float nan = std::numeric_limits<float>::quiet_NaN();
+    return {nan, nan};
+  }
+  float mod = std::fmod(a, b);
+  float div = (a - mod) / b;
+  if (mod != 0.0f) {
+    if ((b < 0.0f) != (mod < 0.0f)) {
+      mod += b;
+      div -= 1.0f;
+    }
+  } else {
+    mod = std::copysign(0.0f, b);
+  }
+
+  float floordiv;
+  if (div != 0.0f) {
+    floordiv = std::floor(div);
+    if (div - floordiv > 0.5f) {
+      floordiv += 1.0f;
+    }
+  } else {
+    floordiv = std::copysign(0.0f, a / b);
+  }
+  return {floordiv, mod};
+}
+
 struct FloorDivide {
   bfloat16 operator()(bfloat16 a, bfloat16 b) {
-    float mod;
-    return bfloat16(
-        npy_divmodf(static_cast<float>(a), static_cast<float>(b), &mod));
+    return bfloat16(divmod(static_cast<float>(a), static_cast<float>(b)).first);
   }
 };
 struct Remainder {
   bfloat16 operator()(bfloat16 a, bfloat16 b) {
-    float mod;
-    npy_divmodf(static_cast<float>(a), static_cast<float>(b), &mod);
-    return bfloat16(mod);
+    return bfloat16(
+        divmod(static_cast<float>(a), static_cast<float>(b)).second);
   }
 };
 struct DivmodUFunc {
@@ -851,9 +877,10 @@ struct DivmodUFunc {
     for (npy_intp k = 0; k < *dimensions; k++) {
       bfloat16 x = *reinterpret_cast<bfloat16*>(i0);
       bfloat16 y = *reinterpret_cast<bfloat16*>(i1);
-      float mod;
-      *reinterpret_cast<bfloat16*>(o0) = bfloat16(
-          npy_divmodf(static_cast<float>(x), static_cast<float>(y), &mod));
+      float floordiv, mod;
+      std::tie(floordiv, mod) =
+          divmod(static_cast<float>(x), static_cast<float>(y));
+      *reinterpret_cast<bfloat16*>(o0) = bfloat16(floordiv);
       *reinterpret_cast<bfloat16*>(o1) = bfloat16(mod);
       i0 += steps[0];
       i1 += steps[1];
@@ -927,9 +954,18 @@ struct Frexp {
   }
 };
 struct Heaviside {
-  bfloat16 operator()(bfloat16 a, bfloat16 b) {
-    return bfloat16(
-        npy_heavisidef(static_cast<float>(a), static_cast<float>(b)));
+  bfloat16 operator()(bfloat16 bx, bfloat16 h0) {
+    float x = static_cast<float>(bx);
+    if (std::isnan(x)) {
+      return bx;
+    }
+    if (x < 0) {
+      return bfloat16(0.0f);
+    }
+    if (x > 0) {
+      return bfloat16(1.0f);
+    }
+    return h0;  // x == 0
   }
 };
 struct Conjugate {
@@ -970,15 +1006,37 @@ struct Log1p {
   }
 };
 struct LogAddExp {
-  bfloat16 operator()(bfloat16 a, bfloat16 b) {
-    return bfloat16(
-        npy_logaddexpf(static_cast<float>(a), static_cast<float>(b)));
+  bfloat16 operator()(bfloat16 bx, bfloat16 by) {
+    float x = static_cast<float>(bx);
+    float y = static_cast<float>(by);
+    if (x == y) {
+      // Handles infinities of the same sign.
+      return bfloat16(x + std::log(2.0f));
+    }
+    float out = std::numeric_limits<float>::quiet_NaN();
+    if (x > y) {
+      out = x + std::log1p(std::exp(y - x));
+    } else if (x < y) {
+      out = y + std::log1p(std::exp(x - y));
+    }
+    return bfloat16(out);
   }
 };
 struct LogAddExp2 {
-  bfloat16 operator()(bfloat16 a, bfloat16 b) {
-    return bfloat16(
-        npy_logaddexp2f(static_cast<float>(a), static_cast<float>(b)));
+  bfloat16 operator()(bfloat16 bx, bfloat16 by) {
+    float x = static_cast<float>(bx);
+    float y = static_cast<float>(by);
+    if (x == y) {
+      // Handles infinities of the same sign.
+      return bfloat16(x + 1.0f);
+    }
+    float out = std::numeric_limits<float>::quiet_NaN();
+    if (x > y) {
+      out = x + std::log1p(std::exp2(y - x)) / std::log(2.0f);
+    } else if (x < y) {
+      out = y + std::log1p(std::exp2(x - y)) / std::log(2.0f);
+    }
+    return bfloat16(out);
   }
 };
 struct Modf {
@@ -1104,12 +1162,14 @@ struct Arctanh {
 };
 struct Deg2rad {
   bfloat16 operator()(bfloat16 a) {
-    return bfloat16(npy_deg2radf(static_cast<float>(a)));
+    static constexpr float radians_per_degree = M_PI / 180.0f;
+    return bfloat16(static_cast<float>(a) * radians_per_degree);
   }
 };
 struct Rad2deg {
   bfloat16 operator()(bfloat16 a) {
-    return bfloat16(npy_rad2degf(static_cast<float>(a)));
+    static constexpr float degrees_per_radian = 180.0f / M_PI;
+    return bfloat16(static_cast<float>(a) * degrees_per_radian);
   }
 };
diff --git a/tensorflow/compiler/xla/python/bfloat16_test.py b/tensorflow/compiler/xla/python/bfloat16_test.py
index dba715230f3..33274e1358a 100644
--- a/tensorflow/compiler/xla/python/bfloat16_test.py
+++ b/tensorflow/compiler/xla/python/bfloat16_test.py
@@ -317,7 +317,7 @@ class Bfloat16NumPyTest(parameterized.TestCase):
       } for op in UNARY_UFUNCS))
   def testUnaryUfunc(self, op):
     rng = np.random.RandomState(seed=42)
-    x = rng.randn(3, 7).astype(bfloat16)
+    x = rng.randn(3, 7, 10).astype(bfloat16)
     numpy_assert_allclose(
         op(x).astype(np.float32), op(x.astype(np.float32)), rtol=1e-2)

@@ -327,8 +327,8 @@ class Bfloat16NumPyTest(parameterized.TestCase):
      } for op in BINARY_UFUNCS))
   def testBinaryUfunc(self, op):
     rng = np.random.RandomState(seed=42)
-    x = rng.randn(3, 7).astype(bfloat16)
-    y = rng.randn(4, 1, 7).astype(bfloat16)
+    x = rng.randn(3, 7, 10).astype(bfloat16)
+    y = rng.randn(4, 1, 7, 10).astype(bfloat16)
     numpy_assert_allclose(
         op(x, y).astype(np.float32),
         op(x.astype(np.float32), y.astype(np.float32)),
@@ -351,7 +351,7 @@ class Bfloat16NumPyTest(parameterized.TestCase):
      } for op in [np.isfinite, np.isinf, np.isnan, np.signbit, np.logical_not]))
   def testPredicateUfunc(self, op):
     rng = np.random.RandomState(seed=42)
-    shape = (3, 7)
+    shape = (3, 7, 10)
     posinf_flips = rng.rand(*shape) < 0.1
     neginf_flips = rng.rand(*shape) < 0.1
     nan_flips = rng.rand(*shape) < 0.1
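Note on the divmod helper: it reproduces Python/NumPy floor-division semantics, where the remainder takes the sign of the divisor and the quotient is floored rather than truncated toward zero (std::fmod alone gives the C convention, sign of the dividend). The `div - floordiv > 0.5f` guard compensates for rounding in `(a - mod) / b`. Below is a minimal standalone sketch, not part of the patch, that duplicates the helper and exercises those cases; the main() and assertions are illustrative only.

#include <cassert>
#include <cmath>
#include <cstdio>
#include <limits>
#include <utility>

std::pair<float, float> divmod(float a, float b) {
  if (b == 0.0f) {
    float nan = std::numeric_limits<float>::quiet_NaN();
    return {nan, nan};  // Both quotient and remainder are NaN for b == 0.
  }
  float mod = std::fmod(a, b);  // C-style remainder: takes the sign of a.
  float div = (a - mod) / b;
  if (mod != 0.0f) {
    // Fold the remainder so it takes the sign of the divisor b.
    if ((b < 0.0f) != (mod < 0.0f)) {
      mod += b;
      div -= 1.0f;
    }
  } else {
    mod = std::copysign(0.0f, b);
  }
  float floordiv;
  if (div != 0.0f) {
    floordiv = std::floor(div);
    if (div - floordiv > 0.5f) {
      floordiv += 1.0f;  // Compensate for rounding in (a - mod) / b.
    }
  } else {
    floordiv = std::copysign(0.0f, a / b);
  }
  return {floordiv, mod};
}

int main() {
  // Same expectations as Python's divmod(): 7 // -2 == -4, 7 % -2 == -1.
  assert(divmod(7.0f, -2.0f).first == -4.0f);
  assert(divmod(7.0f, -2.0f).second == -1.0f);
  // std::fmod(-7, 2) would give -1; the folded remainder follows the divisor.
  assert(divmod(-7.0f, 2.0f).first == -4.0f);
  assert(divmod(-7.0f, 2.0f).second == 1.0f);
  // Division by zero yields NaN for both outputs.
  assert(std::isnan(divmod(1.0f, 0.0f).first));
  std::puts("divmod semantics OK");
}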
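Note on LogAddExp: the rewrite keeps the computation in a numerically safe range. The naive form log(exp(x) + exp(y)) overflows float32 once either input exceeds roughly 88.7, while the rearranged max(x, y) + log1p(exp(-|x - y|)) stays finite; the x == y branch also sidesteps inf - inf = NaN for same-signed infinities, and LogAddExp2 applies the same identity in base 2. A standalone sketch of the trick, with helper names that are illustrative rather than from the patch:

#include <cmath>
#include <cstdio>
#include <limits>

float logaddexp_naive(float x, float y) {
  return std::log(std::exp(x) + std::exp(y));  // exp() overflows to inf.
}

float logaddexp_stable(float x, float y) {
  if (x == y) {
    // Equal inputs, including same-signed infinities: log(2 * exp(x)).
    return x + std::log(2.0f);
  }
  // NaN inputs compare false on both branches and fall through to NaN.
  float out = std::numeric_limits<float>::quiet_NaN();
  if (x > y) {
    out = x + std::log1p(std::exp(y - x));
  } else if (x < y) {
    out = y + std::log1p(std::exp(x - y));
  }
  return out;
}

int main() {
  std::printf("naive:  %f\n", logaddexp_naive(100.0f, 100.0f));   // inf
  std::printf("stable: %f\n", logaddexp_stable(100.0f, 100.0f));  // ~100.693147
}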
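Note on Heaviside: the replacement follows the np.heaviside(x1, x2) contract: 0 below zero, 1 above zero, the second argument at exactly zero (including -0.0), and NaN propagated unchanged. A float-level sketch of the same logic; the function name is illustrative:

#include <cmath>
#include <cstdio>

float heaviside(float x, float h0) {
  if (std::isnan(x)) return x;  // Propagate NaN inputs.
  if (x < 0) return 0.0f;
  if (x > 0) return 1.0f;
  return h0;  // x == 0, including negative zero.
}

int main() {
  std::printf("%g %g %g\n",
              heaviside(-2.5f, 0.5f),   // 0
              heaviside(3.0f, 0.5f),    // 1
              heaviside(0.0f, 0.5f));   // 0.5
}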