[XLA] Implement the regularized incomplete beta function
An implementation involving continued fractions is used. The continued
fraction for this function can be seen at http://dlmf.nist.gov/8.17.v

PiperOrigin-RevId: 277327138
Change-Id: Icb8527af344b629806fd7e4880072d05e2530cd2
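For reference, the fraction being evaluated is DLMF 8.17.22, with the partial numerators from 8.17.23 and 8.17.24 (transcribed here for convenience; this derivation note is mine, not part of the commit message):

```latex
I_x(a,b) \;=\; \frac{x^a (1-x)^b}{a \, B(a,b)}
\;\cfrac{1}{1 + \cfrac{d_1}{1 + \cfrac{d_2}{1 + \cdots}}},
\qquad
d_{2m} = \frac{m (b - m)\, x}{(a + 2m - 1)(a + 2m)},
\qquad
d_{2m+1} = -\,\frac{(a + m)(a + b + m)\, x}{(a + 2m)(a + 2m + 1)}
```

These are exactly the `odd_numerator` and `even_numerator` terms in the XLA implementation below.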
parent 2061ec8104
commit e5983061d3
@@ -1022,6 +1022,7 @@ tf_xla_py_test(
     deps = [
         ":xla_test",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python:extra_py_tests_deps",
         "//tensorflow/python:framework",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:platform_test",
@@ -20,6 +20,7 @@ from __future__ import print_function

 from absl.testing import parameterized
 import numpy as np
+import scipy.special as sps

 from tensorflow.compiler.tests import xla_test
 from tensorflow.python.framework import dtypes
@@ -31,7 +32,7 @@ from tensorflow.python.platform import googletest

 class TernaryOpsTest(xla_test.XLATestCase, parameterized.TestCase):

-  def _testTernary(self, op, a, b, c, expected):
+  def _testTernary(self, op, a, b, c, expected, rtol=1e-3, atol=1e-6):
     with self.session() as session:
       with self.test_scope():
         pa = array_ops.placeholder(dtypes.as_dtype(a.dtype), a.shape, name="a")
@@ -39,7 +40,7 @@ class TernaryOpsTest(xla_test.XLATestCase, parameterized.TestCase):
         pc = array_ops.placeholder(dtypes.as_dtype(c.dtype), c.shape, name="c")
         output = op(pa, pb, pc)
       result = session.run(output, {pa: a, pb: b, pc: c})
-      self.assertAllClose(result, expected, rtol=1e-3)
+      self.assertAllClose(result, expected, rtol=rtol, atol=atol)
       return result

   @parameterized.parameters(
@@ -210,6 +211,55 @@ class TernaryOpsTest(xla_test.XLATestCase, parameterized.TestCase):
         upper,
         expected=np.minimum(np.maximum(x, lower), upper))

+  def testBetaincSanity(self):
+    # This operation is only supported for float32 and float64.
+    for dtype in self.numeric_types & {np.float32, np.float64}:
+      # Sanity check a few identities:
+      # - betainc(a, b, 0) == 0
+      # - betainc(a, b, 1) == 1
+      # - betainc(a, 1, x) == x ** a
+      # Compare against the implementation in SciPy.
+      a = np.array([.3, .4, .2, .2], dtype=dtype)
+      b = np.array([1., 1., .4, .4], dtype=dtype)
+      x = np.array([.3, .4, .0, .1], dtype=dtype)
+      expected = sps.betainc(a, b, x)
+      self._testTernary(
+          math_ops.betainc, a, b, x, expected, rtol=5e-6, atol=6e-6)
+
+  @parameterized.parameters(
+      {
+          'sigma': 1e15,
+          'rtol': 1e-6,
+          'atol': 1e-6
+      },
+      {
+          'sigma': 30,
+          'rtol': 1e-6,
+          'atol': 2e-3
+      },
+      {
+          'sigma': 1e-8,
+          'rtol': 5e-4,
+          'atol': 3e-6
+      },
+      {
+          'sigma': 1e-16,
+          'rtol': 1e-6,
+          'atol': 2e-4
+      },
+  )
+  def testBetainc(self, sigma, rtol, atol):
+    # This operation is only supported for float32 and float64.
+    for dtype in self.numeric_types & {np.float32, np.float64}:
+      # Randomly generate a, b, x in the numerical domain of betainc.
+      # Compare against the implementation in SciPy.
+      a = np.abs(np.random.randn(10, 10) * sigma).astype(dtype)  # in (0, infty)
+      b = np.abs(np.random.randn(10, 10) * sigma).astype(dtype)  # in (0, infty)
+      x = np.random.rand(10, 10).astype(dtype)  # in (0, 1)
+      expected = sps.betainc(a, b, x, dtype=dtype)
+      self._testTernary(
+          math_ops.betainc, a, b, x, expected, rtol=rtol, atol=atol)
+

 if __name__ == "__main__":
   googletest.main()
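The identities listed in `testBetaincSanity` are cheap to verify against SciPy outside the test harness as well; a standalone sketch (my own illustration, not part of the diff):

```python
import numpy as np
import scipy.special as sps

a = np.array([0.3, 0.4, 0.2, 0.2])
x = np.linspace(0.0, 1.0, 5)

# betainc(a, b, 0) == 0 and betainc(a, b, 1) == 1 for any valid a, b.
assert np.allclose(sps.betainc(a, 2.0, 0.0), 0.0)
assert np.allclose(sps.betainc(a, 2.0, 1.0), 1.0)

# betainc(a, 1, x) == x ** a: with b == 1 the normalized integrand reduces
# to a * t**(a - 1), whose integral over [0, x] is x**a.
for ai in a:
    assert np.allclose(sps.betainc(ai, 1.0, x), x ** ai)
```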
@@ -14,6 +14,7 @@ tf_kernel_library(
         "batch_norm_op.cc",
         "batchtospace_op.cc",
         "bcast_ops.cc",
+        "beta_op.cc",
         "bias_ops.cc",
         "binary_ops.cc",
         "broadcast_to_op.cc",
|
81
tensorflow/compiler/tf2xla/kernels/beta_op.cc
Normal file
81
tensorflow/compiler/tf2xla/kernels/beta_op.cc
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
|
#include "tensorflow/compiler/tf2xla/lib/broadcast.h"
|
||||||
|
#include "tensorflow/compiler/tf2xla/xla_helpers.h"
|
||||||
|
#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
|
||||||
|
#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
|
||||||
|
#include "tensorflow/compiler/xla/client/lib/arithmetic.h"
|
||||||
|
#include "tensorflow/compiler/xla/client/lib/constants.h"
|
||||||
|
#include "tensorflow/compiler/xla/client/lib/loops.h"
|
||||||
|
#include "tensorflow/compiler/xla/client/lib/math.h"
|
||||||
|
#include "tensorflow/compiler/xla/client/xla_builder.h"
|
||||||
|
#include "tensorflow/compiler/xla/status_macros.h"
|
||||||
|
|
||||||
|
namespace tensorflow {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
class BetaincOp : public XlaOpKernel {
|
||||||
|
public:
|
||||||
|
explicit BetaincOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
|
||||||
|
|
||||||
|
void Compile(XlaOpKernelContext* ctx) override {
|
||||||
|
const TensorShape& a_shape = ctx->InputShape(0);
|
||||||
|
const TensorShape& b_shape = ctx->InputShape(1);
|
||||||
|
const TensorShape& x_shape = ctx->InputShape(2);
|
||||||
|
if (a_shape.dims() > 0 && b_shape.dims() > 0) {
|
||||||
|
OP_REQUIRES(ctx, a_shape == b_shape,
|
||||||
|
errors::InvalidArgument(
|
||||||
|
"Shapes of a and b are inconsistent: ",
|
||||||
|
a_shape.DebugString(), " vs. ", b_shape.DebugString()));
|
||||||
|
}
|
||||||
|
if (a_shape.dims() > 0 && x_shape.dims() > 0) {
|
||||||
|
OP_REQUIRES(ctx, a_shape == x_shape,
|
||||||
|
errors::InvalidArgument(
|
||||||
|
"Shapes of a and x are inconsistent: ",
|
||||||
|
a_shape.DebugString(), " vs. ", x_shape.DebugString()));
|
||||||
|
}
|
||||||
|
if (b_shape.dims() > 0 && x_shape.dims() > 0) {
|
||||||
|
OP_REQUIRES(ctx, b_shape == x_shape,
|
||||||
|
errors::InvalidArgument(
|
||||||
|
"Shapes of b and x are inconsistent: ",
|
||||||
|
b_shape.DebugString(), " vs. ", x_shape.DebugString()));
|
||||||
|
}
|
||||||
|
|
||||||
|
TensorShape merged_shape(a_shape);
|
||||||
|
if (b_shape.dims() > 0) merged_shape = b_shape;
|
||||||
|
if (x_shape.dims() > 0) merged_shape = x_shape;
|
||||||
|
|
||||||
|
auto builder = ctx->builder();
|
||||||
|
auto result =
|
||||||
|
builder->ReportErrorOrReturn([&]() -> xla::StatusOr<xla::XlaOp> {
|
||||||
|
TF_ASSIGN_OR_RETURN(
|
||||||
|
auto a, BroadcastTo(ctx->Input(0), merged_shape.dim_sizes()));
|
||||||
|
TF_ASSIGN_OR_RETURN(
|
||||||
|
auto b, BroadcastTo(ctx->Input(1), merged_shape.dim_sizes()));
|
||||||
|
TF_ASSIGN_OR_RETURN(
|
||||||
|
auto x, BroadcastTo(ctx->Input(2), merged_shape.dim_sizes()));
|
||||||
|
return xla::RegularizedIncompleteBeta(a, b, x);
|
||||||
|
});
|
||||||
|
ctx->SetOutput(0, result);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
REGISTER_XLA_OP(Name("Betainc"), BetaincOp);
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
} // namespace tensorflow
|
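The kernel broadcasts scalar operands to the single non-scalar shape (`merged_shape`) but rejects mismatched non-scalar shapes. A rough illustration of that contract through the public op (hypothetical usage, not taken from the diff; assumes TF 2.x eager execution):

```python
import numpy as np
import tensorflow as tf

x = np.random.rand(3, 4).astype(np.float32)

# Scalars broadcast against the one non-scalar shape:
y = tf.math.betainc(0.5, 2.0, x)        # result has shape (3, 4)

# Non-scalar operands must agree exactly; mixed ranks are rejected:
# tf.math.betainc(0.5, [0.5], [[0.5]])  # raises: shapes "must be equal"
```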
@@ -145,6 +145,7 @@ cc_library(
     deps = [
         ":arithmetic",
        ":constants",
+        ":loops",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/compiler/xla/client:xla_builder",
@@ -21,6 +21,8 @@ limitations under the License.

 #include "tensorflow/compiler/xla/client/lib/arithmetic.h"
 #include "tensorflow/compiler/xla/client/lib/constants.h"
+#include "tensorflow/compiler/xla/client/lib/loops.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/primitive_util.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/status_macros.h"
@@ -497,6 +499,30 @@ XlaOp Lgamma(XlaOp input) {
   });
 }

+// Computes an approximation of the lbeta function which is equivalent to
+// log(abs(Beta(a, b))) but avoids overflow by computing it with lgamma.
+static XlaOp Lbeta(XlaOp a, XlaOp b) {
+  // Beta(a, b) can be computed using Gamma as per
+  // http://dlmf.nist.gov/5.12.E1 as follows:
+  //   Beta(a, b) = (Gamma(a) * Gamma(b)) / Gamma(a + b)
+  //
+  // To avoid overflow, we compute in the log domain.
+  //
+  // As per http://dlmf.nist.gov/4.8.E2 we can transform:
+  //   Log(a * b)
+  // into:
+  //   Log(a) + Log(b)
+  //
+  // Likewise, per https://dlmf.nist.gov/4.8.E4, we can turn:
+  //   Log(a / b)
+  // into:
+  //   Log(a) - Log(b)
+  //
+  // This means that we can compute Log(Beta(a, b)) by:
+  //   Log(Gamma(a)) + Log(Gamma(b)) - Log(Gamma(a + b))
+  return Lgamma(a) + Lgamma(b) - Lgamma(a + b);
+}
+
 // Compute the Digamma function using Lanczos' approximation from "A Precision
 // Approximation of the Gamma Function". SIAM Journal on Numerical Analysis
 // series B. Vol. 1:
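A quick NumPy/SciPy sketch of why the log-domain identity matters (illustrative only; `gammaln` plays the role of `Lgamma`):

```python
import numpy as np
from scipy.special import beta, gammaln

def lbeta(a, b):
    # log(Beta(a, b)) = lgamma(a) + lgamma(b) - lgamma(a + b)
    return gammaln(a) + gammaln(b) - gammaln(a + b)

a = b = 1000.0
print(beta(a, b))    # 0.0: the direct Gamma-product form underflows in float64
print(lbeta(a, b))   # about -1388.5, comfortably representable
```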
@@ -1037,4 +1063,212 @@ XlaOp BesselI1e(XlaOp x) {
   });
 }

+// I J Thompson and A R Barnett. 1986. Coulomb and Bessel functions of complex
+// arguments and order. J. Comput. Phys. 64, 2 (June 1986), 490-509.
+// DOI=http://dx.doi.org/10.1016/0021-9991(86)90046-X
+static XlaOp LentzThompsonBarnettAlgorithm(
+    int64 num_iterations, double small, double threshold,
+    const ForEachIndexBodyFunction& nth_partial_numerator,
+    const ForEachIndexBodyFunction& nth_partial_denominator,
+    absl::Span<const XlaOp> inputs, absl::string_view name) {
+  auto& b = *inputs.front().builder();
+  return b.ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
+    TF_RET_CHECK(num_iterations < INT32_MAX);
+
+    enum {
+      // Position in the evaluation.
+      kIterationIdx,
+      // Whether or not we have reached the desired tolerance.
+      kValuesUnconvergedIdx,
+      // Ratio between nth canonical numerator and the nth-1 canonical
+      // numerator.
+      kCIdx,
+      // Ratio between nth-1 canonical denominator and the nth canonical
+      // denominator.
+      kDIdx,
+      // Computed approximant in the evaluation.
+      kHIdx,
+      // Inputs follow all of the other state.
+      kFirstInputIdx,
+    };
+    auto while_cond_fn = [num_iterations](
+                             absl::Span<const XlaOp> values,
+                             XlaBuilder* cond_builder) -> StatusOr<XlaOp> {
+      auto iteration = values[kIterationIdx];
+      auto iterations_remain_cond =
+          Lt(iteration, ScalarLike(iteration, num_iterations));
+      auto values_unconverged_cond = values[kValuesUnconvergedIdx];
+      return And(iterations_remain_cond, values_unconverged_cond);
+    };
+
+    auto while_body_fn =
+        [small, threshold, &nth_partial_numerator, &nth_partial_denominator](
+            absl::Span<const XlaOp> values,
+            XlaBuilder* body_builder) -> StatusOr<std::vector<XlaOp>> {
+      XlaOp iteration = values[kIterationIdx];
+
+      TF_ASSIGN_OR_RETURN(
+          std::vector<XlaOp> partial_numerator,
+          nth_partial_numerator(iteration, values.subspan(kFirstInputIdx),
+                                body_builder));
+      TF_RET_CHECK(partial_numerator.size() == 1);
+
+      TF_ASSIGN_OR_RETURN(
+          std::vector<XlaOp> partial_denominator,
+          nth_partial_denominator(iteration, values.subspan(kFirstInputIdx),
+                                  body_builder));
+      TF_RET_CHECK(partial_denominator.size() == 1);
+
+      auto c = partial_denominator[0] + partial_numerator[0] / values[kCIdx];
+      auto small_constant = FullLike(c, small);
+      c = Select(Lt(Abs(c), small_constant), small_constant, c);
+
+      auto d = partial_denominator[0] + partial_numerator[0] * values[kDIdx];
+      d = Select(Lt(Abs(d), small_constant), small_constant, d);
+
+      d = Reciprocal(d);
+
+      auto delta = c * d;
+      auto h = values[kHIdx] * delta;
+
+      std::vector<XlaOp> updated_values(values.size());
+      updated_values[kIterationIdx] = Add(iteration, ScalarLike(iteration, 1));
+      updated_values[kCIdx] = c;
+      updated_values[kDIdx] = d;
+      updated_values[kHIdx] = h;
+      std::copy(values.begin() + kFirstInputIdx, values.end(),
+                updated_values.begin() + kFirstInputIdx);
+
+      // If any values are greater than the tolerance, we have not converged.
+      auto tolerance_comparison =
+          Ge(Abs(Sub(delta, FullLike(delta, 1.0))), FullLike(delta, threshold));
+      updated_values[kValuesUnconvergedIdx] =
+          ReduceAll(tolerance_comparison, ConstantR0<bool>(body_builder, false),
+                    CreateScalarOrComputation(PRED, body_builder));
+      return updated_values;
+    };
+
+    TF_ASSIGN_OR_RETURN(std::vector<XlaOp> partial_denominator,
+                        nth_partial_denominator(Zero(&b, U32), inputs, &b));
+    TF_RET_CHECK(partial_denominator.size() == 1);
+    auto h = partial_denominator[0];
+    auto small_constant = FullLike(h, small);
+    h = Select(Lt(Abs(h), small_constant), small_constant, h);
+
+    std::vector<XlaOp> values(kFirstInputIdx + inputs.size());
+    values[kIterationIdx] = One(&b, U32);
+    values[kValuesUnconvergedIdx] = ConstantR0<bool>(&b, true);
+    values[kCIdx] = h;
+    values[kDIdx] = FullLike(h, 0.0);
+    values[kHIdx] = h;
+    std::copy(inputs.begin(), inputs.end(), values.begin() + kFirstInputIdx);
+    TF_ASSIGN_OR_RETURN(values, WhileLoopHelper(while_cond_fn, while_body_fn,
+                                                values, name, &b));
+    return values[kHIdx];
+  });
+}
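For intuition, here is a scalar Python rendering of the same modified Lentz iteration: a sketch of the scheme, not the XLA loop above, with `numerator(n)` and `denominator(n)` standing in for the partial-term callbacks:

```python
def lentz(num_iterations, small, threshold, numerator, denominator):
    """Evaluates the continued fraction b0 + a1/(b1 + a2/(b2 + ...)).

    numerator(n) -> a_n and denominator(n) -> b_n; `small` replaces
    near-zero intermediates, `threshold` is the convergence tolerance.
    """
    h = denominator(0)
    h = h if abs(h) >= small else small  # clamp b0 away from zero
    c, d = h, 0.0
    for n in range(1, num_iterations):
        a_n, b_n = numerator(n), denominator(n)
        c = b_n + a_n / c
        c = c if abs(c) >= small else small
        d = b_n + a_n * d
        d = 1.0 / (d if abs(d) >= small else small)
        delta = c * d
        h *= delta
        if abs(delta - 1.0) < threshold:  # converged
            break
    return h

# Sanity check: the golden ratio as 1 + 1/(1 + 1/(1 + ...)).
print(lentz(100, 1e-30, 1e-15, lambda n: 1.0, lambda n: 1.0))  # ~1.6180339887
```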
+
+XlaOp RegularizedIncompleteBeta(XlaOp a, XlaOp b, XlaOp x) {
+  auto& builder = *x.builder();
+  return builder.ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
+    TF_ASSIGN_OR_RETURN(Shape shape, builder.GetShape(a));
+
+    // The partial numerator for the incomplete beta function is given
+    // here: http://dlmf.nist.gov/8.17.E23 Note that there is a special
+    // case: the partial numerator for the first iteration is one.
+    auto NthPartialBetaincNumerator =
+        [&shape](XlaOp iteration, absl::Span<const XlaOp> inputs,
+                 XlaBuilder* builder) -> StatusOr<std::vector<XlaOp>> {
+      auto a = inputs[0];
+      auto b = inputs[1];
+      auto x = inputs[2];
+      auto iteration_bcast = Broadcast(iteration, shape.dimensions());
+      auto iteration_is_even =
+          Eq(iteration_bcast % FullLike(iteration_bcast, 2),
+             FullLike(iteration_bcast, 0));
+      auto iteration_is_one = Eq(iteration_bcast, FullLike(iteration_bcast, 1));
+      auto iteration_minus_one = iteration_bcast - FullLike(iteration_bcast, 1);
+      auto m = iteration_minus_one / FullLike(iteration_minus_one, 2);
+      m = ConvertElementType(m, shape.element_type());
+      auto one = FullLike(a, 1.0);
+      auto two = FullLike(a, 2.0);
+      // Partial numerator terms.
+      auto even_numerator =
+          -(a + m) * (a + b + m) * x / ((a + two * m) * (a + two * m + one));
+      auto odd_numerator =
+          m * (b - m) * x / ((a + two * m - one) * (a + two * m));
+      auto one_numerator = ScalarLike(x, 1.0);
+      auto numerator = Select(iteration_is_even, even_numerator, odd_numerator);
+      return std::vector<XlaOp>{
+          Select(iteration_is_one, one_numerator, numerator)};
+    };
+
+    auto NthPartialBetaincDenominator =
+        [&shape](XlaOp iteration, absl::Span<const XlaOp> inputs,
+                 XlaBuilder* builder) -> StatusOr<std::vector<XlaOp>> {
+      auto x = inputs[2];
+      auto iteration_bcast = Broadcast(iteration, shape.dimensions());
+      return std::vector<XlaOp>{
+          Select(Eq(iteration_bcast, ScalarLike(iteration_bcast, 0)),
+                 ScalarLike(x, 0.0), ScalarLike(x, 1.0))};
+    };
+
+    // Determine if the inputs are out of range.
+    auto result_is_nan =
+        Or(Or(Or(Le(a, ScalarLike(a, 0.0)), Le(b, ScalarLike(b, 0.0))),
+              Lt(x, ScalarLike(x, 0.0))),
+           Gt(x, ScalarLike(x, 1.0)));
+
+    // The continued fraction will converge rapidly when x < (a+1)/(a+b+2)
+    // as per: http://dlmf.nist.gov/8.17.E23
+    //
+    // Otherwise, we can rewrite using the symmetry relation as per:
+    // http://dlmf.nist.gov/8.17.E4
+    auto converges_rapidly =
+        Lt(x, (a + FullLike(a, 1.0)) / (a + b + FullLike(b, 2.0)));
+    auto a_orig = a;
+    a = Select(converges_rapidly, a, b);
+    b = Select(converges_rapidly, b, a_orig);
+    x = Select(converges_rapidly, x, Sub(FullLike(x, 1.0), x));
+
+    XlaOp continued_fraction;
+
+    // Thresholds and iteration counts taken from Cephes.
+    if (shape.element_type() == F32) {
+      continued_fraction = LentzThompsonBarnettAlgorithm(
+          /*num_iterations=*/200,
+          /*small=*/std::numeric_limits<float>::epsilon() / 2.0f,
+          /*threshold=*/std::numeric_limits<float>::epsilon() / 2.0f,
+          /*nth_partial_numerator=*/NthPartialBetaincNumerator,
+          /*nth_partial_denominator=*/NthPartialBetaincDenominator, {a, b, x},
+          "Betainc");
+    } else {
+      TF_RET_CHECK(shape.element_type() == F64);
+      continued_fraction = LentzThompsonBarnettAlgorithm(
+          /*num_iterations=*/600,
+          /*small=*/std::numeric_limits<double>::epsilon() / 2.0f,
+          /*threshold=*/std::numeric_limits<double>::epsilon() / 2.0f,
+          /*nth_partial_numerator=*/NthPartialBetaincNumerator,
+          /*nth_partial_denominator=*/NthPartialBetaincDenominator, {a, b, x},
+          "Betainc");
+    }
+
+    // We want to compute the regularized incomplete beta function so we need
+    // to combine the continued fraction with a few more terms as well as
+    // dividing it by Beta(a, b). To avoid overflow, we compute in the log
+    // domain. See http://dlmf.nist.gov/8.17.E22 for an easier to read version
+    // of this formula.
+    auto lbeta = Lbeta(a, b);
+    auto result =
+        continued_fraction * Exp(Log(x) * a + Log1p(-x) * b - lbeta) / a;
+    result =
+        Select(result_is_nan, NanValue(&builder, shape.element_type()), result);
+
+    // We have an additional fixup to do if we are taking advantage of the
+    // symmetry relation.
+    return Select(converges_rapidly, result,
+                  Sub(FullLike(result, 1.0), result));
+  });
+}
+
 }  // namespace xla
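The swap-and-subtract fixup at the end is the symmetry relation I_x(a, b) = 1 - I_{1-x}(b, a) from DLMF 8.17.4; a one-line SciPy check (illustrative only):

```python
import numpy as np
from scipy.special import betainc

a, b, x = 0.3, 4.0, 0.9  # here x >= (a + 1) / (a + b + 2): the slow case
# DLMF 8.17.4: the slowly converging case is evaluated as the rapidly
# converging one, with arguments swapped and x replaced by 1 - x.
assert np.isclose(betainc(a, b, x), 1.0 - betainc(b, a, 1.0 - x))
```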
@@ -108,6 +108,9 @@ XlaOp BesselI0e(XlaOp x);
 // at x.
 XlaOp BesselI1e(XlaOp x);

+// Computes the Regularized Incomplete Beta function.
+XlaOp RegularizedIncompleteBeta(XlaOp a, XlaOp b, XlaOp x);
+
 }  // namespace xla

 #endif  // TENSORFLOW_COMPILER_XLA_CLIENT_LIB_MATH_H_
@@ -50,49 +50,51 @@ class BetaincTest(test.TestCase):
     tf_out_t = math_ops.betainc(tf_a_s, tf_b_s, tf_x_s)
     with self.cached_session():
       tf_out = self.evaluate(tf_out_t)
-    scipy_out = special.betainc(a_s, b_s, x_s).astype(np_dt)
+    scipy_out = special.betainc(a_s, b_s, x_s, dtype=np_dt)

     # the scipy version of betainc uses a double-only implementation.
     # TODO(ebrevdo): identify reasons for (sometime) precision loss
     # with doubles
-    tol = 1e-4 if dtype == dtypes.float32 else 5e-5
-    self.assertAllCloseAccordingToType(scipy_out, tf_out, rtol=tol, atol=0)
+    rtol = 1e-4 if dtype == dtypes.float32 else 5e-5
+    atol = 9e-6 if dtype == dtypes.float32 else 3e-6
+    self.assertAllCloseAccordingToType(
+        scipy_out, tf_out, rtol=rtol, atol=atol)

     # Test out-of-range values (most should return nan output)
     combinations = list(itertools.product([-1, 0, 0.5, 1.0, 1.5], repeat=3))
     a_comb, b_comb, x_comb = np.asarray(list(zip(*combinations)), dtype=np_dt)
     with self.cached_session():
       tf_comb = math_ops.betainc(a_comb, b_comb, x_comb).eval()
-    scipy_comb = special.betainc(a_comb, b_comb, x_comb).astype(np_dt)
+    scipy_comb = special.betainc(a_comb, b_comb, x_comb, dtype=np_dt)
     self.assertAllCloseAccordingToType(scipy_comb, tf_comb)

     # Test broadcasting between scalars and other shapes
     with self.cached_session():
       self.assertAllCloseAccordingToType(
-          special.betainc(0.1, b_s, x_s).astype(np_dt),
+          special.betainc(0.1, b_s, x_s, dtype=np_dt),
           math_ops.betainc(0.1, b_s, x_s).eval(),
-          rtol=tol,
-          atol=0)
+          rtol=rtol,
+          atol=atol)
       self.assertAllCloseAccordingToType(
-          special.betainc(a_s, 0.1, x_s).astype(np_dt),
+          special.betainc(a_s, 0.1, x_s, dtype=np_dt),
           math_ops.betainc(a_s, 0.1, x_s).eval(),
-          rtol=tol,
-          atol=0)
+          rtol=rtol,
+          atol=atol)
       self.assertAllCloseAccordingToType(
-          special.betainc(a_s, b_s, 0.1).astype(np_dt),
+          special.betainc(a_s, b_s, 0.1, dtype=np_dt),
           math_ops.betainc(a_s, b_s, 0.1).eval(),
-          rtol=tol,
-          atol=0)
+          rtol=rtol,
+          atol=atol)
       self.assertAllCloseAccordingToType(
-          special.betainc(0.1, b_s, 0.1).astype(np_dt),
+          special.betainc(0.1, b_s, 0.1, dtype=np_dt),
           math_ops.betainc(0.1, b_s, 0.1).eval(),
-          rtol=tol,
-          atol=0)
+          rtol=rtol,
+          atol=atol)
       self.assertAllCloseAccordingToType(
-          special.betainc(0.1, 0.1, 0.1).astype(np_dt),
+          special.betainc(0.1, 0.1, 0.1, dtype=np_dt),
           math_ops.betainc(0.1, 0.1, 0.1).eval(),
-          rtol=tol,
-          atol=0)
+          rtol=rtol,
+          atol=atol)

     with self.assertRaisesRegexp(ValueError, "must be equal"):
       math_ops.betainc(0.5, [0.5], [[0.5]])
@@ -322,7 +322,7 @@ class BetaTest(test.TestCase):
     self.assertAllEqual(np.ones(shape, dtype=np.bool), 1. >= x)
     if not stats:
       return
-    self.assertAllClose(stats.beta.cdf(x, a, b), actual, rtol=1e-4, atol=0)
+    self.assertAllClose(stats.beta.cdf(x, a, b), actual, rtol=9e-3, atol=5e-6)

   def testBetaLogCdf(self):
     shape = (30, 40, 50)
@@ -335,7 +335,7 @@ class BetaTest(test.TestCase):
     self.assertAllEqual(np.ones(shape, dtype=np.bool), 1. >= x)
     if not stats:
       return
-    self.assertAllClose(stats.beta.cdf(x, a, b), actual, rtol=1e-4, atol=0)
+    self.assertAllClose(stats.beta.cdf(x, a, b), actual, rtol=3e-3, atol=2e-5)

   def testBetaWithSoftplusConcentration(self):
     a, b = -4.2, -9.1