Deprecate tf.batch_matmul and replace it with equivalent calls to tf.matmul, which now supports adjoint and batch matmul.
CL created by: replace_string \ batch_matmul\\\( \ matmul\( plus some manual edits, mostly s/adj_x/adjoint_a/ and s/adj_y/adjoint_b/. Change: 139536034
parent 9e650f5a20
commit 2eabf986b0
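The migration pattern behind most of the edits below, as a rough sketch (not taken from the diff itself; the shapes and variable names are illustrative only): a deprecated tf.batch_matmul call maps onto tf.matmul, which now accepts batched inputs and adjoint_a/adjoint_b flags.

import numpy as np
import tensorflow as tf

# Illustrative batch of matrices (shapes made up for this sketch).
a = tf.constant(np.random.rand(2, 3, 4))  # [batch, 3, 4]
b = tf.constant(np.random.rand(2, 5, 4))  # [batch, 5, 4]

# Before this CL: a dedicated batch op with adj_x/adj_y flags.
#   c = tf.batch_matmul(a, b, adj_y=True)
# After this CL: tf.matmul handles rank-3 (batched) inputs directly,
# and the flags are spelled adjoint_a/adjoint_b.
c = tf.matmul(a, b, adjoint_b=True)  # [batch, 3, 5]

with tf.Session() as sess:
  print(sess.run(c).shape)  # (2, 3, 5)

For real-valued tensors the adjoint is just the transpose, which is presumably why some transpose_a/transpose_b call sites in this diff could be switched to adjoint_a/adjoint_b unchanged.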
@@ -193,7 +193,7 @@ class MultivariateNormalCholeskyTest(tf.test.TestCase):
 mat = self._rng.rand(*shape)
 chol = distributions.matrix_diag_transform(mat, transform=tf.nn.softplus)
 chol = tf.matrix_band_part(chol, -1, 0)
-sigma = tf.batch_matmul(chol, chol, adj_y=True)
+sigma = tf.matmul(chol, chol, adjoint_b=True)
 return chol.eval(), sigma.eval()

 def testNonmatchingMuSigmaFailsStatic(self):
@@ -391,7 +391,7 @@ class MultivariateNormalFullTest(tf.test.TestCase):
 # This ensures sigma is positive def.
 mat_shape = batch_shape + event_shape + event_shape
 mat = self._rng.randn(*mat_shape)
-sigma = tf.batch_matmul(mat, mat, adj_y=True).eval()
+sigma = tf.matmul(mat, mat, adjoint_b=True).eval()

 mu_shape = batch_shape + event_shape
 mu = self._rng.randn(*mu_shape)
@@ -84,7 +84,7 @@ class OperatorPDCholeskyTest(tf.test.TestCase):
 operator = operator_pd_cholesky.OperatorPDCholesky(chol)

 sqrt_operator_times_x = operator.sqrt_matmul(x)
-expected = tf.batch_matmul(chol, x)
+expected = tf.matmul(chol, x)

 self.assertEqual(expected.get_shape(),
 sqrt_operator_times_x.get_shape())
@@ -102,7 +102,7 @@ class OperatorPDCholeskyTest(tf.test.TestCase):
 operator = operator_pd_cholesky.OperatorPDCholesky(chol)

 sqrt_operator_times_x = operator.sqrt_matmul(x)
-expected = tf.batch_matmul(chol, x)
+expected = tf.matmul(chol, x)

 self.assertEqual(expected.get_shape(),
 sqrt_operator_times_x.get_shape())
@@ -121,7 +121,7 @@ class OperatorPDCholeskyTest(tf.test.TestCase):

 sqrt_operator_times_x = operator.sqrt_matmul(x, transpose_x=True)
 # tf.batch_matmul is defined x * y, so "y" is on the right, not "x".
-expected = tf.batch_matmul(chol, x, adj_y=True)
+expected = tf.matmul(chol, x, adjoint_b=True)

 self.assertEqual(expected.get_shape(),
 sqrt_operator_times_x.get_shape())
@@ -135,11 +135,11 @@ class OperatorPDCholeskyTest(tf.test.TestCase):
 x = self._rng.rand(*x_shape)
 chol_shape = batch_shape + (k, k)
 chol = self._random_cholesky_array(chol_shape)
-matrix = tf.batch_matmul(chol, chol, adj_y=True)
+matrix = tf.matmul(chol, chol, adjoint_b=True)

 operator = operator_pd_cholesky.OperatorPDCholesky(chol)

-expected = tf.batch_matmul(matrix, x)
+expected = tf.matmul(matrix, x)

 self.assertEqual(expected.get_shape(), operator.matmul(x).get_shape())
 self.assertAllClose(expected.eval(), operator.matmul(x).eval())
@@ -152,11 +152,11 @@ class OperatorPDCholeskyTest(tf.test.TestCase):
 x = self._rng.rand(*x_shape)
 chol_shape = batch_shape + (k, k)
 chol = self._random_cholesky_array(chol_shape)
-matrix = tf.batch_matmul(chol, chol, adj_y=True)
+matrix = tf.matmul(chol, chol, adjoint_b=True)

 operator = operator_pd_cholesky.OperatorPDCholesky(chol)

-expected = tf.batch_matmul(matrix, x)
+expected = tf.matmul(matrix, x)

 self.assertEqual(expected.get_shape(), operator.matmul(x).get_shape())
 self.assertAllClose(expected.eval(), operator.matmul(x).eval())
@@ -169,13 +169,13 @@ class OperatorPDCholeskyTest(tf.test.TestCase):
 x = self._rng.rand(*x_shape)
 chol_shape = batch_shape + (k, k)
 chol = self._random_cholesky_array(chol_shape)
-matrix = tf.batch_matmul(chol, chol, adj_y=True)
+matrix = tf.matmul(chol, chol, adjoint_b=True)

 operator = operator_pd_cholesky.OperatorPDCholesky(chol)
 operator_times_x = operator.matmul(x, transpose_x=True)

 # tf.batch_matmul is defined x * y, so "y" is on the right, not "x".
-expected = tf.batch_matmul(matrix, x, adj_y=True)
+expected = tf.matmul(matrix, x, adjoint_b=True)

 self.assertEqual(expected.get_shape(), operator_times_x.get_shape())
 self.assertAllClose(expected.eval(), operator_times_x.eval())
@@ -32,7 +32,7 @@ class OperatorPDFullTest(tf.test.TestCase):

 def _random_positive_def_array(self, *shape):
 matrix = self._rng.rand(*shape)
-return tf.batch_matmul(matrix, matrix, adj_y=True).eval()
+return tf.matmul(matrix, matrix, adjoint_b=True).eval()

 def testPositiveDefiniteMatrixDoesntRaise(self):
 with self.test_session():
@@ -75,7 +75,7 @@ class OperatorSolve(OperatorShape):
 """Operator implements .solve."""

 def __init__(self, chol):
-self._pos_def_matrix = tf.batch_matmul(chol, chol, adj_y=True)
+self._pos_def_matrix = tf.matmul(chol, chol, adjoint_b=True)
 super(OperatorSolve, self).__init__(chol.shape)

 def _solve(self, rhs):
@@ -38,7 +38,7 @@ class OperatorPDSqrtVDVTUpdateTest(
 def _random_pd_matrix(self, shape):
 # With probability 1 this is positive definite.
 sqrt = self._rng.randn(*shape)
-mat = tf.batch_matmul(sqrt, sqrt, adj_y=True)
+mat = tf.matmul(sqrt, sqrt, adjoint_b=True)
 return mat.eval()

 def _random_v_and_diag(self, mat_shape, v_matrix_rank):
@@ -67,11 +67,11 @@ class OperatorPDSqrtVDVTUpdateTest(
 diag_vt = tf.matrix_transpose(v)
 else:
 diag_mat = tf.matrix_diag(diag)
-diag_vt = tf.batch_matmul(diag_mat, v, adj_y=True)
+diag_vt = tf.matmul(diag_mat, v, adjoint_b=True)

-v_diag_vt = tf.batch_matmul(v, diag_vt)
+v_diag_vt = tf.matmul(v, diag_vt)
 sqrt = mat + v_diag_vt
-a = tf.batch_matmul(sqrt, sqrt, adj_y=True)
+a = tf.matmul(sqrt, sqrt, adjoint_b=True)
 return a.eval()

 def _build_operator_and_mat(self, batch_shape, k, dtype=np.float64):
@@ -1394,7 +1394,7 @@ class ScaleAndShift(Bijector):

 def _forward(self, x):
 x, sample_shape = self.shaper.make_batch_of_event_sample_matrices(x)
-x = math_ops.batch_matmul(self.scale, x)
+x = math_ops.matmul(self.scale, x)
 x = self.shaper.undo_make_batch_of_event_sample_matrices(x, sample_shape)
 x += self.shift
 return x
@@ -1776,7 +1776,7 @@ class CholeskyOuterProduct(Bijector):
 x = control_flow_ops.with_dependencies([is_matrix, is_square], x)
 # For safety, explicitly zero-out the upper triangular part.
 x = array_ops.matrix_band_part(x, -1, 0)
-return math_ops.batch_matmul(x, x, adj_y=True)
+return math_ops.matmul(x, x, adjoint_b=True)

 def _inverse_and_inverse_log_det_jacobian(self, y):
 x = (math_ops.sqrt(y) if self._static_event_ndims == 0
@@ -1855,8 +1855,7 @@ class CholeskyOuterProduct(Bijector):
 dim=1)

 sum_weighted_log_diag = array_ops.squeeze(
-math_ops.batch_matmul(math_ops.log(diag), exponents),
-squeeze_dims=-1)
+math_ops.matmul(math_ops.log(diag), exponents), squeeze_dims=-1)
 fldj = p * math.log(2.) + sum_weighted_log_diag

 if x.get_shape().ndims is not None:
@@ -216,9 +216,11 @@ class Dirichlet(distribution.Distribution):
 def _variance(self):
 scale = self.alpha_sum * math_ops.sqrt(1. + self.alpha_sum)
 alpha = self.alpha / scale
-outer_prod = -math_ops.batch_matmul(
-array_ops.expand_dims(alpha, dim=-1),  # column
-array_ops.expand_dims(alpha, dim=-2))  # row
+outer_prod = -math_ops.matmul(
+array_ops.expand_dims(
+alpha, dim=-1),  # column
+array_ops.expand_dims(
+alpha, dim=-2))  # row
 return array_ops.matrix_set_diag(outer_prod,
 alpha * (self.alpha_sum / scale - alpha))

@@ -245,7 +245,7 @@ class DirichletMultinomial(distribution.Distribution):
 def _variance(self):
 alpha_sum = array_ops.expand_dims(self.alpha_sum, -1)
 normalized_alpha = self.alpha / alpha_sum
-variance = -math_ops.batch_matmul(
+variance = -math_ops.matmul(
 array_ops.expand_dims(normalized_alpha, -1),
 array_ops.expand_dims(normalized_alpha, -2))
 variance = array_ops.matrix_set_diag(variance, normalized_alpha *
@@ -223,9 +223,8 @@ class Multinomial(distribution.Distribution):

 def _variance(self):
 p = self.p * array_ops.expand_dims(array_ops.ones_like(self.n), -1)
-outer_prod = math_ops.batch_matmul(
-array_ops.expand_dims(self._mean_val, -1),
-array_ops.expand_dims(p, -2))
+outer_prod = math_ops.matmul(
+array_ops.expand_dims(self._mean_val, -1), array_ops.expand_dims(p, -2))
 return array_ops.matrix_set_diag(-outer_prod,
 self._mean_val - self._mean_val * p)

@@ -437,7 +437,7 @@ class OperatorPDBase(object):
 name: A name to give this `Op`.

 Returns:
-A result equivalent to `tf.batch_matmul(self.to_dense(), x)`.
+A result equivalent to `tf.matmul(self.to_dense(), x)`.
 """
 with ops.name_scope(self.name):
 with ops.name_scope(name, values=[x] + self.inputs):
@@ -471,7 +471,7 @@ class OperatorPDBase(object):
 name: A name scope to use for ops added by this method.

 Returns:
-A result equivalent to `tf.batch_matmul(self.sqrt_to_dense(), x)`.
+A result equivalent to `tf.matmul(self.sqrt_to_dense(), x)`.
 """
 with ops.name_scope(self.name):
 with ops.name_scope(name, values=[x] + self.inputs):
@@ -127,21 +127,21 @@ class OperatorPDCholesky(operator_pd.OperatorPDBase):
 return math_ops.matmul(chol, chol_times_x)

 def _batch_matmul(self, x, transpose_x=False):
-# tf.batch_matmul is defined x * y, so "y" is on the right, not "x".
+# tf.matmul is defined x * y, so "y" is on the right, not "x".
 chol = array_ops.matrix_band_part(self._chol, -1, 0)
-chol_times_x = math_ops.batch_matmul(
-chol, x, adj_x=True, adj_y=transpose_x)
-return math_ops.batch_matmul(chol, chol_times_x)
+chol_times_x = math_ops.matmul(
+chol, x, adjoint_a=True, adjoint_b=transpose_x)
+return math_ops.matmul(chol, chol_times_x)

 def _sqrt_matmul(self, x, transpose_x=False):
 chol = array_ops.matrix_band_part(self._chol, -1, 0)
 # tf.matmul is defined a * b
-return math_ops.matmul(chol, x, transpose_b=transpose_x)
+return math_ops.matmul(chol, x, adjoint_b=transpose_x)

 def _batch_sqrt_matmul(self, x, transpose_x=False):
 chol = array_ops.matrix_band_part(self._chol, -1, 0)
 # tf.batch_matmul is defined x * y, so "y" is on the right, not "x".
-return math_ops.batch_matmul(chol, x, adj_y=transpose_x)
+return math_ops.matmul(chol, x, adjoint_b=transpose_x)

 def _batch_solve(self, rhs):
 return linalg_ops.cholesky_solve(self._chol, rhs)
@@ -181,4 +181,4 @@ class OperatorPDCholesky(operator_pd.OperatorPDBase):

 def _to_dense(self):
 chol = array_ops.matrix_band_part(self._chol, -1, 0)
-return math_ops.batch_matmul(chol, chol, adj_y=True)
+return math_ops.matmul(chol, chol, adjoint_b=True)
@@ -319,10 +319,7 @@ class OperatorPDSqrtVDVTUpdate(operator_pd.OperatorPDBase):
 # M^{-1} V
 minv_v = self._operator.solve(self._v)
 # V^T M^{-1} V
-if batch_mode:
-vt_minv_v = math_ops.batch_matmul(self._v, minv_v, adj_x=True)
-else:
-vt_minv_v = math_ops.matmul(self._v, minv_v, transpose_a=True)
+vt_minv_v = math_ops.matmul(self._v, minv_v, adjoint_a=True)

 # D^{-1} + V^T M^{-1} V
 capacitance = self._diag_inv_operator.add_to_tensor(vt_minv_v)
@@ -360,14 +357,14 @@ class OperatorPDSqrtVDVTUpdate(operator_pd.OperatorPDBase):
 v = self._v
 m = self._operator
 d = self._diag_operator
-# The operators call the appropriate matmul/batch_matmul automatically. We
-# cannot override.
-# batch_matmul is defined as: x * y, so adj_x and adj_y are the ways to
-# transpose the left and right.
+# The operators call the appropriate matmul/batch_matmul automatically.
+# We cannot override.
+# batch_matmul is defined as: x * y, so adjoint_a and adjoint_b are the
+# ways to transpose the left and right.
 mx = m.matmul(x, transpose_x=transpose_x)
-vt_x = math_ops.batch_matmul(v, x, adj_x=True, adj_y=transpose_x)
+vt_x = math_ops.matmul(v, x, adjoint_a=True, adjoint_b=transpose_x)
 d_vt_x = d.matmul(vt_x)
-v_d_vt_x = math_ops.batch_matmul(v, d_vt_x)
+v_d_vt_x = math_ops.matmul(v, d_vt_x)

 return mx + v_d_vt_x

@@ -444,11 +441,11 @@ class OperatorPDSqrtVDVTUpdate(operator_pd.OperatorPDBase):
 # M^{-1} rhs
 minv_rhs = m.solve(rhs)
 # V^T M^{-1} rhs
-vt_minv_rhs = math_ops.batch_matmul(v, minv_rhs, adj_x=True)
+vt_minv_rhs = math_ops.matmul(v, minv_rhs, adjoint_a=True)
 # C^{-1} V^T M^{-1} rhs
 cinv_vt_minv_rhs = linalg_ops.cholesky_solve(cchol, vt_minv_rhs)
 # V C^{-1} V^T M^{-1} rhs
-v_cinv_vt_minv_rhs = math_ops.batch_matmul(v, cinv_vt_minv_rhs)
+v_cinv_vt_minv_rhs = math_ops.matmul(v, cinv_vt_minv_rhs)
 # M^{-1} V C^{-1} V^T M^{-1} rhs
 minv_v_cinv_vt_minv_rhs = m.solve(v_cinv_vt_minv_rhs)

@@ -457,7 +454,7 @@ class OperatorPDSqrtVDVTUpdate(operator_pd.OperatorPDBase):

 def _to_dense(self):
 sqrt = self.sqrt_to_dense()
-return math_ops.batch_matmul(sqrt, sqrt, adj_y=True)
+return math_ops.matmul(sqrt, sqrt, adjoint_b=True)

 def _sqrt_to_dense(self):
 v = self._v
@@ -467,6 +464,6 @@ class OperatorPDSqrtVDVTUpdate(operator_pd.OperatorPDBase):
 d_vt = d.matmul(v, transpose_x=True)
 # Batch op won't be efficient for singletons. Currently we don't break
 # to_dense into batch/singleton methods.
-v_d_vt = math_ops.batch_matmul(v, d_vt)
+v_d_vt = math_ops.matmul(v, d_vt)
 m_plus_v_d_vt = m.to_dense() + v_d_vt
 return m_plus_v_d_vt
@@ -87,9 +87,7 @@ class OperatorPDDerivedClassTest(tf.test.TestCase):
 self.assertEqual(mat.shape, sqrt.get_shape())
 # Square roots are not unique, but SS^T should equal mat. In this
 # case however, we should have S = S^T.
-self._compare_results(
-expected=mat,
-actual=tf.batch_matmul(sqrt, sqrt))
+self._compare_results(expected=mat, actual=tf.matmul(sqrt, sqrt))

 def testDeterminants(self):
 with self.test_session():
@@ -111,8 +109,7 @@ class OperatorPDDerivedClassTest(tf.test.TestCase):
 x = self._rng.randn(*(batch_shape + (k, 5)))

 self._compare_results(
-expected=tf.batch_matmul(mat, x).eval(),
-actual=operator.matmul(x))
+expected=tf.matmul(mat, x).eval(), actual=operator.matmul(x))

 def testSqrtMatmul(self):
 # Square roots are not unique, but we should have SS^T x = Ax, and in our
@@ -126,7 +123,7 @@ class OperatorPDDerivedClassTest(tf.test.TestCase):
 x = self._rng.randn(*(batch_shape + (k, 5)))

 self._compare_results(
-expected=tf.batch_matmul(mat, x).eval(),
+expected=tf.matmul(mat, x).eval(),
 actual=operator.sqrt_matmul(operator.sqrt_matmul(x)))

 def testSolve(self):
@@ -245,7 +245,7 @@ class _WishartOperatorPD(distribution.Distribution):

 if not self.cholesky_input_output_matrices:
 # Complexity: O(nbk^3)
-x = math_ops.batch_matmul(x, x, adj_y=True)
+x = math_ops.matmul(x, x, adjoint_b=True)

 return x

@@ -353,7 +353,7 @@ class _WishartOperatorPD(distribution.Distribution):
 def _variance(self):
 x = math_ops.sqrt(self.df) * self.scale_operator_pd.to_dense()
 d = array_ops.expand_dims(array_ops.matrix_diag_part(x), -1)
-v = math_ops.square(x) + math_ops.batch_matmul(d, d, adj_y=True)
+v = math_ops.square(x) + math_ops.matmul(d, d, adjoint_b=True)
 if self.cholesky_input_output_matrices:
 return linalg_ops.cholesky(v)
 return v