Merge pull request from jonathanwyatt16:matrix_square_root

PiperOrigin-RevId: 218197028
TensorFlower Gardener 2018-10-22 11:09:36 -07:00
commit 3d715da989
15 changed files with 393 additions and 0 deletions

View File

@ -0,0 +1,37 @@
op {
  graph_op_name: "MatrixSquareRoot"
  in_arg {
    name: "input"
    description: <<END
Shape is `[..., M, M]`.
END
  }
  out_arg {
    name: "output"
    description: <<END
Shape is `[..., M, M]`.

@compatibility(scipy)
Equivalent to scipy.linalg.sqrtm
@end_compatibility
END
  }
  summary: "Computes the matrix square root of one or more square matrices:"
  description: <<END
matmul(sqrtm(A), sqrtm(A)) = A

The input matrix should be invertible. If the input matrix is real, it should
have no eigenvalues which are real and negative (pairs of complex conjugate
eigenvalues are allowed).

The matrix square root is computed by first reducing the matrix to
quasi-triangular form with the real Schur decomposition. The square root
of the quasi-triangular matrix is then computed directly. Details of
the algorithm can be found in: Nicholas J. Higham, "Computing real
square roots of a real matrix", Linear Algebra Appl., 1987.

The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
form square matrices. The output is a tensor of the same shape as the input
containing the matrix square root for all input submatrices `[..., :, :]`.
END
}
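As a quick illustration of the Schur method cited above, here is a minimal NumPy/SciPy sketch (complex Schur form for brevity; the kernel itself relies on Eigen's real quasi-triangular variant, and `sqrtm_schur` is a hypothetical helper name, not part of this change):

import numpy as np
from scipy.linalg import schur, sqrtm

def sqrtm_schur(a):
  """Principal matrix square root via A = Q T Q^H, then R with R R = T."""
  t, q = schur(a, output='complex')  # T is upper triangular
  n = t.shape[0]
  r = np.zeros_like(t)
  for i in range(n):
    r[i, i] = np.sqrt(t[i, i])
  # Off-diagonal recurrence from (R R)[i, j] = T[i, j], solved column by column.
  for j in range(1, n):
    for i in range(j - 1, -1, -1):
      s = t[i, j] - r[i, i + 1:j] @ r[i + 1:j, j]
      r[i, j] = s / (r[i, i] + r[j, j])
  x = q @ r @ q.conj().T
  # For real inputs meeting the eigenvalue condition above, the result is real.
  return x.real if np.isrealobj(a) else x

a = np.array([[33., 24.], [48., 57.]])  # square root is exactly [[5, 2], [4, 7]]
np.testing.assert_allclose(sqrtm_schur(a) @ sqrtm_schur(a), a, atol=1e-10)
np.testing.assert_allclose(sqrtm_schur(a), sqrtm(a), atol=1e-10)  # scipy agrees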

View File

@ -0,0 +1,9 @@
op {
  graph_op_name: "MatrixSquareRoot"
  endpoint {
    name: "linalg.sqrtm"
  }
  endpoint {
    name: "matrix_square_root"
  }
}
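Both endpoints surface the same op in the Python API; a minimal usage sketch, assuming a TF 1.x graph/session context and a build that includes this change:

import numpy as np
import tensorflow as tf

a = tf.constant([[33., 24.], [48., 57.]])
with tf.Session() as sess:
  s1, s2 = sess.run([tf.linalg.sqrtm(a), tf.matrix_square_root(a)])
np.testing.assert_allclose(s1, s2)  # two names, one op
np.testing.assert_allclose(s1 @ s1, [[33., 24.], [48., 57.]], atol=1e-3)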

View File

@ -2629,6 +2629,7 @@ cc_library(
        ":matrix_logarithm_op",
        ":matrix_solve_ls_op",
        ":matrix_solve_op",
        ":matrix_square_root_op",
        ":matrix_triangular_solve_op",
        ":qr_op",
        ":self_adjoint_eig_op",
@ -2738,6 +2739,12 @@ tf_kernel_library(
    deps = LINALG_DEPS,
)

tf_kernel_library(
    name = "matrix_square_root_op",
    prefix = "matrix_square_root_op",
    deps = LINALG_DEPS,
)

tf_kernel_library(
    name = "matrix_triangular_solve_op",
    prefix = "matrix_triangular_solve_op",

View File

@ -0,0 +1,58 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// See docs in ../ops/linalg_ops.cc.

#include "third_party/eigen3/Eigen/Core"
#include "third_party/eigen3/unsupported/Eigen/MatrixFunctions"
#include "tensorflow/core/framework/kernel_def_builder.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/kernels/linalg_ops_common.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

template <class Scalar>
class MatrixSquareRootOp : public LinearAlgebraOp<Scalar> {
 public:
  INHERIT_LINALG_TYPEDEFS(Scalar);

  explicit MatrixSquareRootOp(OpKernelConstruction* context) : Base(context) {}

  void ComputeMatrix(OpKernelContext* context, const ConstMatrixMaps& inputs,
                     MatrixMaps* outputs) final {
    const ConstMatrixMap& input = inputs[0];
    // Nothing to compute for an empty (0 x 0) matrix.
    if (input.rows() == 0) return;
    using Matrix =
        Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
    Matrix tmp = input;
    // Eigen's unsupported MatrixFunctions module computes the principal
    // matrix square root via a Schur decomposition.
    outputs->at(0) = tmp.sqrt();
  }

 private:
  TF_DISALLOW_COPY_AND_ASSIGN(MatrixSquareRootOp);
};

REGISTER_LINALG_OP("MatrixSquareRoot", (MatrixSquareRootOp<float>), float);
REGISTER_LINALG_OP("MatrixSquareRoot", (MatrixSquareRootOp<double>), double);
REGISTER_LINALG_OP("MatrixSquareRoot", (MatrixSquareRootOp<complex64>),
                   complex64);
REGISTER_LINALG_OP("MatrixSquareRoot", (MatrixSquareRootOp<complex128>),
                   complex128);

}  // namespace tensorflow
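The kernel above delegates the per-matrix math to Eigen (`tmp.sqrt()`), and LinearAlgebraOp supplies the batching over the leading dimensions. A hedged NumPy sketch of the batch contract, looping scipy.linalg.sqrtm over the inner `[M, M]` blocks (`batch_sqrtm` is an illustrative name, not the kernel's API):

import numpy as np
from scipy.linalg import sqrtm

def batch_sqrtm(x):
  """Apply sqrtm to every inner [M, M] block of a [..., M, M] array."""
  flat = x.reshape((-1,) + x.shape[-2:])
  out = np.stack([sqrtm(m) for m in flat])
  return out.reshape(x.shape)

batch = np.tile(np.array([[2., 1.], [1., 2.]]), [2, 3, 1, 1])  # shape [2, 3, 2, 2]
roots = batch_sqrtm(batch)
np.testing.assert_allclose(np.matmul(roots, roots), batch, atol=1e-10)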

View File

@ -323,6 +323,12 @@ REGISTER_OP("MatrixSolveLs")
      return MatrixSolveShapeFn(c, false /* square */);
    });

REGISTER_OP("MatrixSquareRoot")
    .Input("input: T")
    .Output("output: T")
    .Attr("T: {double, float, complex64, complex128}")
    .SetShapeFn(BatchUnchangedSquareShapeFn);

REGISTER_OP("Qr")
    .Input("input: T")
    .Output("q: T")

View File

@ -16084,6 +16084,29 @@ op {
}
}
}
op {
  name: "MatrixSquareRoot"
  input_arg {
    name: "input"
    type_attr: "T"
  }
  output_arg {
    name: "output"
    type_attr: "T"
  }
  attr {
    name: "T"
    type: "type"
    allowed_values {
      list {
        type: DT_DOUBLE
        type: DT_FLOAT
        type: DT_COMPLEX64
        type: DT_COMPLEX128
      }
    }
  }
}
op {
  name: "MatrixTriangularSolve"
  input_arg {

View File

@ -16660,6 +16660,46 @@ func MatrixSolveLs(scope *Scope, matrix tf.Output, rhs tf.Output, l2_regularizer
	return op.Output(0)
}

// Computes the matrix square root of one or more square matrices:
//
// matmul(sqrtm(A), sqrtm(A)) = A
//
// The input matrix should be invertible. If the input matrix is real,
// it should have no eigenvalues which are real and negative
// (pairs of complex conjugate eigenvalues are allowed).
//
// The matrix square root is computed by first reducing the matrix to
// quasi-triangular form with the real Schur decomposition. The square root
// of the quasi-triangular matrix is then computed directly. Details of
// the algorithm can be found in: Nicholas J. Higham, "Computing real
// square roots of a real matrix", Linear Algebra Appl., 1987.
//
// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
// form square matrices. The output is a tensor of the same shape as the input
// containing the matrix square root for all input submatrices `[..., :, :]`.
//
// Arguments:
// input: Shape is `[..., M, M]`.
//
// Returns Shape is `[..., M, M]`.
//
// @compatibility(scipy)
// Equivalent to scipy.linalg.sqrtm
// @end_compatibility
func MatrixSquareRoot(scope *Scope, input tf.Output) (output tf.Output) {
	if scope.Err() != nil {
		return
	}
	opspec := tf.OpSpec{
		Type: "MatrixSquareRoot",
		Input: []tf.Input{
			input,
		},
	}
	op := scope.AddOperation(opspec)
	return op.Output(0)
}

// MaxPool3DAttr is an optional argument to MaxPool3D.
type MaxPool3DAttr func(optionalAttr)

View File

@ -66,6 +66,10 @@ def _GetMatrixUnaryFunctorGradientTest(functor_, dtype_, shape_, **kwargs_):
          low=-1.0, high=1.0,
          size=np.prod(shape_)).reshape(shape_).astype(dtype_)
      a = constant_op.constant(a_np)
      if functor_.__name__ == 'matrix_square_root':
        # Square the input matrix to ensure that its matrix square root exists
        a = math_ops.matmul(a, a)
        a_np = a.eval()
      b = functor_(a, **kwargs_)
      # Optimal stepsize for central difference is O(epsilon^{1/3}).
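On the stepsize comment just above: central differencing has truncation error O(h^2) and roundoff error O(eps / h), which balance at h = O(eps^{1/3}). A hedged NumPy sketch of such a Jacobian check for sqrtm, with the analytic answer taken from the Kronecker-sum identity used by the gradient registered later in this change (helper names are illustrative, not the test's compute_gradient_error machinery):

import numpy as np
from scipy.linalg import sqrtm

def central_diff_jacobian(f, x, h):
  """Central-difference Jacobian of a matrix function, flattened row-major."""
  n = x.size
  jac = np.zeros((n, n))
  for k in range(n):
    e = np.zeros(n)
    e[k] = h
    e = e.reshape(x.shape)
    jac[:, k] = (f(x + e) - f(x - e)).reshape(-1) / (2.0 * h)
  return jac

x = np.array([[2., 1.], [1., 2.]])
h = np.finfo(np.float64).eps ** (1.0 / 3.0)  # the O(epsilon^{1/3}) stepsize
jac = central_diff_jacobian(lambda m: sqrtm(m).real, x, h)

# vec(R dR + dR R) = (R (x) I + I (x) R^T) vec(dR) under row-major vec, so the
# Jacobian of sqrtm is the inverse of that Kronecker sum.
r = sqrtm(x).real
k = np.kron(r, np.eye(2)) + np.kron(np.eye(2), r.T)
np.testing.assert_allclose(jac, np.linalg.inv(k), rtol=1e-6, atol=1e-9)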
@ -189,6 +193,17 @@ if __name__ == '__main__':
                     lambda x: linalg_ops.log_matrix_determinant(x)[1],
                     dtype, shape))

        # The numerical Jacobian is consistently invalid for these four shapes
        # because the matrix square root of the perturbed input doesn't exist
        if shape in {(2, 5, 5), (3, 5, 5), (3, 10, 10), (3, 2, 5, 5)}:
          # Alternative shape that consistently produces a valid numerical
          # Jacobian
          shape = extra + (size + 1, size + 1)
          name = '%s_%s' % (dtype.__name__, '_'.join(map(str, shape)))
        _AddTest(
            MatrixUnaryFunctorGradientTest, 'MatrixSquareRootGradient', name,
            _GetMatrixUnaryFunctorGradientTest(linalg_ops.matrix_square_root,
                                               dtype, shape))

  # Tests for gradients of matrix_solve_ls
  for dtype in np.float32, np.float64:
    for rows in 2, 5, 10:

View File

@ -0,0 +1,116 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow.ops.math_ops.matrix_square_root."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from tensorflow.python.framework import constant_op
from tensorflow.python.ops import gen_linalg_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.platform import test
class SquareRootOpTest(test.TestCase):
def _verifySquareRoot(self, matrix, np_type):
matrix = matrix.astype(np_type)
with self.test_session(use_gpu=True):
# Verify that matmul(sqrtm(A), sqrtm(A)) = A
sqrt = gen_linalg_ops.matrix_square_root(matrix)
square = math_ops.matmul(sqrt, sqrt)
self.assertShapeEqual(matrix, square)
self.assertAllClose(matrix, square, rtol=1e-4, atol=1e-3)
def _verifySquareRootReal(self, x):
for np_type in [np.float32, np.float64]:
self._verifySquareRoot(x, np_type)
def _verifySquareRootComplex(self, x):
for np_type in [np.complex64, np.complex128]:
self._verifySquareRoot(x, np_type)
def _makeBatch(self, matrix1, matrix2):
matrix_batch = np.concatenate(
[np.expand_dims(matrix1, 0),
np.expand_dims(matrix2, 0)])
matrix_batch = np.tile(matrix_batch, [2, 3, 1, 1])
return matrix_batch
def _testMatrices(self, matrix1, matrix2):
# Real
self._verifySquareRootReal(matrix1)
self._verifySquareRootReal(matrix2)
self._verifySquareRootReal(self._makeBatch(matrix1, matrix2))
# Complex
matrix1 = matrix1.astype(np.complex64)
matrix2 = matrix2.astype(np.complex64)
matrix1 += 1j * matrix1
matrix2 += 1j * matrix2
self._verifySquareRootComplex(matrix1)
self._verifySquareRootComplex(matrix2)
self._verifySquareRootComplex(self._makeBatch(matrix1, matrix2))
def testSymmetricPositiveDefinite(self):
matrix1 = np.array([[2., 1.], [1., 2.]])
matrix2 = np.array([[3., -1.], [-1., 3.]])
self._testMatrices(matrix1, matrix2)
def testAsymmetric(self):
matrix1 = np.array([[0., 4.], [-1., 5.]])
matrix2 = np.array([[33., 24.], [48., 57.]])
self._testMatrices(matrix1, matrix2)
def testIdentityMatrix(self):
# 2x2
identity = np.array([[1., 0], [0, 1.]])
self._verifySquareRootReal(identity)
# 3x3
identity = np.array([[1., 0, 0], [0, 1., 0], [0, 0, 1.]])
self._verifySquareRootReal(identity)
def testEmpty(self):
self._verifySquareRootReal(np.empty([0, 2, 2]))
self._verifySquareRootReal(np.empty([2, 0, 0]))
def testWrongDimensions(self):
# The input to the square root should be at least a 2-dimensional tensor.
tensor = constant_op.constant([1., 2.])
with self.assertRaises(ValueError):
gen_linalg_ops.matrix_square_root(tensor)
def testNotSquare(self):
with self.test_session():
with self.assertRaises(ValueError):
tensor = constant_op.constant([[1., 0., -1.], [-1., 1., 0.]])
gen_linalg_ops.matrix_square_root(tensor).eval()
def testConcurrentExecutesWithoutError(self):
with self.test_session(use_gpu=True) as sess:
matrix1 = random_ops.random_normal([5, 5], seed=42)
matrix2 = random_ops.random_normal([5, 5], seed=42)
sqrt1 = gen_linalg_ops.matrix_square_root(matrix1)
sqrt2 = gen_linalg_ops.matrix_square_root(matrix2)
all_ops = [sqrt1, sqrt2]
sqrt = sess.run(all_ops)
self.assertAllEqual(sqrt[0], sqrt[1])
if __name__ == "__main__":
test.main()

View File

@ -50,6 +50,7 @@ norm = linalg_ops.norm
qr = linalg_ops.qr
set_diag = array_ops.matrix_set_diag
solve = linalg_ops.matrix_solve
sqrtm = linalg_ops.matrix_square_root
svd = linalg_ops.svd
tensordot = math_ops.tensordot
trace = math_ops.trace

View File

@ -55,6 +55,71 @@ def _MatrixDeterminantGrad(op, grad):
  return multipliers * a_adj_inv


@ops.RegisterGradient("MatrixSquareRoot")
def _MatrixSquareRootGrad(op, grad):
  """Gradient for MatrixSquareRoot."""

  # Let A be an m x m square matrix (or batch of matrices)
  # Let R = sqrtm(A)
  # By definition, A = RR
  # Take the differential: dA = d(RR) = R dR + dR R
  # Solve the resulting Sylvester equation for dR

  # Used to find Kronecker products within the Sylvester equation
  def _KroneckerProduct(b1, b2):
    """Computes the Kronecker product of two batches of square matrices."""
    b1_shape = array_ops.shape(b1)
    b2_shape = array_ops.shape(b2)
    b1_order = b1_shape[-1]
    b2_order = b2_shape[-1]

    shape_slice_size = [math_ops.subtract(array_ops.size(b1_shape), 2)]
    shape_slice = array_ops.slice(b1_shape, [0],
                                  shape_slice_size)  # Same for both batches
    b1_reshape_shape = array_ops.concat(
        [shape_slice, [b1_order], [1], [b1_order], [1]], 0)
    b2_reshape_shape = array_ops.concat(
        [shape_slice, [1], [b2_order], [1], [b2_order]], 0)

    b1_reshape = array_ops.reshape(b1, b1_reshape_shape)
    b2_reshape = array_ops.reshape(b2, b2_reshape_shape)

    order_prod = b1_order * b2_order
    kprod_shape = array_ops.concat([shape_slice, [order_prod], [order_prod]],
                                   0)
    return array_ops.reshape(b1_reshape * b2_reshape, kprod_shape)

  sqrtm = op.outputs[0]  # R
  shape = array_ops.shape(sqrtm)
  order = shape[-1]  # m
  matrix_count = math_ops.reduce_prod(shape[0:-2])

  # Get batch of m x m identity matrices
  eye = linalg_ops.eye(order, dtype=sqrtm.dtype)  # m x m identity matrix
  eye_flat = array_ops.reshape(eye, [-1])
  eye_tiled = array_ops.tile(eye_flat, [matrix_count])
  eye_batch = array_ops.reshape(eye_tiled, shape)

  # The transpose of R is taken in the k1 term instead of k2 in
  # order to prevent redundant transposition of R (i.e. (R')' = R)
  sqrtm_transpose = array_ops.matrix_transpose(sqrtm)
  k1 = _KroneckerProduct(eye_batch, sqrtm_transpose)
  k2 = _KroneckerProduct(sqrtm, eye_batch)
  ksum = math_ops.add(k1, k2)

  # Vectorize dA
  shape_slice_size = [math_ops.subtract(array_ops.size(shape), 2)]
  shape_slice = array_ops.slice(shape, [0], shape_slice_size)
  shape_vec_da = array_ops.concat([shape_slice, [order * order], [1]], 0)
  vec_da = array_ops.reshape(array_ops.matrix_transpose(grad), shape_vec_da)

  # Solve for vec(dR)
  vec_dsqrtm = linalg_ops.matrix_solve(ksum, vec_da)

  # Solve for dR by inverse vectorizing vec(dR)
  dsqrtm_transpose = array_ops.reshape(vec_dsqrtm, shape)
  return array_ops.matrix_transpose(dsqrtm_transpose)
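A hedged single-matrix sanity check of the vectorized Sylvester setup above, in plain NumPy (no batching; "(x)" below denotes a Kronecker product):

import numpy as np
from scipy.linalg import sqrtm

a = np.array([[33., 24.], [48., 57.]])
r = sqrtm(a).real  # R, with matmul(R, R) = A
da = np.array([[1e-3, 2e-3], [0.0, 1e-3]])  # a small perturbation dA
# Row-major vec: vec(R dR + dR R) = (R (x) I + I (x) R^T) vec(dR)
ksum = np.kron(r, np.eye(2)) + np.kron(np.eye(2), r.T)
dr = np.linalg.solve(ksum, da.reshape(-1)).reshape(2, 2)
np.testing.assert_allclose(r @ dr + dr @ r, da, atol=1e-12)  # Sylvester holds
np.testing.assert_allclose((r + dr) @ (r + dr), a + da, atol=1e-5)  # first order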
@ops.RegisterGradient("LogMatrixDeterminant")
def _LogMatrixDeterminantGrad(op, _, grad_b):
"""Gradient for LogMatrixDeterminant."""

View File

@ -156,6 +156,10 @@ tf_module {
name: "solve"
argspec: "args=[\'matrix\', \'rhs\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
}
member_method {
name: "sqrtm"
argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
}
member_method {
name: "svd"
argspec: "args=[\'tensor\', \'full_matrices\', \'compute_uv\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'True\', \'None\'], "

View File

@ -1504,6 +1504,10 @@ tf_module {
name: "matrix_solve_ls"
argspec: "args=[\'matrix\', \'rhs\', \'l2_regularizer\', \'fast\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'True\', \'None\'], "
}
member_method {
name: "matrix_square_root"
argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
}
member_method {
name: "matrix_transpose"
argspec: "args=[\'a\', \'name\', \'conjugate\'], varargs=None, keywords=None, defaults=[\'matrix_transpose\', \'False\'], "

View File

@ -156,6 +156,10 @@ tf_module {
name: "solve"
argspec: "args=[\'matrix\', \'rhs\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
}
member_method {
name: "sqrtm"
argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
}
member_method {
name: "svd"
argspec: "args=[\'tensor\', \'full_matrices\', \'compute_uv\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'True\', \'None\'], "

View File

@ -1120,6 +1120,10 @@ tf_module {
name: "matrix_solve"
argspec: "args=[\'matrix\', \'rhs\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
}
member_method {
name: "matrix_square_root"
argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
}
member_method {
name: "matrix_triangular_solve"
argspec: "args=[\'matrix\', \'rhs\', \'lower\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'False\', \'None\'], "