[TF:XLA] Re-implement MatrixSolve in terms of QR decomposition.

This implementation is effectively just a MatrixInverse followed by a batched
dot, so all of the same disclaimers apply as for MatrixInverse (QR decomposition
likely to be slower than LU, doesn't support complex numbers). This is a
short-term solution to avoid having to use workarounds for tf.linalg.solve on
TPU. Performance can be improved in future (e.g. when LU is implemented).

This functionality was added previously but had to be rolled back due to slow
tests. This version has faster tests.

PiperOrigin-RevId: 277120825
Change-Id: I0a1e274738d9a9393b061f6b793180e1f20dacf7
This commit is contained in:
A. Unique TensorFlower 2019-10-28 12:37:34 -07:00 committed by TensorFlower Gardener
parent ad6be7820b
commit 269e4abc11
5 changed files with 171 additions and 0 deletions

View File

@ -300,12 +300,14 @@ bool RecursiveCompilabilityChecker::OpIsInaccurate(const Node& node) const {
bool RecursiveCompilabilityChecker::OpIsSlow(const Node& node) const {
// b/128001705: SelfAdjointEigV2 and Svd performance issues.
// b/135640736: MatrixInverse performance issues.
// b/111271662: MatrixSolve performance issues.
// https://github.com/tensorflow/tensorflow/pull/31012:
// ResizeNearestNeighbor, ResizeBilinear, and ResizeBilinearGrad sometimes
// create convolutions too large for CuDNN to handle.
return node.type_string() == "SelfAdjointEigV2" ||
node.type_string() == "Svd" || node.type_string() == "Qr" ||
node.type_string() == "MatrixInverse" ||
node.type_string() == "MatrixSolve" ||
node.type_string() == "ResizeNearestNeighbor" ||
node.type_string() == "ResizeBilinear" ||
node.type_string() == "ResizeBilinearGrad";

View File

@ -311,6 +311,20 @@ tf_xla_py_test(
],
)
# XLA compiler test for tf.linalg.solve (MatrixSolve); exercises the
# QR-decomposition-based implementation. See b/111271662.
tf_xla_py_test(
name = "matrix_solve_op_test",
size = "small",
timeout = "moderate",
srcs = ["matrix_solve_op_test.py"],
deps = [
":xla_test",
"//tensorflow/python:linalg_ops",
"//tensorflow/python:platform_test",
"//tensorflow/python:random_ops",
"@absl_py//absl/testing:parameterized",
],
)
tf_xla_py_test(
name = "matrix_triangular_solve_op_test",
size = "small",

View File

@ -0,0 +1,78 @@
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for XLA implementation of tf.linalg.solve."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl.testing import parameterized
import numpy as np
from tensorflow.compiler.tests import xla_test
from tensorflow.python.ops import linalg_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.platform import googletest
class MatrixSolveOpTest(xla_test.XLATestCase, parameterized.TestCase):
  """Compares the XLA lowering of tf.linalg.solve against numpy.linalg.solve."""

  def _verifySolve(self, x, y, adjoint):
    """Checks matrix_solve(x, y, adjoint) against numpy for every float type."""
    for dtype in self.float_types & {np.float32, np.float64}:
      tolerance = 1e-4 if dtype == np.float32 else 1e-12
      lhs = x.astype(dtype)
      rhs = y.astype(dtype)
      # numpy has no adjoint flag, so transpose the coefficient matrix by hand.
      reference_lhs = np.swapaxes(lhs, -2, -1) if adjoint else lhs
      expected = np.linalg.solve(reference_lhs, rhs)
      with self.session() as sess:
        with self.test_scope():
          solve_op = linalg_ops.matrix_solve(lhs, rhs, adjoint=adjoint)
        actual = sess.run(solve_op)
        self.assertEqual(solve_op.shape, actual.shape)
        self.assertEqual(expected.shape, actual.shape)
        self.assertAllClose(expected, actual, atol=tolerance, rtol=tolerance)

  @parameterized.named_parameters(
      ("Scalar", 1, 1, [], [], False),
      ("Vector", 5, 1, [], [], False),
      ("MultipleRHS", 5, 4, [], [], False),
      ("Adjoint", 5, 4, [], [], True),
      ("BatchedScalar", 1, 4, [2], [2], False),
      ("BatchedVector", 5, 4, [2], [2], False),
      ("BatchedRank2", 5, 4, [7, 4], [7, 4], False),
      ("BatchedAdjoint", 5, 4, [7, 4], [7, 4], True),
  )
  def testSolve(self, n, nrhs, batch_dims, rhs_batch_dims, adjoint):
    """Solves random systems across scalar, vector, and batched shapes."""
    coefficients = np.random.normal(-5.0, 5.0, batch_dims + [n, n])
    right_hand_sides = np.random.normal(-5.0, 5.0, rhs_batch_dims + [n, nrhs])
    self._verifySolve(coefficients, right_hand_sides, adjoint=adjoint)

  @parameterized.named_parameters(
      ("Simple", False),
      ("Adjoint", True),
  )
  def testConcurrent(self, adjoint):
    """Two solves in one graph must not interfere with each other."""
    with self.session() as sess:
      # Identical seeds make both problems equal, so the results must match.
      lhs_a = random_ops.random_normal([3, 3], seed=42)
      lhs_b = random_ops.random_normal([3, 3], seed=42)
      rhs_a = random_ops.random_normal([3, 3], seed=42)
      rhs_b = random_ops.random_normal([3, 3], seed=42)
      with self.test_scope():
        solution_a = linalg_ops.matrix_solve(lhs_a, rhs_a, adjoint=adjoint)
        solution_b = linalg_ops.matrix_solve(lhs_b, rhs_b, adjoint=adjoint)
      result_a, result_b = sess.run([solution_a, solution_b])
      self.assertAllEqual(result_a, result_b)
# Test entry point: discovers and runs all test cases in this module.
if __name__ == "__main__":
  googletest.main()

View File

@ -57,6 +57,7 @@ tf_kernel_library(
"matrix_band_part_op.cc",
"matrix_diag_ops.cc",
"matrix_inverse_op.cc",
"matrix_solve_op.cc",
"matrix_triangular_solve_op.cc",
"mirror_pad_op.cc",
"next_after_op.cc",

View File

@ -0,0 +1,76 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
#include "tensorflow/compiler/xla/client/lib/matrix.h"
#include "tensorflow/compiler/xla/client/lib/qr.h"
#include "tensorflow/compiler/xla/client/xla_builder.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
namespace tensorflow {
namespace {
class MatrixSolveOp : public XlaOpKernel {
public:
explicit MatrixSolveOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
OP_REQUIRES_OK(ctx, ctx->GetAttr("adjoint", &adjoint_));
}
void Compile(XlaOpKernelContext* ctx) override {
const TensorShape matrix_shape = ctx->InputShape(0);
int64 matrix_ndims = matrix_shape.dims();
OP_REQUIRES(ctx, matrix_ndims >= 2,
errors::InvalidArgument(
"Input matrix must have rank >= 2, got ", matrix_ndims));
OP_REQUIRES(ctx,
matrix_shape.dim_size(matrix_ndims - 2) ==
matrix_shape.dim_size(matrix_ndims - 1),
errors::InvalidArgument(
"Input matrices must be square, got",
matrix_shape.dim_size(matrix_ndims - 2),
" != ", matrix_shape.dim_size(matrix_ndims - 1)));
xla::XlaOp matrix = ctx->Input(0);
xla::XlaOp rhs = ctx->Input(1);
// TODO(b/111271662): Using LU decomposition instead of QR should be faster.
auto qr = xla::QRDecomposition(matrix, /*full_matrices=*/false);
OP_REQUIRES_OK(ctx, qr.status());
xla::XlaOp inv = xla::TriangularSolve(
qr.ValueOrDie().r, xla::TransposeInMinorDims(qr.ValueOrDie().q),
/*left_side=*/true,
/*lower=*/false, /*unit_diagonal=*/false,
/*transpose_a=*/
xla::TriangularSolveOptions::NO_TRANSPOSE);
xla::XlaOp output =
xla::BatchDot(inv, adjoint_, rhs,
/*transpose_y=*/false, xla::PrecisionConfig::HIGHEST);
ctx->SetOutput(0, output);
}
private:
bool adjoint_;
TF_DISALLOW_COPY_AND_ASSIGN(MatrixSolveOp);
};
// TODO(b/111271662): Support integer and complex types.
REGISTER_XLA_OP(Name("MatrixSolve").TypeConstraint("T", kFloatTypes),
MatrixSolveOp);
} // namespace
} // namespace tensorflow