From 9baeb353e1d22ec2154401249c2d3170fd9cd153 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Mar 2019 15:49:07 -0800 Subject: [PATCH] [TF:XLA] Implement XlaSvdOp PiperOrigin-RevId: 236936547 --- tensorflow/compiler/tests/BUILD | 21 +++- tensorflow/compiler/tests/svd_op_test.py | 81 ++++++++++++++++ tensorflow/compiler/tf2xla/kernels/BUILD | 3 + .../compiler/tf2xla/kernels/xla_conv_op.cc | 7 +- .../compiler/tf2xla/kernels/xla_svd_op.cc | 95 +++++++++++++++++++ tensorflow/compiler/tf2xla/ops/xla_ops.cc | 34 +++++++ tensorflow/compiler/tf2xla/python/xla.py | 7 ++ tensorflow/compiler/xla/client/lib/svd.cc | 15 ++- tensorflow/python/kernel_tests/BUILD | 6 +- .../python/kernel_tests/norm_op_test.py | 2 +- 10 files changed, 249 insertions(+), 22 deletions(-) create mode 100644 tensorflow/compiler/tests/svd_op_test.py create mode 100644 tensorflow/compiler/tf2xla/kernels/xla_svd_op.cc diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 65f5fba269c..21d5df0d987 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -247,12 +247,23 @@ tf_xla_py_test( name = "self_adjoint_eig_op_test", size = "medium", srcs = ["self_adjoint_eig_op_test.py"], - # TODO(kuny): remove it after b/124377352 is fixed. - disabled_backends = [ - "cpu", - "gpu", - "cpu_ondemand", + tags = ["optonly"], + deps = [ + ":xla_test", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework", + "//tensorflow/python:map_fn", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:training", + "@absl_py//absl/testing:parameterized", ], +) + +tf_xla_py_test( + name = "svd_op_test", + size = "medium", + srcs = ["svd_op_test.py"], tags = ["optonly"], deps = [ ":xla_test", diff --git a/tensorflow/compiler/tests/svd_op_test.py b/tensorflow/compiler/tests/svd_op_test.py new file mode 100644 index 00000000000..73bec949f3d --- /dev/null +++ b/tensorflow/compiler/tests/svd_op_test.py @@ -0,0 +1,81 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for tensorflow.ops.svd.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import itertools +from absl.testing import parameterized +import numpy as np + +from tensorflow.compiler.tests import xla_test +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import linalg_ops +from tensorflow.python.platform import test + + +class SvdOpTest(xla_test.XLATestCase, parameterized.TestCase): + + def _compute_usvt(self, s, u, v): + m = u.shape[-1] + n = v.shape[-1] + if m <= n: + v = v[..., :m] + else: + u = u[..., :n] + + return np.matmul(u * s[..., None, :], np.swapaxes(v, -1, -2)) + + def _testSvdCorrectness(self, dtype, shape): + np.random.seed(1) + x_np = np.random.uniform(low=-1.0, high=1.0, size=shape).astype(dtype) + m, n = shape[-2], shape[-1] + _, s_np, _ = np.linalg.svd(x_np) + with self.cached_session() as sess: + x_tf = array_ops.placeholder(dtype) + with self.test_scope(): + s, u, v = linalg_ops.svd(x_tf, full_matrices=True) + s_val, u_val, v_val = sess.run([s, u, v], feed_dict={x_tf: x_np}) + u_diff = np.matmul(u_val, np.swapaxes(u_val, -1, -2)) - np.eye(m) + v_diff = np.matmul(v_val, np.swapaxes(v_val, -1, -2)) - np.eye(n) + # Check u_val and v_val are orthogonal matrices. + self.assertLess(np.linalg.norm(u_diff), 1e-2) + self.assertLess(np.linalg.norm(v_diff), 1e-2) + # Check that the singular values are correct, i.e., close to the ones from + # numpy.lingal.svd. + self.assertLess(np.linalg.norm(s_val - s_np), 1e-2) + # The tolerance is set based on our tests on numpy's svd. As our tests + # have batch dimensions and all our operations are on float32, we set the + # tolerance a bit larger. Numpy's svd calls LAPACK's svd, which operates + # on double precision. + self.assertLess( + np.linalg.norm(self._compute_usvt(s_val, u_val, v_val) - x_np), 2e-2) + + SIZES = [1, 2, 5, 10, 32, 64] + DTYPES = [np.float32] + PARAMS = itertools.product(SIZES, DTYPES) + + @parameterized.parameters(*PARAMS) + def testSvd(self, n, dtype): + for batch_dims in [(), (3,)] + [(3, 2)] * (n < 10): + self._testSvdCorrectness(dtype, batch_dims + (n, n)) + self._testSvdCorrectness(dtype, batch_dims + (2 * n, n)) + self._testSvdCorrectness(dtype, batch_dims + (n, 2 * n)) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index 38b460f00d5..9375d2e93ce 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -110,6 +110,7 @@ tf_kernel_library( "xla_reduce_op.cc", "xla_select_and_scatter_op.cc", "xla_self_adjoint_eig_op.cc", + "xla_svd_op.cc", ], hdrs = [ "index_ops.h", @@ -149,7 +150,9 @@ tf_kernel_library( "//tensorflow/compiler/xla/client/lib:qr", "//tensorflow/compiler/xla/client/lib:quantize", "//tensorflow/compiler/xla/client/lib:self_adjoint_eig", + "//tensorflow/compiler/xla/client/lib:slicing", "//tensorflow/compiler/xla/client/lib:sorting", + "//tensorflow/compiler/xla/client/lib:svd", "//tensorflow/core:bitwise_ops_op_lib", "//tensorflow/core:control_flow_ops_op_lib", "//tensorflow/core:data_flow_ops_op_lib", diff --git a/tensorflow/compiler/tf2xla/kernels/xla_conv_op.cc b/tensorflow/compiler/tf2xla/kernels/xla_conv_op.cc index b20adc592a0..0b5b66ae52f 100644 --- a/tensorflow/compiler/tf2xla/kernels/xla_conv_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/xla_conv_op.cc @@ -35,10 +35,9 @@ class XlaConvOp : public XlaOpKernel { string precision_config_attr; OP_REQUIRES_OK( context, context->GetAttr("precision_config", &precision_config_attr)); - OP_REQUIRES( - context, - precision_config_.ParsePartialFromString(precision_config_attr), - errors::InvalidArgument("Error parsing convolution dimension numbers")); + OP_REQUIRES(context, + precision_config_.ParsePartialFromString(precision_config_attr), + errors::InvalidArgument("Error parsing precison config.")); } void Compile(XlaOpKernelContext* context) override { diff --git a/tensorflow/compiler/tf2xla/kernels/xla_svd_op.cc b/tensorflow/compiler/tf2xla/kernels/xla_svd_op.cc new file mode 100644 index 00000000000..de627b8d1b3 --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/xla_svd_op.cc @@ -0,0 +1,95 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/client/lib/constants.h" +#include "tensorflow/compiler/xla/client/lib/slicing.h" +#include "tensorflow/compiler/xla/client/lib/svd.h" + +namespace tensorflow { +namespace { + +class XlaSvdOp : public XlaOpKernel { + public: + explicit XlaSvdOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("max_iter", &max_iter_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("epsilon", &epsilon_)); + string precision_config_attr; + OP_REQUIRES_OK(ctx, + ctx->GetAttr("precision_config", &precision_config_attr)); + OP_REQUIRES(ctx, + precision_config_.ParsePartialFromString(precision_config_attr), + errors::InvalidArgument("Error parsing precison config.")); + if (precision_config_.operand_precision_size() == 0) { + precision_config_.mutable_operand_precision()->Add( + xla::PrecisionConfig::HIGHEST); + } + } + void Compile(XlaOpKernelContext* ctx) override { + auto result = xla::SVD(ctx->Input(0), max_iter_, epsilon_, + precision_config_.operand_precision(0)); + ctx->SetOutput(0, result.d); + ctx->SetOutput(1, result.u); + ctx->SetOutput(2, result.v); + } + + private: + int32 max_iter_; + float epsilon_; + xla::PrecisionConfig precision_config_; +}; + +class SvdOp : public XlaOpKernel { + public: + explicit SvdOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("compute_uv", &compute_uv_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("full_matrices", &full_matrices_)); + } + void Compile(XlaOpKernelContext* ctx) override { + const TensorShape input_shape = ctx->InputShape("input"); + int m = input_shape.dim_size(input_shape.dims() - 2); + int n = input_shape.dim_size(input_shape.dims() - 1); + // This is based on heuristics that approx log(n) sweep updates are needed. + // Note: the heuristics provides no theoretical guarantee, max_iter=100 and + // epsilon should be used to determine exit condition. + int max_iter = 2 * tensorflow::Log2Ceiling(std::max(m, n)); + auto result = xla::SVD(ctx->Input(0), max_iter, 1e-6); + ctx->SetOutput(0, result.d); + if (compute_uv_) { + int p = std::min(m, n); + if (!full_matrices_) { + if (p < m) { + result.u = xla::SliceInMinorDims(result.u, {0, 0}, {m, p}); + } + if (p < n) { + result.v = xla::SliceInMinorDims(result.v, {0, 0}, {n, p}); + } + } + ctx->SetOutput(1, result.u); + ctx->SetOutput(2, result.v); + } + } + + private: + bool compute_uv_; + bool full_matrices_; +}; + +REGISTER_XLA_OP(Name("XlaSvd").TypeConstraint("T", kFloatTypes), XlaSvdOp); +REGISTER_XLA_OP(Name("Svd").TypeConstraint("T", kFloatTypes), SvdOp); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc index ccd58071d35..e5f0969f378 100644 --- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc +++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc @@ -91,6 +91,40 @@ v: The column v[..., :, i] is the normalized eigenvector corresponding to the eigenvalue w[..., i]. )doc"); +REGISTER_OP("XlaSvd") + .Input("a: T") + .Attr("max_iter: int") + .Attr("epsilon: float") + .Attr("precision_config: string") + .Output("s: T") + .Output("u: T") + .Output("v: T") + .SetShapeFn(shape_inference::UnknownShape) + .Attr("T: numbertype") + .Doc(R"doc( +Computes the eigen decomposition of a batch of self-adjoint matrices +(Note: Only real inputs are supported). + +Computes the eigenvalues and eigenvectors of the innermost M-by-N matrices in +tensor such that tensor[...,:,:] = u[..., :, :] * Diag(s[..., :]) * Transpose(v[...,:,:]). + +a: the input tensor. + +max_iter: maximum number of sweep update, i.e., the whole lower triangular + part or upper triangular part based on parameter lower. Heuristically, it has + been argued that approximatly log(min (M, N)) sweeps are needed in practice + (Ref: Golub & van Loan "Matrix Computation"). + +epsilon: the tolerance ratio. + +precision_config: a serialized xla::PrecisionConfig proto. + +s: Singular values. The values are sorted in reverse order of magnitude, so + s[..., 0] is the largest value, s[..., 1] is the second largest, etc. +u: Left singular vectors. +v: Right singular vectors. +)doc"); + REGISTER_OP("XlaConv") .Input("lhs: T") .Input("rhs: T") diff --git a/tensorflow/compiler/tf2xla/python/xla.py b/tensorflow/compiler/tf2xla/python/xla.py index de4710d03a3..19c5333ece0 100644 --- a/tensorflow/compiler/tf2xla/python/xla.py +++ b/tensorflow/compiler/tf2xla/python/xla.py @@ -295,6 +295,13 @@ def self_adjoint_eig(a, lower, max_iter, epsilon): return gen_xla_ops.xla_self_adjoint_eig(a, lower, max_iter, epsilon) +def svd(a, max_iter, epsilon, precision_config=None): + precision_config_proto = "" + if precision_config: + precision_config_proto = precision_config.SerializeToString() + return gen_xla_ops.xla_svd(a, max_iter, epsilon, precision_config_proto) + + dynamic_slice = gen_xla_ops.xla_dynamic_slice dynamic_update_slice = gen_xla_ops.xla_dynamic_update_slice diff --git a/tensorflow/compiler/xla/client/lib/svd.cc b/tensorflow/compiler/xla/client/lib/svd.cc index 8dad4cab515..dd4547dbab6 100644 --- a/tensorflow/compiler/xla/client/lib/svd.cc +++ b/tensorflow/compiler/xla/client/lib/svd.cc @@ -750,19 +750,18 @@ StatusOr SortBySingularValuesAndPostProcessing(SVDResult result) { result.v = Mul(result.v, sign, broadcast_dims); d = BroadcastInDim(d, dimensions, broadcast_dims); - auto zero = Zero(builder, S32); // As m >= n, only first m columns vectors are needed to be permuted, and the // rest of m - n vectors are appended after the sorting is done. XlaOp sort_u_result = - Sort({-d, DynamicSliceInMinorDims(result.u, {zero, zero}, {m, n})}, + Sort({-d, SliceInMinorDims(result.u, {0, 0}, {m, n})}, CreateScalarLtComputation( {shape.element_type(), shape.element_type()}, builder), num_dims - 1); // TODO(kuny): using CreateScalarGtComputation after b/124862300 is fixed. XlaOp sort_v_result = - Sort({DynamicSliceInMinorDims(-d, {zero, zero}, {n, n}), result.v}, + Sort({SliceInMinorDims(-d, {0, 0}, {n, n}), result.v}, CreateScalarLtComputation( {shape.element_type(), shape.element_type()}, builder), num_dims - 1); @@ -779,12 +778,10 @@ StatusOr SortBySingularValuesAndPostProcessing(SVDResult result) { broadcast_dims); // Append the rest of m - n vectors. - result.u = - ConcatInDim(builder, - {GetTupleElement(sort_u_result, 1), - DynamicSliceInMinorDims( - result.u, {zero, ScalarLike(zero, n)}, {m, m - n})}, - num_dims - 1); + result.u = ConcatInDim(builder, + {GetTupleElement(sort_u_result, 1), + SliceInMinorDims(result.u, {0, n}, {m, m})}, + num_dims - 1); result.u = Mul( result.u, Rsqrt(Reduce(Square(result.u), ScalarLike(d, 0.0), diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 650d607e145..b29cf293f57 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -3348,7 +3348,7 @@ cuda_py_test( "no_rocm", # flaky test "no_windows", ], - # TODO(kuny): Add xla_enable_strict_auto_jit = True after b/124377352 is fixed. + # b/127344411: xla_enable_strict_auto_jit = True, ) cuda_py_test( @@ -3385,7 +3385,7 @@ cuda_py_test( "no_oss", # b/117185141. "nomsan", # TODO(b/117236102): Re-enable in msan build. ], - xla_enable_strict_auto_jit = True, + # b/127344411: xla_enable_strict_auto_jit = True, ) cuda_py_test( @@ -3405,7 +3405,7 @@ cuda_py_test( "no_windows_gpu", "nomsan", ], - xla_enable_strict_auto_jit = True, + # b/127344411: xla_enable_strict_auto_jit = True, ) cuda_py_test( diff --git a/tensorflow/python/kernel_tests/norm_op_test.py b/tensorflow/python/kernel_tests/norm_op_test.py index 20b9ad95c8b..bfea2134454 100644 --- a/tensorflow/python/kernel_tests/norm_op_test.py +++ b/tensorflow/python/kernel_tests/norm_op_test.py @@ -89,7 +89,7 @@ def _GetNormOpTest(dtype_, shape_, ord_, axis_, keep_dims_, use_static_shape_): if ((not is_matrix_norm and ord_ == "fro") or (is_matrix_norm and is_fancy_p_norm)): self.skipTest("Not supported by neither numpy.linalg.norm nor tf.norm") - if ord_ == 'euclidean' or (axis_ is None and len(shape) > 2): + if ord_ == "euclidean" or (axis_ is None and len(shape) > 2): self.skipTest("Not supported by numpy.linalg.norm") matrix = np.random.randn(*shape_).astype(dtype_) if dtype_ in (np.complex64, np.complex128):