From 4e0809914c8879798ee8dad157524d4bb01d7c17 Mon Sep 17 00:00:00 2001 From: Evgenii Zheltonozhskii Date: Tue, 29 Oct 2019 08:31:39 +0200 Subject: [PATCH 01/11] Initial implementation of eigendecomposition gradients --- tensorflow/python/kernel_tests/eig_op_test.py | 83 +++++++++++++++++-- tensorflow/python/ops/linalg_grad.py | 58 +++++++++++-- 2 files changed, 128 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/kernel_tests/eig_op_test.py b/tensorflow/python/kernel_tests/eig_op_test.py index beaf0f574ca..7a202acb237 100644 --- a/tensorflow/python/kernel_tests/eig_op_test.py +++ b/tensorflow/python/kernel_tests/eig_op_test.py @@ -24,6 +24,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes as dtypes_lib from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradient_checker_v2 from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops @@ -147,17 +148,23 @@ def _GetEigTest(dtype_, shape_, compute_v_): n = shape_[-1] batch_shape = shape_[:-2] np_dtype = dtype_.as_numpy_dtype - # most of matrices are diagonalizable # TODO - a = np.random.uniform( - low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype) - if dtype_.is_complex: - a += 1j * np.random.uniform( + + def RandomInput(): + # most of matrices are diagonalizable # TODO + a = np.random.uniform( low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype) - a = np.tile(a, batch_shape + (1, 1)) + if dtype_.is_complex: + a += 1j * np.random.uniform( + low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype) + a = np.tile(a, batch_shape + (1, 1)) + return a + if dtype_ in (dtypes_lib.float32, dtypes_lib.complex64): atol = 1e-4 else: atol = 1e-12 + + a = RandomInput() np_e, np_v = np.linalg.eig(a) with self.session(use_gpu=True): if compute_v_: @@ -182,6 +189,67 @@ def _GetEigTest(dtype_, shape_, compute_v_): return Test +class EigGradTest(test.TestCase): + pass # Filled in below + + +def _GetEigGradTest(dtype_, shape_, compute_v_): + + def Test(self): + np.random.seed(1) + n = shape_[-1] + batch_shape = shape_[:-2] + np_dtype = dtype_.as_numpy_dtype + + def RandomInput(): + # most of matrices are diagonalizable # TODO + a = np.random.uniform( + low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype) + if dtype_.is_complex: + a += 1j * np.random.uniform( + low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype) + a = np.tile(a, batch_shape + (1, 1)) + return a + + # Optimal stepsize for central difference is O(epsilon^{1/3}). + epsilon = np.finfo(np_dtype).eps + delta = 0.1 * epsilon**(1.0 / 3.0) + # tolerance obtained by looking at actual differences using + # np.linalg.norm(theoretical-numerical, np.inf) on -mavx build + # after discarding one random input sample + _ = RandomInput() + if dtype_ in (dtypes_lib.float32, dtypes_lib.complex64): + tol = 1e-2 + else: + tol = 1e-7 + with self.session(use_gpu=True): + def Compute(x): + e, v = linalg_ops.eig(x) + # (complex) Eigenvectors are only unique up to an arbitrary phase + # We normalize the vectors such that the first component has phase 0. 
+ top_rows = v[..., 0:1, :] + if dtype_.is_complex: + angle = -math_ops.angle(top_rows) + phase = math_ops.complex(math_ops.cos(angle), math_ops.sin(angle)) + else: + phase = math_ops.sign(top_rows) + v *= phase + return e, v + + if compute_v_: + funcs = [lambda x: Compute(x)[0], lambda x: Compute(x)[1]] + else: + funcs = [linalg_ops.self_adjoint_eigvals] + + for f in funcs: + theoretical, numerical = gradient_checker_v2.compute_gradient( + f, + [RandomInput()], + delta=delta) + self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol) + + return Test + if __name__ == "__main__": dtypes_to_test = [ dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.complex64, @@ -194,5 +262,6 @@ if __name__ == "__main__": shape = batch_dims + (size, size) name = "%s_%s_%s" % (dtype.name, "_".join(map(str, shape)), compute_v) _AddTest(EigTest, "Eig", name, _GetEigTest(dtype, shape, compute_v)) - # No gradient yet + _AddTest(EigGradTest, "EigGrad", name, + _GetEigGradTest(dtype, shape, compute_v)) test.main() diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py index f456581ef60..45a73c4e17c 100644 --- a/tensorflow/python/ops/linalg_grad.py +++ b/tensorflow/python/ops/linalg_grad.py @@ -633,6 +633,57 @@ def _MatrixTriangularSolveGrad(op, grad): return grad_a, grad_b +# To avoid nan in cases with degenerate eigenvalues or +# degenerate/zero singular values in calculations of +# f and s_inv_mat, we introduce a Lorentz brodening. +def _SafeReciprocal(x, epsilon=1E-20): + return x * math_ops.reciprocal(x * x + epsilon) + +@ops.RegisterGradient("Eig") +def _EigGrad(op, grad_e, grad_v): + """Gradient for Eig. + Based on report by Mike Giles + https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf + See also + "Computation of eigenvalue and eigenvector derivatives + for a general complex-valued eigensystem" by Nico van der Aa. + As for now only distinct eigenvalue case is considered. + """ + e = op.outputs[0] + compute_v = op.get_attr("compute_v") + # a = op.inputs[0], which satisfies + # a[...,:,:] * v[...,:,i] = e[...,i] * v[...,i] + with ops.control_dependencies([grad_e, grad_v]): + if compute_v: + v = op.outputs[1] + w = _linalg.adjoint(linalg_ops.matrix_inverse(v)) + # Construct the matrix f(i,j) = (i != j ? 1 / (e_i - e_j) : 0). + # Notice that because of the term involving f, the gradient becomes + # infinite (or NaN in practice) when eigenvalues are not unique. + # Mathematically this should not be surprising, since for (k-fold) + # degenerate eigenvalues, the corresponding eigenvectors are only defined + # up to arbitrary rotation in a (k-dimensional) subspace. + f = array_ops.matrix_set_diag( + _SafeReciprocal( + array_ops.expand_dims(e, -2) - array_ops.expand_dims(e, -1)), + array_ops.zeros_like(e)) + grad_a = math_ops.matmul( + w, + math_ops.matmul( + array_ops.matrix_diag(grad_e) + + f * math_ops.matmul(v, grad_v, adjoint_a=True), + v, + adjoint_b=True)) + else: + _, v = linalg_ops.self_adjoint_eig(op.inputs[0]) + w = _linalg.adjoint(linalg_ops.matrix_inverse(v)) + grad_a = math_ops.matmul(w, + math_ops.matmul( + array_ops.matrix_diag(grad_e), + v, + adjoint_b=True)) + return math_ops.cast(grad_a, op.inputs[0].dtype) + @ops.RegisterGradient("SelfAdjointEigV2") def _SelfAdjointEigV2Grad(op, grad_e, grad_v): """Gradient for SelfAdjointEigV2.""" @@ -650,7 +701,7 @@ def _SelfAdjointEigV2Grad(op, grad_e, grad_v): # degenerate eigenvalues, the corresponding eigenvectors are only defined # up to arbitrary rotation in a (k-dimensional) subspace. 
f = array_ops.matrix_set_diag( - math_ops.reciprocal( + _SafeReciprocal( array_ops.expand_dims(e, -2) - array_ops.expand_dims(e, -1)), array_ops.zeros_like(e)) grad_a = math_ops.matmul( @@ -745,11 +796,6 @@ def _SvdGrad(op, grad_s, grad_u, grad_v): # only defined up a (k-dimensional) subspace. In practice, this can # lead to numerical instability when singular values are close but not # exactly equal. - # To avoid nan in cases with degenerate sigular values or zero singular values - # in calculating f and s_inv_mat, we introduce a Lorentz brodening. - - def _SafeReciprocal(x, epsilon=1E-20): - return x * math_ops.reciprocal(x * x + epsilon) s_shape = array_ops.shape(s) f = array_ops.matrix_set_diag( From c586924b44e23012050c77dc30bc12cc5170bf19 Mon Sep 17 00:00:00 2001 From: Evgenii Zheltonozhskii Date: Sun, 3 Nov 2019 18:02:47 +0200 Subject: [PATCH 02/11] Add missing diagonal term --- tensorflow/python/kernel_tests/eig_op_test.py | 2 +- tensorflow/python/ops/linalg_grad.py | 22 +++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/kernel_tests/eig_op_test.py b/tensorflow/python/kernel_tests/eig_op_test.py index 7a202acb237..1fe98910c98 100644 --- a/tensorflow/python/kernel_tests/eig_op_test.py +++ b/tensorflow/python/kernel_tests/eig_op_test.py @@ -262,6 +262,6 @@ if __name__ == "__main__": shape = batch_dims + (size, size) name = "%s_%s_%s" % (dtype.name, "_".join(map(str, shape)), compute_v) _AddTest(EigTest, "Eig", name, _GetEigTest(dtype, shape, compute_v)) - _AddTest(EigGradTest, "EigGrad", name, + _AddTest(EigGradTest, "EigGrad", name, _GetEigGradTest(dtype, shape, compute_v)) test.main() diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py index 45a73c4e17c..f1395108d65 100644 --- a/tensorflow/python/ops/linalg_grad.py +++ b/tensorflow/python/ops/linalg_grad.py @@ -642,8 +642,9 @@ def _SafeReciprocal(x, epsilon=1E-20): @ops.RegisterGradient("Eig") def _EigGrad(op, grad_e, grad_v): """Gradient for Eig. - Based on report by Mike Giles - https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf + Based on eq. 4.77 from paper by + Christoph Boeddeker et al. + https://arxiv.org/abs/1701.00392 See also "Computation of eigenvalue and eigenvector derivatives for a general complex-valued eigensystem" by Nico van der Aa. 
@@ -667,13 +668,16 @@ def _EigGrad(op, grad_e, grad_v): _SafeReciprocal( array_ops.expand_dims(e, -2) - array_ops.expand_dims(e, -1)), array_ops.zeros_like(e)) - grad_a = math_ops.matmul( - w, - math_ops.matmul( - array_ops.matrix_diag(grad_e) + - f * math_ops.matmul(v, grad_v, adjoint_a=True), - v, - adjoint_b=True)) + + vgv = math_ops.matmul(v, grad_v, adjoint_a=True) + mid = array_ops.matrix_diag(grad_e) + diag_grad_part = array_ops.matrix_set_diag( + array_ops.zeros_like(vgv), + array_ops.matrix_diag_part( + math_ops.cast(math_ops.real(vgv), vgv.dtype))) + mid += f * (vgv - math_ops.matmul(math_ops.matmul(v, v, adjoint_a=True), + diag_grad_part)) + grad_a = math_ops.matmul(w, math_ops.matmul(mid, v, adjoint_b=True)) else: _, v = linalg_ops.self_adjoint_eig(op.inputs[0]) w = _linalg.adjoint(linalg_ops.matrix_inverse(v)) From ec19ba73b7688d14ab1f9f3852202435dc8bfe20 Mon Sep 17 00:00:00 2001 From: Evgenii Zheltonozhskii Date: Tue, 5 Nov 2019 12:39:02 +0200 Subject: [PATCH 03/11] Fix wrong call for eigenvalue function --- tensorflow/python/kernel_tests/eig_op_test.py | 2 +- tensorflow/python/ops/linalg_grad.py | 25 ++++++++----------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/tensorflow/python/kernel_tests/eig_op_test.py b/tensorflow/python/kernel_tests/eig_op_test.py index 1fe98910c98..a02b9bb5095 100644 --- a/tensorflow/python/kernel_tests/eig_op_test.py +++ b/tensorflow/python/kernel_tests/eig_op_test.py @@ -239,7 +239,7 @@ def _GetEigGradTest(dtype_, shape_, compute_v_): if compute_v_: funcs = [lambda x: Compute(x)[0], lambda x: Compute(x)[1]] else: - funcs = [linalg_ops.self_adjoint_eigvals] + funcs = [linalg_ops.eigvals] for f in funcs: theoretical, numerical = gradient_checker_v2.compute_gradient( diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py index f1395108d65..ec900ab991f 100644 --- a/tensorflow/python/ops/linalg_grad.py +++ b/tensorflow/python/ops/linalg_grad.py @@ -657,7 +657,8 @@ def _EigGrad(op, grad_e, grad_v): with ops.control_dependencies([grad_e, grad_v]): if compute_v: v = op.outputs[1] - w = _linalg.adjoint(linalg_ops.matrix_inverse(v)) + vt = _linalg.adjoint(v) + w = linalg_ops.matrix_inverse(vt) # Construct the matrix f(i,j) = (i != j ? 1 / (e_i - e_j) : 0). # Notice that because of the term involving f, the gradient becomes # infinite (or NaN in practice) when eigenvalues are not unique. 
@@ -668,24 +669,20 @@ def _EigGrad(op, grad_e, grad_v): _SafeReciprocal( array_ops.expand_dims(e, -2) - array_ops.expand_dims(e, -1)), array_ops.zeros_like(e)) - - vgv = math_ops.matmul(v, grad_v, adjoint_a=True) + vgv = math_ops.matmul(vt, grad_v) mid = array_ops.matrix_diag(grad_e) - diag_grad_part = array_ops.matrix_set_diag( - array_ops.zeros_like(vgv), - array_ops.matrix_diag_part( - math_ops.cast(math_ops.real(vgv), vgv.dtype))) - mid += f * (vgv - math_ops.matmul(math_ops.matmul(v, v, adjoint_a=True), - diag_grad_part)) - grad_a = math_ops.matmul(w, math_ops.matmul(mid, v, adjoint_b=True)) + diag_grad_part = array_ops.matrix_diag(array_ops.matrix_diag_part( + math_ops.cast(math_ops.real(vgv), vgv.dtype))) + mid += f * (vgv - math_ops.matmul(math_ops.matmul(vt, v), diag_grad_part)) + grad_a = math_ops.matmul(w, math_ops.matmul(mid, vt)) else: - _, v = linalg_ops.self_adjoint_eig(op.inputs[0]) - w = _linalg.adjoint(linalg_ops.matrix_inverse(v)) + _, v = linalg_ops.eig(op.inputs[0]) + vt = _linalg.adjoint(v) + w = linalg_ops.matrix_inverse(vt) grad_a = math_ops.matmul(w, math_ops.matmul( array_ops.matrix_diag(grad_e), - v, - adjoint_b=True)) + vt)) return math_ops.cast(grad_a, op.inputs[0].dtype) @ops.RegisterGradient("SelfAdjointEigV2") From 1b1b68f03a450f13b8891c756b79d0d17833942e Mon Sep 17 00:00:00 2001 From: Evgenii Zheltonozhskii Date: Sat, 9 Nov 2019 06:44:01 +0200 Subject: [PATCH 04/11] Fix f conjugation --- tensorflow/python/kernel_tests/eig_op_test.py | 10 ++++++---- tensorflow/python/ops/linalg_grad.py | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/kernel_tests/eig_op_test.py b/tensorflow/python/kernel_tests/eig_op_test.py index a02b9bb5095..fff36b78ebc 100644 --- a/tensorflow/python/kernel_tests/eig_op_test.py +++ b/tensorflow/python/kernel_tests/eig_op_test.py @@ -251,10 +251,12 @@ def _GetEigGradTest(dtype_, shape_, compute_v_): return Test if __name__ == "__main__": - dtypes_to_test = [ - dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.complex64, - dtypes_lib.complex128 - ] + dtypes_to_test = [dtypes_lib.complex64, dtypes_lib.complex128] + # TODO: is gradient_check able to deal with complex outputs of real inputs? 
+ # [ + # dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.complex64, + # dtypes_lib.complex128 + # ] for compute_v in True, False: for dtype in dtypes_to_test: for size in 1, 2, 5, 10: diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py index ec900ab991f..2d8bb63825b 100644 --- a/tensorflow/python/ops/linalg_grad.py +++ b/tensorflow/python/ops/linalg_grad.py @@ -669,6 +669,7 @@ def _EigGrad(op, grad_e, grad_v): _SafeReciprocal( array_ops.expand_dims(e, -2) - array_ops.expand_dims(e, -1)), array_ops.zeros_like(e)) + f = math_ops.conj(f) vgv = math_ops.matmul(vt, grad_v) mid = array_ops.matrix_diag(grad_e) diag_grad_part = array_ops.matrix_diag(array_ops.matrix_diag_part( From 1838163152217eac4d8cb9bf960beac29f38b969 Mon Sep 17 00:00:00 2001 From: Evgenii Zheltonozhskii Date: Tue, 12 Nov 2019 09:45:07 +0200 Subject: [PATCH 05/11] Fix comments, return non-gradient tests for real inputs --- tensorflow/python/kernel_tests/eig_op_test.py | 12 ++++++++---- tensorflow/python/ops/linalg_grad.py | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/kernel_tests/eig_op_test.py b/tensorflow/python/kernel_tests/eig_op_test.py index fff36b78ebc..4cc98884fa8 100644 --- a/tensorflow/python/kernel_tests/eig_op_test.py +++ b/tensorflow/python/kernel_tests/eig_op_test.py @@ -150,7 +150,7 @@ def _GetEigTest(dtype_, shape_, compute_v_): np_dtype = dtype_.as_numpy_dtype def RandomInput(): - # most of matrices are diagonalizable # TODO + # Most matrices are diagonalizable a = np.random.uniform( low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype) if dtype_.is_complex: @@ -202,7 +202,7 @@ def _GetEigGradTest(dtype_, shape_, compute_v_): np_dtype = dtype_.as_numpy_dtype def RandomInput(): - # most of matrices are diagonalizable # TODO + # Most matrices are diagonalizable a = np.random.uniform( low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype) if dtype_.is_complex: @@ -264,6 +264,10 @@ if __name__ == "__main__": shape = batch_dims + (size, size) name = "%s_%s_%s" % (dtype.name, "_".join(map(str, shape)), compute_v) _AddTest(EigTest, "Eig", name, _GetEigTest(dtype, shape, compute_v)) - _AddTest(EigGradTest, "EigGrad", name, - _GetEigGradTest(dtype, shape, compute_v)) + + # TODO: gradient_check gets wrong numeric output for real inputs + # (might be connected with the fact that outputs are complex) + if dtype not in [dtypes_lib.float32, dtypes_lib.float64]: + _AddTest(EigGradTest, "EigGrad", name, + _GetEigGradTest(dtype, shape, compute_v)) test.main() diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py index 2d8bb63825b..25f889cfe42 100644 --- a/tensorflow/python/ops/linalg_grad.py +++ b/tensorflow/python/ops/linalg_grad.py @@ -635,7 +635,7 @@ def _MatrixTriangularSolveGrad(op, grad): # To avoid nan in cases with degenerate eigenvalues or # degenerate/zero singular values in calculations of -# f and s_inv_mat, we introduce a Lorentz brodening. +# f and s_inv_mat, we introduce a Lorentz broadening. 
def _SafeReciprocal(x, epsilon=1E-20): return x * math_ops.reciprocal(x * x + epsilon) From 134e1008c4012ce64a93eea7d5102d7e02154baa Mon Sep 17 00:00:00 2001 From: Evgeniy Zheltonozhskiy Date: Wed, 13 Nov 2019 00:09:07 +0200 Subject: [PATCH 06/11] Replace matmul with linalg.solve --- tensorflow/python/ops/linalg_grad.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py index 25f889cfe42..6dbb7a3f611 100644 --- a/tensorflow/python/ops/linalg_grad.py +++ b/tensorflow/python/ops/linalg_grad.py @@ -658,7 +658,6 @@ def _EigGrad(op, grad_e, grad_v): if compute_v: v = op.outputs[1] vt = _linalg.adjoint(v) - w = linalg_ops.matrix_inverse(vt) # Construct the matrix f(i,j) = (i != j ? 1 / (e_i - e_j) : 0). # Notice that because of the term involving f, the gradient becomes # infinite (or NaN in practice) when eigenvalues are not unique. @@ -675,12 +674,19 @@ def _EigGrad(op, grad_e, grad_v): diag_grad_part = array_ops.matrix_diag(array_ops.matrix_diag_part( math_ops.cast(math_ops.real(vgv), vgv.dtype))) mid += f * (vgv - math_ops.matmul(math_ops.matmul(vt, v), diag_grad_part)) - grad_a = math_ops.matmul(w, math_ops.matmul(mid, vt)) + # vt is formally invertible as long as the original matrix is + # diagonalizable. However, in practice, vt may + # be ill-conditioned when matrix original matrix is close to + # non-diagonalizable one + grad_a = linalg_ops.solve(vt, math_ops.matmul(mid, vt)) else: _, v = linalg_ops.eig(op.inputs[0]) vt = _linalg.adjoint(v) - w = linalg_ops.matrix_inverse(vt) - grad_a = math_ops.matmul(w, + # vt is formally invertible as long as the original matrix is + # diagonalizable. However, in practice, vt may + # be ill-conditioned when matrix original matrix is close to + # non-diagonalizable one + grad_a = linalg_ops.solve(vt, math_ops.matmul( array_ops.matrix_diag(grad_e), vt)) From 9b346512b58c2ac020038f77ee1c8d9955d547f7 Mon Sep 17 00:00:00 2001 From: Evgenii Zheltonozhskii Date: Mon, 13 Apr 2020 20:35:24 +0300 Subject: [PATCH 07/11] Fix tests --- tensorflow/python/kernel_tests/eig_op_test.py | 12 ++++-------- tensorflow/python/ops/linalg_grad.py | 4 ++-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/kernel_tests/eig_op_test.py b/tensorflow/python/kernel_tests/eig_op_test.py index 4cc98884fa8..d4095289a0a 100644 --- a/tensorflow/python/kernel_tests/eig_op_test.py +++ b/tensorflow/python/kernel_tests/eig_op_test.py @@ -251,12 +251,10 @@ def _GetEigGradTest(dtype_, shape_, compute_v_): return Test if __name__ == "__main__": - dtypes_to_test = [dtypes_lib.complex64, dtypes_lib.complex128] - # TODO: is gradient_check able to deal with complex outputs of real inputs? 
- # [ - # dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.complex64, - # dtypes_lib.complex128 - # ] + dtypes_to_test = [ + dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.complex64, + dtypes_lib.complex128 + ] for compute_v in True, False: for dtype in dtypes_to_test: for size in 1, 2, 5, 10: @@ -265,8 +263,6 @@ if __name__ == "__main__": name = "%s_%s_%s" % (dtype.name, "_".join(map(str, shape)), compute_v) _AddTest(EigTest, "Eig", name, _GetEigTest(dtype, shape, compute_v)) - # TODO: gradient_check gets wrong numeric output for real inputs - # (might be connected with the fact that outputs are complex) if dtype not in [dtypes_lib.float32, dtypes_lib.float64]: _AddTest(EigGradTest, "EigGrad", name, _GetEigGradTest(dtype, shape, compute_v)) diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py index 6dbb7a3f611..6affb1c2931 100644 --- a/tensorflow/python/ops/linalg_grad.py +++ b/tensorflow/python/ops/linalg_grad.py @@ -678,7 +678,7 @@ def _EigGrad(op, grad_e, grad_v): # diagonalizable. However, in practice, vt may # be ill-conditioned when matrix original matrix is close to # non-diagonalizable one - grad_a = linalg_ops.solve(vt, math_ops.matmul(mid, vt)) + grad_a = linalg_ops.matrix_solve(vt, math_ops.matmul(mid, vt)) else: _, v = linalg_ops.eig(op.inputs[0]) vt = _linalg.adjoint(v) @@ -686,7 +686,7 @@ def _EigGrad(op, grad_e, grad_v): # diagonalizable. However, in practice, vt may # be ill-conditioned when matrix original matrix is close to # non-diagonalizable one - grad_a = linalg_ops.solve(vt, + grad_a = linalg_ops.matrix_solve(vt, math_ops.matmul( array_ops.matrix_diag(grad_e), vt)) From 1c2b025357ec4640f7d6725b9ee92cdc04f5e4a4 Mon Sep 17 00:00:00 2001 From: Evgenii Zheltonozhskii Date: Mon, 13 Apr 2020 22:17:38 +0300 Subject: [PATCH 08/11] Fix order of eigenvalues --- tensorflow/python/kernel_tests/eig_op_test.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/kernel_tests/eig_op_test.py b/tensorflow/python/kernel_tests/eig_op_test.py index d4095289a0a..55b87fb6684 100644 --- a/tensorflow/python/kernel_tests/eig_op_test.py +++ b/tensorflow/python/kernel_tests/eig_op_test.py @@ -28,6 +28,7 @@ from tensorflow.python.ops import gradient_checker_v2 from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops import sort_ops from tensorflow.python.platform import test @@ -225,14 +226,19 @@ def _GetEigGradTest(dtype_, shape_, compute_v_): with self.session(use_gpu=True): def Compute(x): e, v = linalg_ops.eig(x) + + # We sort eigenvalues by e.real+e.imag to have consistent + # order between runs + e, v = array_ops.transpose_v2(e), array_ops.transpose_v2(v) + idx = sort_ops.argsort(math_ops.real(e)+math_ops.imag(e)) + e, v = array_ops.gather(e, idx), array_ops.gather(v, idx) + e, v = array_ops.transpose_v2(e), array_ops.transpose_v2(v) + # (complex) Eigenvectors are only unique up to an arbitrary phase # We normalize the vectors such that the first component has phase 0. 
top_rows = v[..., 0:1, :] - if dtype_.is_complex: - angle = -math_ops.angle(top_rows) - phase = math_ops.complex(math_ops.cos(angle), math_ops.sin(angle)) - else: - phase = math_ops.sign(top_rows) + angle = -math_ops.angle(top_rows) + phase = math_ops.complex(math_ops.cos(angle), math_ops.sin(angle)) v *= phase return e, v From 06fdc2041e326ed469efbb00ae0b1221091f149f Mon Sep 17 00:00:00 2001 From: Evgenii Zheltonozhskii Date: Mon, 13 Apr 2020 23:26:53 +0300 Subject: [PATCH 09/11] Fix gathering for batched case --- tensorflow/python/kernel_tests/eig_op_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/kernel_tests/eig_op_test.py b/tensorflow/python/kernel_tests/eig_op_test.py index 55b87fb6684..92e7844f1e6 100644 --- a/tensorflow/python/kernel_tests/eig_op_test.py +++ b/tensorflow/python/kernel_tests/eig_op_test.py @@ -229,10 +229,10 @@ def _GetEigGradTest(dtype_, shape_, compute_v_): # We sort eigenvalues by e.real+e.imag to have consistent # order between runs - e, v = array_ops.transpose_v2(e), array_ops.transpose_v2(v) - idx = sort_ops.argsort(math_ops.real(e)+math_ops.imag(e)) - e, v = array_ops.gather(e, idx), array_ops.gather(v, idx) - e, v = array_ops.transpose_v2(e), array_ops.transpose_v2(v) + batch_dims = len(e.shape) - 1 + idx = sort_ops.argsort(math_ops.real(e)+math_ops.imag(e), axis=-1) + e = array_ops.gather(e, idx, batch_dims=batch_dims) + v = array_ops.gather(v, idx, batch_dims=batch_dims) # (complex) Eigenvectors are only unique up to an arbitrary phase # We normalize the vectors such that the first component has phase 0. From e32a543df62b32f7e44ee528dd8956af3ba36184 Mon Sep 17 00:00:00 2001 From: Evgenii Zheltonozhskii Date: Mon, 13 Apr 2020 23:41:22 +0300 Subject: [PATCH 10/11] Lint fixes --- tensorflow/python/kernel_tests/eig_op_test.py | 13 ++++++------- tensorflow/python/ops/linalg_grad.py | 6 +++--- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/kernel_tests/eig_op_test.py b/tensorflow/python/kernel_tests/eig_op_test.py index 92e7844f1e6..80f5f6b0e58 100644 --- a/tensorflow/python/kernel_tests/eig_op_test.py +++ b/tensorflow/python/kernel_tests/eig_op_test.py @@ -84,7 +84,7 @@ class EigTest(test.TestCase): "self_adjoint_eig_fail_if_denorms_flushed.txt")).astype(np.float32) self.assertEqual(matrix.shape, (32, 32)) matrix_tensor = constant_op.constant(matrix) - with self.session(use_gpu=True) as sess: + with self.session(use_gpu=True) as _: (e, v) = self.evaluate(linalg_ops.self_adjoint_eig(matrix_tensor)) self.assertEqual(e.size, 32) self.assertAllClose( @@ -101,9 +101,8 @@ def SortEigenValues(e): def SortEigenDecomposition(e, v): if v.ndim < 2: return e, v - else: - perm = np.argsort(e.real + e.imag, -1) - return np.take(e, perm, -1), np.take(v, perm, -1) + perm = np.argsort(e.real + e.imag, -1) + return np.take(e, perm, -1), np.take(v, perm, -1) def EquilibrateEigenVectorPhases(x, y): @@ -229,10 +228,10 @@ def _GetEigGradTest(dtype_, shape_, compute_v_): # We sort eigenvalues by e.real+e.imag to have consistent # order between runs - batch_dims = len(e.shape) - 1 + b_dims = len(e.shape) - 1 idx = sort_ops.argsort(math_ops.real(e)+math_ops.imag(e), axis=-1) - e = array_ops.gather(e, idx, batch_dims=batch_dims) - v = array_ops.gather(v, idx, batch_dims=batch_dims) + e = array_ops.gather(e, idx, batch_dims=b_dims) + v = array_ops.gather(v, idx, batch_dims=b_dims) # (complex) Eigenvectors are only unique up to an arbitrary phase # We normalize the vectors such that the first component 
has phase 0. diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py index 6affb1c2931..0b3a419a8c1 100644 --- a/tensorflow/python/ops/linalg_grad.py +++ b/tensorflow/python/ops/linalg_grad.py @@ -687,9 +687,9 @@ def _EigGrad(op, grad_e, grad_v): # be ill-conditioned when matrix original matrix is close to # non-diagonalizable one grad_a = linalg_ops.matrix_solve(vt, - math_ops.matmul( - array_ops.matrix_diag(grad_e), - vt)) + math_ops.matmul( + array_ops.matrix_diag(grad_e), + vt)) return math_ops.cast(grad_a, op.inputs[0].dtype) @ops.RegisterGradient("SelfAdjointEigV2") From 1ca854ed52b766399030acb62d74c2fa05d44eec Mon Sep 17 00:00:00 2001 From: Evgenii Zheltonozhskii Date: Mon, 13 Apr 2020 23:57:25 +0300 Subject: [PATCH 11/11] Lint fixes --- tensorflow/python/ops/linalg_grad.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py index 0b3a419a8c1..536539ff73d 100644 --- a/tensorflow/python/ops/linalg_grad.py +++ b/tensorflow/python/ops/linalg_grad.py @@ -688,8 +688,8 @@ def _EigGrad(op, grad_e, grad_v): # non-diagonalizable one grad_a = linalg_ops.matrix_solve(vt, math_ops.matmul( - array_ops.matrix_diag(grad_e), - vt)) + array_ops.matrix_diag(grad_e), + vt)) return math_ops.cast(grad_a, op.inputs[0].dtype) @ops.RegisterGradient("SelfAdjointEigV2")
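
For reference, a minimal self-contained sketch of how the gradient registered by this series can be exercised end to end, assuming a TensorFlow build that includes these commits and the public wrappers tf.linalg.eig / tf.linalg.eigvals (which dispatch to the Eig op with compute_v=True / compute_v=False respectively). The loss below only depends on the eigenvalues, so it goes through the compute_v=False branch of _EigGrad, which reduces to grad_A = V^{-H} · diag(grad_e) · V^H — exactly what the matrix_solve call in that branch computes. This is an illustrative sketch, not part of the patch.

import numpy as np
import tensorflow as tf

np.random.seed(0)
n = 4
# Random complex matrix; with probability 1 its eigenvalues are distinct,
# which is the case _EigGrad currently handles.
a_np = (np.random.uniform(-1.0, 1.0, (n, n)) +
        1j * np.random.uniform(-1.0, 1.0, (n, n))).astype(np.complex128)
a = tf.constant(a_np)

with tf.GradientTape() as tape:
  tape.watch(a)
  e = tf.linalg.eigvals(a)              # Eig op with compute_v=False
  loss = tf.reduce_sum(tf.abs(e) ** 2)  # real scalar built from the eigenvalues

grad = tape.gradient(loss, a)           # complex (n, n) gradient via _EigGrad
print(grad.shape, grad.dtype)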