Implement gradient for SparseSoftmax.

Essentially the same calculation as Softmax, adapted to sparse inputs/outputs; a dense-case sketch follows the commit metadata below.
Change: 123332988
Zongheng Yang 2016-05-26 09:37:42 -08:00 committed by TensorFlower Gardener
parent 6b8de8ac69
commit 008d41f4cc
2 changed files with 41 additions and 3 deletions
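
For reference, the dense identity being adapted, as a minimal NumPy sketch (not part of the commit; softmax_grad and its argument names are illustrative):

import numpy as np

def softmax_grad(softmax, grad_softmax):
  # Dense SoftmaxGrad: grad_x = (grad_softmax - sum(grad_softmax * softmax)) * softmax,
  # with the sum taken along the last (class) dimension.
  s = np.sum(grad_softmax * softmax, axis=-1, keepdims=True)
  return (grad_softmax - s) * softmax

The sparse version in the second file below applies the same formula, but only at the non-zero index positions of the input SparseTensor.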


@@ -637,6 +637,17 @@ class SparseSoftmaxTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(sp_t.indices.eval(), result.indices)
       self.assertAllEqual(shape, result.shape)
 
+  def testGradient(self):
+    x_shape = [2, 5, 10]
+    with self.test_session(use_gpu=False):
+      for dtype in [np.float32, np.float64]:
+        x_np = np.random.randn(*x_shape).astype(dtype)
+        x_tf, nnz = _sparsify(x_np)
+        y_tf = tf.sparse_softmax(x_tf)
+        err = tf.test.compute_gradient_error(x_tf.values, (nnz,), y_tf.values,
+                                             (nnz,))
+        self.assertLess(err, 1e-4)
+
 
 if __name__ == "__main__":
   googletest.main()
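
The test relies on tf.test.compute_gradient_error, which compares the analytic gradient against a numerically estimated one and reports the maximum deviation. A minimal sketch of the finite-difference idea behind such a check (illustrative only; numeric_grad is a made-up helper, not TF API):

import numpy as np

def numeric_grad(f, x, eps=1e-6):
  """Central-difference estimate of the gradient of a scalar-valued f at x."""
  g = np.zeros_like(x)
  for i in range(x.size):
    d = np.zeros_like(x)
    d.flat[i] = eps
    g.flat[i] = (f(x + d) - f(x - d)) / (2.0 * eps)
  return g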


@@ -227,6 +227,33 @@ def _SparseDenseCwiseDivGrad(op, grad):
 
 
 @ops.RegisterGradient("SparseSoftmax")
-def _SparseSoftmaxGrad(unused_op, unused_grad):
-  raise NotImplementedError("SparseSoftmax op doesn't have its gradient"
-                            "implemented yet")
+def _SparseSoftmaxGrad(op, grad):
+  """Gradients for SparseSoftmax.
+
+  The calculation is the same as SoftmaxGrad:
+
+    grad_x = grad_softmax * softmax - sum(grad_softmax * softmax) * softmax
+
+  where we now only operate on the non-zero values present in the SparseTensors.
+
+  Args:
+    op: the SparseSoftmax op.
+    grad: the upstream gradient w.r.t. the non-zero SparseSoftmax output values.
+
+  Returns:
+    Gradients w.r.t. the input (sp_indices, sp_values, sp_shape).
+  """
+  indices, shape = op.inputs[0], op.inputs[2]
+  out_vals = op.outputs[0]
+  sp_output = ops.SparseTensor(indices, out_vals, shape)
+  sp_grad = ops.SparseTensor(indices, grad, shape)
+  sp_product = ops.SparseTensor(
+      indices, sp_output.values * sp_grad.values, shape)
+
+  # [..., B, 1], dense.
+  sum_reduced = -sparse_ops.sparse_reduce_sum(sp_product, [-1], keep_dims=True)
+  # sparse [..., B, C] + dense [..., B, 1] with broadcast; outputs sparse.
+  sp_sum = sparse_ops.sparse_dense_cwise_add(sp_grad, sum_reduced)
+
+  grad_x = sp_sum.values * sp_output.values
+  return [None, grad_x, None]
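
As a hand check of the docstring formula on one row's non-zero values, assuming a dense NumPy reference (a sketch; none of these names come from the commit):

import numpy as np

vals = np.array([1.0, 2.0, 0.5])     # non-zero input values of a single row
grad = np.array([0.3, -0.1, 0.2])    # upstream gradient w.r.t. the softmax outputs

sm = np.exp(vals) / np.exp(vals).sum()  # softmax over the non-zeros only

# Docstring formula: grad_x = grad * sm - sum(grad * sm) * sm.
grad_x = grad * sm - np.sum(grad * sm) * sm

# Equivalent to applying the softmax Jacobian J = diag(sm) - outer(sm, sm).
J = np.diag(sm) - np.outer(sm, sm)
assert np.allclose(grad_x, J @ grad)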