Treat SparseApply* on empty sparse gradients as no-op

PiperOrigin-RevId: 353040731
Change-Id: Ibe213b002efc2622c86fb936c477c1e13820f3e4
A. Unique TensorFlower 2021-01-21 09:51:06 -08:00 committed by TensorFlower Gardener
parent 50b1c27aca
commit 8cb8c460a3
2 changed files with 47 additions and 18 deletions
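
The first diff below touches the GPU functors in the training-ops kernel; the second updates the corresponding Python tests. Each functor now returns early when the incoming sparse gradient has zero elements, so GetGpuLaunchConfig is never asked to build a launch configuration for zero items of work. Semantically, an empty sparse gradient selects no rows, so the update must leave the variable and the accumulator untouched. A minimal NumPy sketch of the row-wise sparse Adagrad update makes this concrete (illustrative only; sparse_adagrad_update is a hypothetical helper, not TensorFlow API, and the epsilon-free form of the update is assumed):

import numpy as np

def sparse_adagrad_update(var, accum, lr, grad, indices):
  # Row-wise Adagrad: only the rows named in `indices` are touched.
  for g_row, idx in zip(grad, indices):
    accum[idx] += g_row * g_row
    var[idx] -= lr * g_row / np.sqrt(accum[idx])

var = np.arange(30, dtype=np.float32).reshape(3, 10)
accum = np.ones_like(var)
before = var.copy()

# Empty sparse gradient: zero rows and zero indices (same shapes as the new tests).
empty_grad = np.zeros([0, 10], dtype=np.float32)
empty_indices = np.zeros([0], dtype=np.int64)

sparse_adagrad_update(var, accum, 2.0, empty_grad, empty_indices)
assert np.array_equal(var, before)  # no rows selected, so the update is a no-op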


@@ -512,6 +512,9 @@ struct SparseApplyAdagrad<GPUDevice, T, Tindex, has_epsilon> {
     const Tindex first_dim_size = var.dimension(0);
     const Tindex grad_size = grad.size();
     const Tindex indices_size = indices.size();
+    if (grad_size == 0) {
+      return Status::OK();
+    }
     GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d);
     return GpuLaunchKernel(
         SparseApplyAdagradKernel<T, Tindex, has_epsilon>, config.block_count,
@@ -570,6 +573,9 @@ struct SparseApplyProximalAdagrad<GPUDevice, T, Tindex> {
     const Tindex first_dim_size = var.dimension(0);
     const Tindex grad_size = grad.size();
     const Tindex indices_size = indices.size();
+    if (grad_size == 0) {
+      return Status::OK();
+    }
     GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d);
     return GpuLaunchKernel(SparseApplyProximalAdagradKernel<T, Tindex>,
                            config.block_count, config.thread_per_block, 0,
@@ -777,6 +783,9 @@ struct SparseApplyFtrl<GPUDevice, T, Tindex, has_l2_shrinkage> {
     const Tindex first_dim_size = var.dimension(0);
     const Tindex grad_size = grad.size();
     const Tindex indices_size = indices.size();
+    if (grad_size == 0) {
+      return Status::OK();
+    }
     GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d);
     return GpuLaunchKernel(
         SparseApplyFtrlKernel<T, Tindex, has_l2_shrinkage>, config.block_count,
@@ -846,12 +855,14 @@ struct SparseApplyKerasMomentum<GPUDevice, T, Tindex> {
     const Tindex first_dim_size = var.dimension(0);
     const Tindex grad_size = grad.size();
     const Tindex indices_size = indices.size();
-    GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d);
-    TF_CHECK_OK(GpuLaunchKernel(
-        SparseApplyKerasMomentumKernel<T, Tindex>, config.block_count,
-        config.thread_per_block, 0, d.stream(), var.data(), accum.data(),
-        lr.data(), grad.data(), indices.data(), momentum.data(), use_nesterov,
-        first_dim_size, grad_size, indices_size));
+    if (grad_size != 0) {
+      GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d);
+      TF_CHECK_OK(GpuLaunchKernel(
+          SparseApplyKerasMomentumKernel<T, Tindex>, config.block_count,
+          config.thread_per_block, 0, d.stream(), var.data(), accum.data(),
+          lr.data(), grad.data(), indices.data(), momentum.data(), use_nesterov,
+          first_dim_size, grad_size, indices_size));
+    }
     return static_cast<Tindex>(-1);
   }
 };


@@ -223,9 +223,9 @@ class TrainingOpsTest(TensorFlowTestCase):
       self._testTypesForFtrlMultiplyLinearByLr(
           x, y, z, lr, grad, use_gpu=False, l1=l1, l2=l2)

-  def _testTypesForSparseAdagrad(self, x, y, lr, grad, indices):
+  def _testTypesForSparseAdagrad(self, x, y, lr, grad, indices, use_gpu):
     self.setUp()
-    with self.session(use_gpu=True):
+    with self.session(use_gpu=use_gpu):
       var = variables.VariableV1(x)
       accum = variables.VariableV1(y)
       self.evaluate(variables.global_variables_initializer())
@@ -251,11 +251,12 @@ class TrainingOpsTest(TensorFlowTestCase):
                               lr,
                               grad,
                               indices,
+                              use_gpu,
                               l1=0.0,
                               l2=0.0,
                               lr_power=-0.5):
     self.setUp()
-    with self.session(use_gpu=False):
+    with self.session(use_gpu=use_gpu):
       var = variables.VariableV1(x)
       accum = variables.VariableV1(y)
       linear = variables.VariableV1(z)
@@ -327,8 +328,9 @@ class TrainingOpsTest(TensorFlowTestCase):
   @test_util.run_v1_only("SparseApplyAdagrad op returns a ref, so it is not "
                          "supported in eager mode.")
   def testSparseApplyAdagrad(self):
-    for (dtype, index_type) in itertools.product(
-        [np.float16, np.float32, np.float64], [np.int32, np.int64]):
+    for (dtype, index_type,
+         use_gpu) in itertools.product([np.float16, np.float32, np.float64],
+                                       [np.int32, np.int64], [False, True]):
       x_val = [np.arange(10), np.arange(10, 20), np.arange(20, 30)]
       y_val = [np.arange(1, 11), np.arange(11, 21), np.arange(21, 31)]
       x = np.array(x_val).astype(dtype)
@@ -337,13 +339,19 @@ class TrainingOpsTest(TensorFlowTestCase):
       grad_val = [np.arange(10), np.arange(10)]
       grad = np.array(grad_val).astype(dtype)
       indices = np.array([0, 2]).astype(index_type)
-      self._testTypesForSparseAdagrad(x, y, lr, grad, indices)
+      self._testTypesForSparseAdagrad(x, y, lr, grad, indices, use_gpu)
+      # Empty sparse gradients.
+      empty_grad = np.zeros([0, 10], dtype=dtype)
+      empty_indices = np.zeros([0], dtype=index_type)
+      self._testTypesForSparseAdagrad(x, y, lr, empty_grad, empty_indices,
+                                      use_gpu)

   @test_util.run_v1_only("SparseApplyAdagrad op returns a ref, so it is not "
                          "supported in eager mode.")
   def testSparseApplyAdagradDim1(self):
-    for (dtype, index_type) in itertools.product(
-        [np.float16, np.float32, np.float64], [np.int32, np.int64]):
+    for (dtype, index_type,
+         use_gpu) in itertools.product([np.float16, np.float32, np.float64],
+                                       [np.int32, np.int64], [False, True]):
       x_val = [[1.0], [2.0], [3.0]]
       y_val = [[4.0], [5.0], [6.0]]
       x = np.array(x_val).astype(dtype)
@@ -352,13 +360,18 @@ class TrainingOpsTest(TensorFlowTestCase):
       grad_val = [[1.5], [2.5]]
       grad = np.array(grad_val).astype(dtype)
       indices = np.array([0, 2]).astype(index_type)
-      self._testTypesForSparseAdagrad(x, y, lr, grad, indices)
+      self._testTypesForSparseAdagrad(x, y, lr, grad, indices, use_gpu)

   @test_util.run_v1_only("SparseApplyFtrl op returns a ref, so it is not "
                          "supported in eager mode.")
   def testSparseApplyFtrlDim1(self):
-    for (dtype, index_type) in itertools.product(
-        [np.float16, np.float32, np.float64], [np.int32, np.int64]):
+    for (dtype, index_type,
+         use_gpu) in itertools.product([np.float16, np.float32, np.float64],
+                                       [np.int32, np.int64], [False, True]):
+      # TODO(b/178042695): This configuration leads to a "too many resources
+      # requested for launch" error.
+      if (dtype, index_type, use_gpu) == (np.float64, np.int64, True):
+        continue
       x_val = [[0.0], [0.0], [0.0]]
       y_val = [[4.0], [5.0], [6.0]]
       z_val = [[0.0], [0.0], [0.0]]
@@ -369,7 +382,12 @@ class TrainingOpsTest(TensorFlowTestCase):
       grad_val = [[1.5], [2.5]]
       grad = np.array(grad_val).astype(dtype)
       indices = np.array([0, 2]).astype(index_type)
-      self._testTypesForSparseFtrl(x, y, z, lr, grad, indices)
+      self._testTypesForSparseFtrl(x, y, z, lr, grad, indices, use_gpu)
+      # Empty sparse gradients.
+      empty_grad = np.zeros([0, 1], dtype=dtype)
+      empty_indices = np.zeros([0], dtype=index_type)
+      self._testTypesForSparseFtrl(x, y, z, lr, empty_grad, empty_indices,
+                                   use_gpu)

   @test_util.run_v1_only("SparseApplyFtrlMultiplyLinearByLr op returns a ref, "
                          "so it is not supported in eager mode.")