Treat SparseApply* on empty sparse gradients as no-op
PiperOrigin-RevId: 353040731
Change-Id: Ibe213b002efc2622c86fb936c477c1e13820f3e4
parent 50b1c27aca
commit 8cb8c460a3
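To illustrate the behavior this change targets, here is a minimal Python sketch (not part of the commit) of an empty sparse-gradient update. It assumes a recent TensorFlow build and uses the resource variant tf.raw_ops.ResourceSparseApplyAdagrad, which to my understanding dispatches to the same SparseApplyAdagrad GPU functor patched below. With zero gradient rows and an empty index vector, the update is now a no-op instead of launching a kernel with an empty launch configuration.

# Sketch only, not part of the commit. Assumes the ResourceSparseApplyAdagrad
# raw op is available; GPU placement is optional.
import numpy as np
import tensorflow as tf

var = tf.Variable(np.arange(30, dtype=np.float32).reshape(3, 10))
accum = tf.Variable(np.ones([3, 10], dtype=np.float32))

# An "empty" sparse gradient: zero rows of values and an empty index vector.
empty_grad = tf.zeros([0, 10], dtype=tf.float32)
empty_indices = tf.zeros([0], dtype=tf.int32)

# With this change the update is a no-op; var and accum stay untouched.
tf.raw_ops.ResourceSparseApplyAdagrad(
    var=var.handle,
    accum=accum.handle,
    lr=tf.constant(2.0),
    grad=empty_grad,
    indices=empty_indices)

print(var.numpy()[:, :3])  # unchanged values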
@@ -512,6 +512,9 @@ struct SparseApplyAdagrad<GPUDevice, T, Tindex, has_epsilon> {
     const Tindex first_dim_size = var.dimension(0);
     const Tindex grad_size = grad.size();
     const Tindex indices_size = indices.size();
+    if (grad_size == 0) {
+      return Status::OK();
+    }
     GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d);
     return GpuLaunchKernel(
         SparseApplyAdagradKernel<T, Tindex, has_epsilon>, config.block_count,
@@ -570,6 +573,9 @@ struct SparseApplyProximalAdagrad<GPUDevice, T, Tindex> {
     const Tindex first_dim_size = var.dimension(0);
     const Tindex grad_size = grad.size();
     const Tindex indices_size = indices.size();
+    if (grad_size == 0) {
+      return Status::OK();
+    }
     GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d);
     return GpuLaunchKernel(SparseApplyProximalAdagradKernel<T, Tindex>,
                            config.block_count, config.thread_per_block, 0,
@@ -777,6 +783,9 @@ struct SparseApplyFtrl<GPUDevice, T, Tindex, has_l2_shrinkage> {
     const Tindex first_dim_size = var.dimension(0);
     const Tindex grad_size = grad.size();
     const Tindex indices_size = indices.size();
+    if (grad_size == 0) {
+      return Status::OK();
+    }
     GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d);
     return GpuLaunchKernel(
         SparseApplyFtrlKernel<T, Tindex, has_l2_shrinkage>, config.block_count,
@@ -846,12 +855,14 @@ struct SparseApplyKerasMomentum<GPUDevice, T, Tindex> {
     const Tindex first_dim_size = var.dimension(0);
     const Tindex grad_size = grad.size();
     const Tindex indices_size = indices.size();
-    GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d);
-    TF_CHECK_OK(GpuLaunchKernel(
-        SparseApplyKerasMomentumKernel<T, Tindex>, config.block_count,
-        config.thread_per_block, 0, d.stream(), var.data(), accum.data(),
-        lr.data(), grad.data(), indices.data(), momentum.data(), use_nesterov,
-        first_dim_size, grad_size, indices_size));
+    if (grad_size != 0) {
+      GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d);
+      TF_CHECK_OK(GpuLaunchKernel(
+          SparseApplyKerasMomentumKernel<T, Tindex>, config.block_count,
+          config.thread_per_block, 0, d.stream(), var.data(), accum.data(),
+          lr.data(), grad.data(), indices.data(), momentum.data(), use_nesterov,
+          first_dim_size, grad_size, indices_size));
+    }
     return static_cast<Tindex>(-1);
   }
 };
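Unlike the Status-returning functors above, SparseApplyKerasMomentum returns a Tindex, so the fix wraps the kernel launch in if (grad_size != 0) and still falls through to return static_cast<Tindex>(-1). The same empty-gradient case can be exercised from Python; this is a hedged sketch, assuming the tf.raw_ops.ResourceSparseApplyKerasMomentum op (used by Keras SGD with momentum) routes to this functor.

# Sketch only, not part of the commit. Assumes tf.raw_ops exposes
# ResourceSparseApplyKerasMomentum with (var, accum, lr, grad, indices,
# momentum) inputs.
import tensorflow as tf

var = tf.Variable(tf.ones([3, 4]))
accum = tf.Variable(tf.zeros([3, 4]))  # momentum accumulator slot

# Empty sparse gradient: no value rows, no indices.
empty_grad = tf.zeros([0, 4])
empty_indices = tf.zeros([0], dtype=tf.int32)

tf.raw_ops.ResourceSparseApplyKerasMomentum(
    var=var.handle,
    accum=accum.handle,
    lr=tf.constant(0.1),
    grad=empty_grad,
    indices=empty_indices,
    momentum=tf.constant(0.9),
    use_nesterov=False)

print(var.numpy())  # still all ones: the empty update is a no-op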
@@ -223,9 +223,9 @@ class TrainingOpsTest(TensorFlowTestCase):
       self._testTypesForFtrlMultiplyLinearByLr(
           x, y, z, lr, grad, use_gpu=False, l1=l1, l2=l2)
 
-  def _testTypesForSparseAdagrad(self, x, y, lr, grad, indices):
+  def _testTypesForSparseAdagrad(self, x, y, lr, grad, indices, use_gpu):
     self.setUp()
-    with self.session(use_gpu=True):
+    with self.session(use_gpu=use_gpu):
       var = variables.VariableV1(x)
       accum = variables.VariableV1(y)
       self.evaluate(variables.global_variables_initializer())
@@ -251,11 +251,12 @@ class TrainingOpsTest(TensorFlowTestCase):
                               lr,
                               grad,
                               indices,
+                              use_gpu,
                               l1=0.0,
                               l2=0.0,
                               lr_power=-0.5):
     self.setUp()
-    with self.session(use_gpu=False):
+    with self.session(use_gpu=use_gpu):
       var = variables.VariableV1(x)
       accum = variables.VariableV1(y)
       linear = variables.VariableV1(z)
@@ -327,8 +328,9 @@ class TrainingOpsTest(TensorFlowTestCase):
   @test_util.run_v1_only("SparseApplyAdagrad op returns a ref, so it is not "
                          "supported in eager mode.")
   def testSparseApplyAdagrad(self):
-    for (dtype, index_type) in itertools.product(
-        [np.float16, np.float32, np.float64], [np.int32, np.int64]):
+    for (dtype, index_type,
+         use_gpu) in itertools.product([np.float16, np.float32, np.float64],
+                                       [np.int32, np.int64], [False, True]):
       x_val = [np.arange(10), np.arange(10, 20), np.arange(20, 30)]
       y_val = [np.arange(1, 11), np.arange(11, 21), np.arange(21, 31)]
       x = np.array(x_val).astype(dtype)
@@ -337,13 +339,19 @@ class TrainingOpsTest(TensorFlowTestCase):
       grad_val = [np.arange(10), np.arange(10)]
       grad = np.array(grad_val).astype(dtype)
       indices = np.array([0, 2]).astype(index_type)
-      self._testTypesForSparseAdagrad(x, y, lr, grad, indices)
+      self._testTypesForSparseAdagrad(x, y, lr, grad, indices, use_gpu)
+      # Empty sparse gradients.
+      empty_grad = np.zeros([0, 10], dtype=dtype)
+      empty_indices = np.zeros([0], dtype=index_type)
+      self._testTypesForSparseAdagrad(x, y, lr, empty_grad, empty_indices,
+                                      use_gpu)
 
   @test_util.run_v1_only("SparseApplyAdagrad op returns a ref, so it is not "
                          "supported in eager mode.")
   def testSparseApplyAdagradDim1(self):
-    for (dtype, index_type) in itertools.product(
-        [np.float16, np.float32, np.float64], [np.int32, np.int64]):
+    for (dtype, index_type,
+         use_gpu) in itertools.product([np.float16, np.float32, np.float64],
+                                       [np.int32, np.int64], [False, True]):
       x_val = [[1.0], [2.0], [3.0]]
       y_val = [[4.0], [5.0], [6.0]]
       x = np.array(x_val).astype(dtype)
@@ -352,13 +360,18 @@ class TrainingOpsTest(TensorFlowTestCase):
       grad_val = [[1.5], [2.5]]
       grad = np.array(grad_val).astype(dtype)
       indices = np.array([0, 2]).astype(index_type)
-      self._testTypesForSparseAdagrad(x, y, lr, grad, indices)
+      self._testTypesForSparseAdagrad(x, y, lr, grad, indices, use_gpu)
 
   @test_util.run_v1_only("SparseApplyFtrl op returns a ref, so it is not "
                          "supported in eager mode.")
   def testSparseApplyFtrlDim1(self):
-    for (dtype, index_type) in itertools.product(
-        [np.float16, np.float32, np.float64], [np.int32, np.int64]):
+    for (dtype, index_type,
+         use_gpu) in itertools.product([np.float16, np.float32, np.float64],
+                                       [np.int32, np.int64], [False, True]):
+      # TODO(b/178042695): This configuration leads to a "too many resources
+      # requested for launch" error.
+      if (dtype, index_type, use_gpu) == (np.float64, np.int64, True):
+        continue
       x_val = [[0.0], [0.0], [0.0]]
       y_val = [[4.0], [5.0], [6.0]]
       z_val = [[0.0], [0.0], [0.0]]
@@ -369,7 +382,12 @@ class TrainingOpsTest(TensorFlowTestCase):
       grad_val = [[1.5], [2.5]]
       grad = np.array(grad_val).astype(dtype)
       indices = np.array([0, 2]).astype(index_type)
-      self._testTypesForSparseFtrl(x, y, z, lr, grad, indices)
+      self._testTypesForSparseFtrl(x, y, z, lr, grad, indices, use_gpu)
+      # Empty sparse gradients.
+      empty_grad = np.zeros([0, 1], dtype=dtype)
+      empty_indices = np.zeros([0], dtype=index_type)
+      self._testTypesForSparseFtrl(x, y, z, lr, empty_grad, empty_indices,
+                                   use_gpu)
 
   @test_util.run_v1_only("SparseApplyFtrlMultiplyLinearByLr op returns a ref, "
                          "so it is not supported in eager mode.")